diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..3550a30f2d --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index 919cdccd05..968cc71d2e 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,7 @@ wasm-pack.log # IDEs .vscode/ + +# nix +.direnv/ +result diff --git a/.readthedocs.yml b/.readthedocs.yml index e58229537a..0c7ae9f116 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -12,7 +12,7 @@ conda: environment: doc/environment.yml python: - version: 3.7 + version: 3.8 install: - method: pip path: . diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000000..ef043fcddb --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1323 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "az" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f771a5d1f5503f7f4279a30f3643d3421ba149848b89ecaaec0ea2acf04a5ac4" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "buf_redux" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" +dependencies = [ + "memchr", + "safemem", +] + +[[package]] +name = "bumpalo" +version = "3.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a45a46ab1f2412e53d3a0ade76ffad2025804294569aae387231a0cd6e0899" + +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "bzip2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "capnp" +version = "0.14.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c262726f68118392269a3f7a5546baf51dcfe5cb3c3f0957b502106bf1a065" + +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "codepage-437" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e40c1169585d8d08e5675a39f2fc056cd19a258fc4cba5e3bbf4a9c1026de535" +dependencies = [ + "csv", +] + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "counter" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "878793089e7b461e0f316f45eb6d12ef73c0e5ec5194d372617eb47bb61a85ed" 
+dependencies = [ + "num-traits", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00d6d2ea26e8b151d99093005cb442fb9a37aeaca582a03ec70946f49ab5ed9" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b5e5bed1f1c269533fa816a0a5492b3545209a205ca1a54842be180eb63a16a6" +dependencies = [ + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "finch" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f5b421df230ee6000ccb42073103407a8b29c0adc2b5870a346d2fd6281ceec" +dependencies = [ + "bincode", + "capnp", + "memmap", + "murmurhash3", + "ndarray", + "needletail", + "rayon", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "fixedbitset" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e" + +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "getrandom" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getset" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "js-sys" +version = "0.3.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = 
"1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.119" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" + +[[package]] +name = "log" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lzma-sys" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb4b7c3eddad11d3af9e86c487607d2d2442d185d848575365c4856ba96d619" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matrixmultiply" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916806ba0031cd542105d916a97c8572e1fa6dd79c9c51e7eb43a09ec2dd84c1" +dependencies = [ + "rawpointer", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "memmap2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "murmurhash3" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" + +[[package]] +name = "ndarray" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c0d5c9540a691d153064dc47a4db2504587a75eae07bf1d73f7a596ebc73c04" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "needletail" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb4c43ebd04b0e776119c8fc3bd4c28178619cd04e1f19f600a4ef0282fa3cc" +dependencies = [ + "buf_redux", + "bytecount", + "bzip2", + "flate2", + "memchr", + "xz2", +] + +[[package]] +name = "niffler" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68c7ffd42bdba05fc9fbfda31283d44c5c8a88fed1a191f68795dba23cc8204b" +dependencies = [ + "cfg-if", + "flate2", + "thiserror", +] + +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + +[[package]] +name = "num-complex" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "747d632c0c558b87dbabbe6a82f3b4ae03720d0646ac5b7b4dae89394be5f2c5" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "piz" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58c75d1c00e6d407e283cc66d9d4fd0985ef1703c761520845b93c4f981bfb65" +dependencies = [ + "chrono", + "codepage-437", + "crc32fast", + "flate2", + "log", + "thiserror", + "twoway", +] + +[[package]] +name = "pkg-config" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" + +[[package]] +name = "plotters" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "primal-check" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01419cee72c1a1ca944554e23d83e483e1bccf378753344e881de28b5487511d" +dependencies = [ + "num-integer", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "proptest" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5" +dependencies = [ + "bitflags", + "byteorder", + "lazy_static", + "num-traits", + "quick-error", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", +] + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quote" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scoped-tls" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.79" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" +dependencies = [ + "itoa 1.0.1", + "ryu", + "serde", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "sorted-iter" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1341053f34bb13b5e9590afb7d94b48b48d4b87467ec28e3c238693bb553de" + +[[package]] +name = "sourmash" +version = "0.11.0" +dependencies = [ + "assert_matches", + "az", + "bytecount", + "byteorder", + "cfg-if", + "counter", + "criterion", + "finch", + "fixedbitset", + "getrandom", + "getset", + "log", + "md5", + "memmap2", + "murmurhash3", + "needletail", + "niffler", + "nohash-hasher", + "num-iter", + "once_cell", + "piz", + "primal-check", + "proptest", + "rand", + "rayon", + "serde", + "serde_json", + "tempfile", + "thiserror", + "twox-hash", + "typed-builder", + "vec-collections", + "wasm-bindgen", + "wasm-bindgen-test", + "web-sys", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "textwrap" +version = "0.11.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "twoway" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c57ffb460d7c24cd6eda43694110189030a3d1dfe418416d9468fd1c1d290b47" +dependencies = [ + "memchr", + "unchecked-index", +] + +[[package]] +name = "twox-hash" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee73e6e4924fe940354b8d4d98cad5231175d615cd855b758adc658c0aac6a0" +dependencies = [ + "cfg-if", + "rand", + "static_assertions", +] + +[[package]] +name = "typed-builder" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unchecked-index" 
+version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vec-collections" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2390c4dc8ae8640c57d067b1a3d40bc05c124cc6bc7394d761b53435d41b76" +dependencies = [ + "num-traits", + "serde", + "smallvec", + "sorted-iter", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06" +dependencies = [ + "cfg-if", + "serde", + "serde_json", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2" + +[[package]] +name = "wasm-bindgen-test" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45c8d417d87eefa0087e62e3c75ad086be39433449e2961add9a5d9ce5acc2f1" +dependencies = [ + "console_error_panic_hook", + "js-sys", + "scoped-tls", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0e560d44db5e73b69a9757a15512fe7e1ef93ed2061c928871a4025798293dd" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "web-sys" +version = "0.3.56" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xz2" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c179869f34fc7c01830d3ce7ea2086bc3a07e0d35289b667d0a8bf910258926c" +dependencies = [ + "lzma-sys", +] diff --git a/default.nix b/default.nix new file mode 100644 index 0000000000..89308a3888 --- /dev/null +++ b/default.nix @@ -0,0 +1,10 @@ +(import + ( + fetchTarball { + url = "https://github.com/edolstra/flake-compat/archive/99f1c2157fba4bfe6211a321fd0ee43199025dbf.tar.gz"; + sha256 = "0x2jn3vrawwv9xp15674wjz9pixwjyj3j771izayl962zziivbx2"; + } + ) + { + src = ./.; + }).defaultNix diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000000..f53f3c1900 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,18 @@ +# Documentation on the docs + +We use 
+[MyST](https://myst-parser.readthedocs.io/en/latest/sphinx/intro.html) +to generate Sphinx doc output from Markdown input. + +## Useful tips and tricks: + +### Linking internally between sections in the docs + +For linking within the sourmash docs, you should use the +[auto-generated header anchors](https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#auto-generated-header-anchors) +provided by MyST. + +You can generate a list of these for a given document with: +``` +myst-anchors -l 3 command-line.md +``` diff --git a/doc/classifying-signatures.md b/doc/classifying-signatures.md index c40be86adf..11cc2c1bbd 100644 --- a/doc/classifying-signatures.md +++ b/doc/classifying-signatures.md @@ -57,7 +57,7 @@ genomes based on greedy partitioning. Essentially, it takes a query metagenome and searches the database for the most highly contained genome; it then subtracts that match from the metagenome, and repeats. At the end it reports how much of the metagenome remains unknown. The -[basic sourmash tutorial](tutorial-basic.md#what-s-in-my-metagenome) +[basic sourmash tutorial](tutorial-basic.md#whats-in-my-metagenome) has some sample output from using gather with GenBank. See Appendix A at the bottom of this page for more technical details. diff --git a/doc/command-line.md b/doc/command-line.md index a3900e9825..88cae603cf 100644 --- a/doc/command-line.md +++ b/doc/command-line.md @@ -119,12 +119,13 @@ information for each command. Most of the commands in sourmash work with **signatures**, which contain information about genomic or proteomic sequences. Each signature contains one or more **sketches**, which are compressed versions of these sequences. Using sourmash, you can search, compare, and analyze these sequences in various ways. -To create a signature with one or more sketches, you use the `sourmash sketch` command. There are three main commands: +To create a signature with one or more sketches, you use the `sourmash sketch` command. 
There are four main commands: ``` sourmash sketch dna sourmash sketch protein sourmash sketch translate +sourmash sketch fromfile ``` The `sketch dna` command reads in **DNA sequences** and outputs **DNA sketches**. @@ -133,10 +134,14 @@ The `sketch protein` command reads in **protein sequences** and outputs **protei The `sketch translate` command reads in **DNA sequences**, translates them in all six frames, and outputs **protein sketches**. -`sourmash sketch` takes FASTA or FASTQ sequences as input; input data can be -uncompressed, compressed with gzip, or compressed with bzip2. The output -will be one or more JSON signature files that can be used with the other -sourmash commands. +The `sketch fromfile` command takes in a CSV file containing the +locations of genomes and proteomes, and outputs all of the requested +sketches. It is primarily intended for large-scale database construction. + +All of the `sourmash sketch` commands take FASTA or FASTQ sequences as +input; input data can be uncompressed, compressed with gzip, or +compressed with bzip2. The output will be one or more signature files +that can be used by other sourmash commands. Please see [the `sourmash sketch` documentation page](sourmash-sketch.md) for @@ -948,10 +953,7 @@ for an example use case. ## `sourmash signature` subcommands for signature manipulation -These commands manipulate signatures from the command line. Currently -supported subcommands are `merge`, `rename`, `intersect`, -`extract`, `downsample`, `subtract`, `import`, `export`, `info`, -`flatten`, `filter`, `cat`, and `split`. +These commands manipulate signatures from the command line. The signature commands that combine or otherwise have multiple signatures interacting (`merge`, `intersect`, `subtract`) work only on @@ -989,20 +991,25 @@ Display signature details. 
For example, ``` -sourmash sig describe tests/test-data/47.fa.sig +sourmash sig describe tests/test-data/track_abund/47.fa.sig ``` will display: ``` -signature filename: tests/test-data/47.fa.sig +signature filename: tests/test-data/track_abund/47.fa.sig signature: NC_009665.1 Shewanella baltica OS185, complete genome -source file: 47.fa +source file: podar-ref/47.fa md5: 09a08691ce52952152f0e866a59f6261 -k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=0 +k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=1 size: 5177 +sum hashes: 5292 signature license: CC0 ``` +Here, the `size` is the number of distinct hashes in the sketch, and +`sum_hashes` is the total number of hashes in the sketch, with abundances. +When `track_abundance` is 0, `size` is always the same as `sum_hashes`. + ### `sourmash signature fileinfo` - display a summary of the contents of a sourmash collection Display signature file, database, or collection. @@ -1179,10 +1186,23 @@ will output the intersection of all the hashes in those three files to The `intersect` command flattens all signatures, i.e. the abundances in any signatures will be ignored and the output signature will have -`track_abundance` turned off. +`track_abundance` turned off. The `-A/--abundance-from` argument will +borrow abundances from the specified signature (which will also be added +to the intersection). -Note: `intersect` only creates one output file, with one signature in it, -in the JSON `.sig` format. +### `sourmash signature inflate` - transfer abundances from one signature to others + +Use abundances from one signature to provide abundances on other signatures. + +For example, + +``` +sourmash signature inflate file1.sig file2.sig file3.sig -o inflated.sig +``` +will take the abundances from hashes in `file1.sig` and use them to set +the abundances on matching hashes in `file2.sig` and `file3.sig`.
+Any hashes that are not present in `file1.sig` will be removed from +`file2.sig` and `file3.sig` as they will now have zero abundance. ### `sourmash signature downsample` - decrease the size of a signature @@ -1379,6 +1399,25 @@ iterating over the signatures in the input file. This can be slow for large collections. Use `--no-rebuild-manifest` to load an existing manifest if it is available. +### `sourmash signature check` - compare picklists and manifests + +Compare picklists and manifests across databases, and optionally output matches +and missing items. + +For example, +``` +sourmash sig check tests/test-data/gather/GCF*.sig \ + --picklist tests/test-data/gather/salmonella-picklist.csv::manifest +``` +will load all of the `GCF` signatures and compare them to the given picklist. +With `-o/--output-missing`, `sig check` will save unmatched elements of the +picklist CSV. With `--save-manifest-matching`, `sig check` will save all +of the _matched_ elements to a manifest file, which can then be used as a +sourmash database. + +`sourmash sig check` is particularly useful when working with large +collections of signatures and identifiers. + ## Advanced command-line usage ### Loading signatures and databases @@ -1551,6 +1590,9 @@ to stdout. All of these save formats can be loaded by sourmash commands. +**We strongly suggest using .zip files to store signatures: they are fast, +small, and fully supported by all the sourmash commands.** + ### Loading many signatures #### Loading signatures within a directory hierarchy @@ -1606,3 +1648,69 @@ sig` commands will output to stdout. So, for example, `sourmash sketch ... -o - | sourmash sig describe -` will describe the signatures that were just created. + +### Using manifests to explicitly refer to collections of files + +(sourmash v4.4.0 and later) + +Manifests are metadata catalogs of signatures that are used for +signature selection and loading. 
They are used extensively by sourmash +internals to speed up signature selection through picklists and +pattern matching. + +Manifests can _also_ be used externally (via the command-line), and +may be useful for organizing large collections of signatures. They can +be generated with `sourmash sig manifest` as well as `sourmash sig check`. + +Suppose you have a large collection of signatures (`.sig` or `.sig.gz` +files) under a directory. You can create a manifest file for them like so: +``` +sourmash sig manifest <dir> -o <dir>/manifest.csv +``` +and then use the manifest directly for sourmash operations: +``` +sourmash sig fileinfo <dir>/manifest.csv +``` +This manifest can be used as a database target for most sourmash +operations - search, gather, etc. Note that manifests for directories +must be placed within (and loaded from) the directory from which the +manifest was generated; the specific manifest filename does not +matter. + +A more advanced and slightly tricky way to use explicit manifest files +is with lists of files. If you create a file with a path list +containing the locations of loadable sourmash collections, you can run +`sourmash sig manifest pathlist.txt -o mf.csv` to generate a manifest +of all of the files. The resulting manifest in `mf.csv` can then be +loaded directly. This is very handy when you have many sourmash +signatures, or large signature files. The tricky part in doing this +is that the manifest will store the same paths listed in the pathlist +file - whether they are relative or absolute paths - and these paths +must be resolvable by sourmash from the current working directory. +This makes explicit manifests built from pathlist files less portable +within or across systems than the other sourmash collections, which +are all relocatable.
+ +For example, if you create a pathlist file `paths.txt` containing the +following: +``` +/path/to/zipfile.zip +local_directory/some_signature.sig.gz +local_dir2/ +``` +and then run: +``` +sourmash sig manifest paths.txt -o mf.csv +``` +you will be able to use `mf.csv` as a database for `sourmash search` +and `sourmash gather` commands. But, because it contains two relative paths, +you will only be able to use it _from the directory that contains those +two relative paths_. + +**Our advice:** We suggest using zip file collections for most +situations; we primarily recommend using explicit manifests for +situations where you have a **very large** collection of signatures +(1000s or more), and don't want to make multiple copies of signatures +in the collection (as you would have to, with a zipfile). This can be +useful if you want to refer to different subsets of the collection +without making multiple copies in a zip file. diff --git a/doc/conf.py b/doc/conf.py index e98aa84031..bb3f9e4f19 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -37,7 +37,7 @@ 'sphinx.ext.doctest', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', - 'sphinxcontrib.napoleon', + 'sphinx.ext.napoleon', 'nbsphinx', 'IPython.sphinxext.ipython_console_highlighting', 'myst_parser' @@ -302,3 +302,4 @@ #texinfo_no_detailmenu = False autodoc_mock_imports = ["sourmash.minhash"] +myst_heading_anchors = 3 diff --git a/doc/developer.md b/doc/developer.md index f21ee4b03d..1e4d4dbbeb 100644 --- a/doc/developer.md +++ b/doc/developer.md @@ -105,6 +105,11 @@ Code coverage can be viewed interactively at [codecov.io][1]. [1]: https://codecov.io/gh/sourmash-bio/sourmash/ [2]: https://github.com/sourmash-bio/sourmash/actions +## Writing docs. + +Please see [the docs README](README.md) for information on how we +write and build the sourmash docs. 
+ ## Code organization There are three main components in the sourmash repo: diff --git a/doc/index.md b/doc/index.md index 75d83e2f55..9d532088b1 100644 --- a/doc/index.md +++ b/doc/index.md @@ -28,7 +28,7 @@ background information on how and why MinHash works. **Want to migrate to sourmash v4?** sourmash v4 is now available, and has a number of incompatibilites with v2 and v3. Please see -[our migration guide](support.md#migrating-from-sourmash-v3-x-to-sourmash-v4-x)! +[our migration guide](support.md#migrating-from-sourmash-v3x-to-sourmash-v4x)! ---- diff --git a/doc/release-notes/sourmash-2.0.md b/doc/release-notes/sourmash-2.0.md index 8c85bda3ba..f3fd868661 100644 --- a/doc/release-notes/sourmash-2.0.md +++ b/doc/release-notes/sourmash-2.0.md @@ -19,11 +19,11 @@ in the This is a list of substantial new features and functionality in sourmash 2.0. * Added Sequence Bloom Tree search to enable similarity and containment queries on very large collections of signatures in low memory; see `sourmash index`, `sourmash search`, and `sourmash gather` in [the command line documentation](../command-line.md). -* Added "LCA databases" for fast searching of large databases in not-so-low memory; see [`sourmash lca index` in command-line docs](../command-line.md#sourmash-lca-subcommands-for-taxonomic-classification). +* Added "LCA databases" for fast searching of large databases in not-so-low memory; see [`sourmash lca index` in command-line docs](../command-line.md#sourmash-lca-subcommands-for-in-memory-taxonomy-integration). * Created [precomputed databases](../databases.md) for most of GenBank genomes. -* Added taxonomic reporting functionality in the `sourmash lca` submodule - [see command-line docs](../command-line.md#sourmash-lca-subcommands-for-taxonomic-classification). +* Added taxonomic reporting functionality in the `sourmash lca` submodule - [see command-line docs](../command-line.md#sourmash-lca-subcommands-for-in-memory-taxonomy-integration). 
* Added signature manipulation utilities in the `sourmash signature` submodule - [see command-line docs](../command-line.md#sourmash-signature-subcommands-for-signature-manipulation) -* Introduced new modulo hash or "scaled" signatures for containment analysis; see [Using sourmash: a practical guide](../using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-how-should-i-create-them) and [more details in the Python API examples](../api-example.md#advanced-features-of-sourmash-minhash-objects-scaled-and-num). +* Introduced new modulo hash or "scaled" signatures for containment analysis; see [Using sourmash: a practical guide](../using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) and [more details in the Python API examples](../api-example.md#advanced-features-of-sourmash-minhash-objects---scaled-and-num). * Switched to using JSON instead of YAML for signatures. * Many performance optimizations! * Many more tests! diff --git a/doc/release-notes/sourmash-4.0.md b/doc/release-notes/sourmash-4.0.md index 2b5a780266..681233ad16 100644 --- a/doc/release-notes/sourmash-4.0.md +++ b/doc/release-notes/sourmash-4.0.md @@ -9,7 +9,7 @@ contains many feature improvements and new functionality, as well as many breaking changes with sourmash 2.x and 3.x. Please see -[our migration guide](../support.md#migrating-from-sourmash-v3-x-to-sourmash-v4-x) +[our migration guide](../support.md#migrating-from-sourmash-v3x-to-sourmash-v4x) for guidance on updating to sourmash v4, and post questions about migrating to sourmash 4.0 in the [sourmash issue tracker](https://github.com/dib-lab/sourmash/issues/new). 
diff --git a/doc/sourmash-sketch.md b/doc/sourmash-sketch.md index 346f77b54e..1dd380d76e 100644 --- a/doc/sourmash-sketch.md +++ b/doc/sourmash-sketch.md @@ -1,5 +1,9 @@ # `sourmash sketch` documentation +```{contents} Contents +:depth: 3 +``` + Most of the commands in sourmash work with **signatures**, which contain information about genomic or proteomic sequences. Each signature contains one or more **sketches**, which are compressed versions of these sequences. Using sourmash, you can search, compare, and analyze these sequences in various ways. To create a signature with one or more sketches, you use the `sourmash sketch` command. There are three main commands: @@ -8,6 +12,7 @@ To create a signature with one or more sketches, you use the `sourmash sketch` c sourmash sketch dna sourmash sketch protein sourmash sketch translate +sourmash sketch fromfile ``` The `sketch dna` command reads in **DNA sequences** and outputs **DNA sketches**. @@ -16,10 +21,14 @@ The `sketch protein` command reads in **protein sequences** and outputs **protei The `sketch translate` command reads in **DNA sequences**, translates them in all six frames, and outputs **protein sketches**. +The `sketch fromfile` command takes in a CSV file containing the +locations of genomes and proteomes, and outputs all of the requested +sketches. It is primarily intended for large-scale database construction. + All `sourmash sketch` commands take FASTA or FASTQ sequences as input; input data can be uncompressed, compressed with gzip, or compressed -with bzip2. The output will be one or more JSON signature files that -can be used with the other sourmash commands. +with bzip2. The output will be one or more signature files that +can be used by other sourmash commands. 
## Quickstart @@ -61,6 +70,53 @@ If you want to use different encodings, you can specify them in a few ways; here sourmash sketch protein -p k=25,scaled=500,dayhoff genome.faa ``` +### Translated DNA sketches for metagenomes + +The command +``` +sourmash sketch translate metagenome.fq +``` +will take each read in the FASTQ file and translate the read into +amino acid sequence in all six possible coding frames. No attempt is +made to determine the right frame (but we are working on ways to +determine this; see [orpheum](https://github.com/czbiohub/orpheum)). + +We suggest using this primarily on unassembled metagenome data. For +most microbial genomes, it is both higher quality and more efficient +to first predict the coding sequences (using e.g. prodigal) and then +use `sketch protein` to build signatures. + +### Bulk sketch construction from many files + +The `sourmash sketch fromfile` command is intended for use when +building many signatures as part of a larger workflow. It supports a +variety of options to build new signatures, parallelize +signature construction, and otherwise aid in tracking and managing +database construction. + +The command +``` +sourmash sketch fromfile datasets.csv -p dna -p protein -o database.zip +``` +will ingest a CSV spreadsheet containing (at a minimum) the three columns +`name`, `genome_filename`, and `protein_filename`, and build all of +the signatures requested by the parameter strings. Other columns in +this file will be ignored. + +If no protein, hp, or dayhoff sketches are requested, `protein_filename` +can be empty for a given row; likewise, if no DNA sketches are requested, +`genome_filename` can be empty for a given row. + +Some of the key command-line options supported by `fromfile` are: +* `-o/--output-signatures` will save generated signatures to any of the [standard supported output formats](command-line.md#saving-signatures-more-generally). 
+* `--output-csv-info` will save a CSV file of input filenames and parameter strings for use with the `sourmash sketch` command line; this can be used to construct signatures in parallel. +* `--already-done` will take a list of existing signatures/databases to check against; signatures with matching names and parameter strings will not be rebuilt. +* `--output-manifest-matching` will output a manifest of already-existing signatures, which can then be used with `sourmash sig cat` to collate signatures across databases; see [using manifests](command-line.md#using-manifests-to-explicitly-refer-to-collections-of-files). (This provides [`sourmash sig check` functionality](command-line.md#sourmash-signature-check---compare-picklists-and-manifests) in `sketch fromfile`.) + +If you would like help and advice on constructing large databases, or +pointers to code for generating the `fromfile` CSV format, please ask +[on the sourmash issue tracker](https://github.com/sourmash-bio/sourmash/issues) or [gitter support channel](https://gitter.im/sourmash-bio/community). + ## More detailed documentation ### Input formats @@ -109,8 +165,8 @@ The `-p` argument to `sourmash sketch` provides parameter strings to sourmash, a A parameter string is a space-delimited collection that can contain one or more fields, comma-separated. * `k=` - create a sketch at this k-mer size; can provide more than one time in a parameter string. Typically `ksize` is between 4 and 100. -* `scaled=` - create a scaled MinHash with k-mers sampled deterministically at 1 per `` value. This controls sketch compression rates and resolution; for example, a 5 Mbp genome sketched with a scaled of 1000 would yield approximately 5,000 k-mers. `scaled` is incompatible with `num`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-how-should-i-create-them) for more information. -* `num=` - create a standard MinHash with no more than `` k-mers kept.
This will produce sketches identical to [mash sketches](https://mash.readthedocs.io/en/latest/). `num` is incompatible with `scaled`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-how-should-i-create-them) for more information. +* `scaled=` - create a scaled MinHash with k-mers sampled deterministically at 1 per `` value. This controls sketch compression rates and resolution; for example, a 5 Mbp genome sketched with a scaled of 1000 would yield approximately 5,000 k-mers. `scaled` is incompatible with `num`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) for more information. +* `num=` - create a standard MinHash with no more than `` k-mers kept. This will produce sketches identical to [mash sketches](https://mash.readthedocs.io/en/latest/). `num` is incompatible with `scaled`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) for more information. * `abund` / `noabund` - create abundance-weighted (or not) sketches. See [Classify signatures: Abundance Weighting](classifying-signatures.md#abundance-weighting) for details of how this works. * `dna`, `protein`, `dayhoff`, `hp` - create this kind of sketch. Note that `sourmash sketch dna -p protein` and `sourmash sketch protein -p dna` are invalid; please use `sourmash sketch translate` for the former. @@ -189,7 +245,7 @@ Unfortunately, changing the k-mer size or using different DNA/protein encodings ### Examining the output of `sourmash sketch` -You can use `sourmash sig describe` to get detailed information about the contents of a signature file. This can help if you want to see exactly what a particular `sourmash sketch` command does! 
+You can use `sourmash sig describe` to get detailed information about the contents of a signature file, and `sourmash sig fileinfo` to get a human-readable summary of the contents. This can help if you want to see exactly what a particular `sourmash sketch` command does! ### Filing issues and asking for help diff --git a/doc/support.md b/doc/support.md index d7a388293f..5e702ddf41 100644 --- a/doc/support.md +++ b/doc/support.md @@ -29,7 +29,7 @@ that depend on sourmash, e.g. specifying `sourmash >=3,<4` for software that is tested with sourmash 3.x. Read on for details! Upgrading major versions (to sourmash 4.0, for example) will often involve -more work; see the [next section](#upgrading-versions) for more +more work; see the [next section](#upgrading-major-versions) for more our suggested process. ### Semantic versioning @@ -148,7 +148,7 @@ If you use sourmash from the command line, there are a few major changes in 4.0 First, **`sourmash compute` is deprecated in favor of [`sourmash sketch`](sourmash-sketch.md)**, which provides quite a bit more flexibility in creating signatures. -Second, **`sourmash index` will now save databases in the Zip format (`.sbt.zip`) instead of the old JSON+subdirectory format** (see [updated docs](command-line.md#sourmash-index-build-an-sbt-index-of-signatures)). You can revert to the old behavior by explicitly specifying the `.sbt.json` filename for output when running `sourmash index`. +Second, **`sourmash index` will now save databases in the Zip format (`.sbt.zip`) instead of the old JSON+subdirectory format** (see [updated docs](command-line.md#sourmash-index---build-an-sbt-index-of-signatures)). You can revert to the old behavior by explicitly specifying the `.sbt.json` filename for output when running `sourmash index`. 
Third, all sourmash commands that operate on signatures should now be able to directly read from lists of signatures in signature files, SBT databases, LCA databases, directories, and files containing lists of filenames (see [updated docs](command-line.md#advanced-command-line-usage)). diff --git a/doc/tutorials.md b/doc/tutorials.md index 276d560ad7..a3a0277e38 100644 --- a/doc/tutorials.md +++ b/doc/tutorials.md @@ -13,7 +13,7 @@ X and Linux. They require about 5 GB of disk space and 5 GB of RAM. These next three tutorials are all notebooks that you can view, run yourself, or run interactively online via the -[binder](http://mybinder.org) service. +[binder](https://mybinder.org) service. * [An introduction to k-mers for genome comparison and analysis](kmers-and-minhash.md) diff --git a/doc/using-sourmash-a-guide.md b/doc/using-sourmash-a-guide.md index 2b62be021b..bde3827182 100644 --- a/doc/using-sourmash-a-guide.md +++ b/doc/using-sourmash-a-guide.md @@ -189,7 +189,7 @@ built and searched directly from the command line. Reverse indexed or LCA databases are *in-memory* databases that, once loaded from disk, support fast search and gather across 10s of thousands -of signatures. They can be created using `sourmash lca index` ([docs](command-line.md#sourmash-lca-index-build-an-lca-database)) +of signatures. They can be created using `sourmash lca index` ([docs](command-line.md#sourmash-lca-index---build-an-lca-database)) LCA databases are currently stored in JSON files (that can be gzipped). As these files get larger, the time required to load them from disk @@ -198,7 +198,7 @@ can be substantial. LCA databases are also currently (sourmash 2.0-4.0) the only databases that support the inclusion of taxonomic information in the database, and there is an associated collection of commands -[under `sourmash lca`](command.md#sourmash-lca-subcommands-for-taxonomic-classification). 
+[under `sourmash lca`](command-line.md#sourmash-lca-subcommands-for-in-memory-taxonomy-integration). However, they can also be used as regular indexed databases for search and gather as above. diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000..f86273ccb8 --- /dev/null +++ b/flake.lock @@ -0,0 +1,184 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1642700792, + "narHash": "sha256-XqHrk7hFb+zBvRg6Ghl+AZDq03ov6OshJLiSWOoX5es=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "846b2ae0fc4cc943637d3d1def4454213e203cba", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "mach-nix": { + "inputs": { + "flake-utils": [ + "utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "pypi-deps-db": [ + "pypi-deps-db" + ] + }, + "locked": { + "lastModified": 1643953409, + "narHash": "sha256-CJDg/RpZdUVyI3QIAXUqIoYDl7VkxFtNE4JWih0ucKc=", + "owner": "DavHau", + "repo": "mach-nix", + "rev": "fe5255e6fd8df57e9507b7af82fc59dda9e9ff2b", + "type": "github" + }, + "original": { + "owner": "DavHau", + "ref": "3.4.0", + "repo": "mach-nix", + "type": "github" + } + }, + "naersk": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1639947939, + "narHash": "sha256-pGsM8haJadVP80GFq4xhnSpNitYNQpaXk4cnA796Cso=", + "owner": "nix-community", + "repo": "naersk", + "rev": "2fc8ce9d3c025d59fee349c1f80be9785049d653", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "naersk", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1645937171, + "narHash": "sha256-n9f9GZBNMe8UMhcgmmaXNObkH01jjgp7INMrUgBgcy4=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "22dc22f8cedc58fcb11afe1acb08e9999e78be9c", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1643805626, + "narHash": 
"sha256-AXLDVMG+UaAGsGSpOtQHPIKB+IZ0KSd9WS77aanGzgc=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "554d2d8aa25b6e583575459c297ec23750adb6cb", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-unstable", + "type": "indirect" + } + }, + "pypi-deps-db": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs_2", + "pypi-deps-db": "pypi-deps-db_2" + }, + "locked": { + "lastModified": 1643953409, + "narHash": "sha256-CJDg/RpZdUVyI3QIAXUqIoYDl7VkxFtNE4JWih0ucKc=", + "owner": "DavHau", + "repo": "mach-nix", + "rev": "fe5255e6fd8df57e9507b7af82fc59dda9e9ff2b", + "type": "github" + }, + "original": { + "owner": "DavHau", + "ref": "3.4.0", + "repo": "mach-nix", + "type": "github" + } + }, + "pypi-deps-db_2": { + "flake": false, + "locked": { + "lastModified": 1643877077, + "narHash": "sha256-jv8pIvRFTP919GybOxXE5TfOkrjTbdo9QiCO1TD3ZaY=", + "owner": "DavHau", + "repo": "pypi-deps-db", + "rev": "da53397f0b782b0b18deb72ef8e0fb5aa7c98aa3", + "type": "github" + }, + "original": { + "owner": "DavHau", + "repo": "pypi-deps-db", + "type": "github" + } + }, + "root": { + "inputs": { + "mach-nix": "mach-nix", + "naersk": "naersk", + "nixpkgs": "nixpkgs", + "pypi-deps-db": "pypi-deps-db", + "rust-overlay": "rust-overlay", + "utils": "utils" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": [ + "utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1645928338, + "narHash": "sha256-pNbkG19Nb4QTNRCIWwxv06JKKJNCUrDzgRrriEd7W1A=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "4f6e6588b07427cd8ddc99b664bf0fab02799804", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "utils": { + "locked": { + "lastModified": 1644229661, + "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797", + "type": "github" + }, + "original": { + 
"owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000..9ec1cd8300 --- /dev/null +++ b/flake.nix @@ -0,0 +1,126 @@ +{ + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + utils.url = "github:numtide/flake-utils"; + + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "utils"; + }; + }; + + naersk = { + url = "github:nix-community/naersk"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "utils"; + }; + }; + + mach-nix = { + url = "github:DavHau/mach-nix/3.4.0"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "utils"; + inputs.pypi-deps-db.follows = "pypi-deps-db"; + }; + + pypi-deps-db = { + url = "github:DavHau/mach-nix/3.4.0"; + }; + }; + + outputs = { self, nixpkgs, naersk, rust-overlay, mach-nix, pypi-deps-db, utils }: + utils.lib.eachDefaultSystem (system: + let + overlays = [ (import rust-overlay) ]; + pkgs = import nixpkgs { + inherit system overlays; + }; + rustVersion = pkgs.rust-bin.stable.latest.default.override { + #extensions = [ "rust-src" ]; + #targets = [ "x86_64-unknown-linux-musl" ]; + targets = [ "wasm32-wasi" "wasm32-unknown-unknown" "wasm32-unknown-emscripten" ]; + }; + rustPlatform = pkgs.makeRustPlatform { + cargo = rustVersion; + rustc = rustVersion; + }; + naersk-lib = naersk.lib."${system}".override { + cargo = rustPlatform.rust.cargo; + rustc = rustPlatform.rust.rustc; + }; + + python = "python39"; + mach-nix-wrapper = import mach-nix { inherit pkgs python; }; + in + + with pkgs; + { + packages = { + lib = naersk-lib.buildPackage { + pname = "libsourmash"; + root = ./.; + copyLibs = true; + }; + sourmash = mach-nix-wrapper.buildPythonPackage { + src = ./.; + version = "4.3.0"; + requirementsExtra = '' + setuptools >= 48, <60 + milksnake + setuptools_scm[toml] >= 4, <6 + ''; + 
SETUPTOOLS_SCM_PRETEND_VERSION = "4.3.0"; + DYLD_LIBRARY_PATH = "${self.packages.${system}.lib}/lib"; + NO_BUILD = "1"; + }; + }; + + defaultPackage = self.packages.${system}.sourmash; + + devShell = mkShell { + nativeBuildInputs = [ + clang_13 + ]; + + buildInputs = [ + rustPlatform.rust.cargo + openssl + pkgconfig + + git + stdenv.cc.cc.lib + (python310.withPackages (ps: with ps; [ virtualenv tox setuptools ])) + (python39.withPackages (ps: with ps; [ virtualenv setuptools ])) + (python38.withPackages (ps: with ps; [ virtualenv setuptools ])) + + rust-cbindgen + + wasmtime + wasm-pack + nodejs-16_x + + py-spy + heaptrack + cargo-watch + cargo-limit + cargo-outdated + cargo-udeps + nixpkgs-fmt + + llvmPackages_13.libclang + llvmPackages_13.libcxxClang + ]; + + BINDGEN_EXTRA_CLANG_ARGS = "-isystem ${llvmPackages_13.libclang.lib}/lib/clang/${lib.getVersion clang}/include"; + LIBCLANG_PATH = "${llvmPackages_13.libclang.lib}/lib"; + LD_LIBRARY_PATH = "${stdenv.cc.cc.lib}/lib64:$LD_LIBRARY_PATH"; + + # workaround for https://github.com/NixOS/nixpkgs/blob/48dfc9fa97d762bce28cc8372a2dd3805d14c633/doc/languages-frameworks/python.section.md#python-setuppy-bdist_wheel-cannot-create-whl + SOURCE_DATE_EPOCH = 315532800; # 1980 + }; + }); +} diff --git a/nix/rust.nix b/nix/rust.nix deleted file mode 100644 index 0938a178a2..0000000000 --- a/nix/rust.nix +++ /dev/null @@ -1,15 +0,0 @@ -# nix/rust.nix -{ sources ? 
import ./sources.nix }: -let - pkgs = - import sources.nixpkgs { overlays = [ (import sources.rust-overlay) ]; }; - rustVersion = pkgs.rust-bin.stable.latest.default.override { - #extensions = [ "rust-src" ]; - #targets = [ "x86_64-unknown-linux-musl" ]; - targets = [ "wasm32-wasi" "wasm32-unknown-unknown" ]; - }; -in -pkgs.makeRustPlatform { - cargo = rustVersion; - rustc = rustVersion; -} diff --git a/nix/sources.json b/nix/sources.json deleted file mode 100644 index dd5dac1531..0000000000 --- a/nix/sources.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "niv": { - "branch": "master", - "description": "Easy dependency management for Nix projects", - "homepage": "https://github.com/nmattia/niv", - "owner": "nmattia", - "repo": "niv", - "rev": "9cb7ef336bb71fd1ca84fc7f2dff15ef4b033f2a", - "sha256": "1ajyqr8zka1zlb25jx1v4xys3zqmdy3prbm1vxlid6ah27a8qnzh", - "type": "tarball", - "url": "https://github.com/nmattia/niv/archive/9cb7ef336bb71fd1ca84fc7f2dff15ef4b033f2a.tar.gz", - "url_template": "https://github.com///archive/.tar.gz" - }, - "nixpkgs": { - "branch": "nixpkgs-unstable", - "description": "Nix Packages collection", - "homepage": "", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "1882c6b7368fd284ad01b0a5b5601ef136321292", - "sha256": "0zg7ak2mcmwzi2kg29g4v9fvbvs0viykjsg2pwaphm1fi13s7s0i", - "type": "tarball", - "url": "https://github.com/NixOS/nixpkgs/archive/1882c6b7368fd284ad01b0a5b5601ef136321292.tar.gz", - "url_template": "https://github.com///archive/.tar.gz" - }, - "rust-overlay": { - "branch": "master", - "description": null, - "homepage": null, - "owner": "oxalica", - "repo": "rust-overlay", - "rev": "14c48021a9a5fe6ea8ae6b21c15caa106afa9d19", - "sha256": "009nlf6if5nrkk9sl25n3ahh8l9bfmfbs3d1l8n4rb92hq2sdvjd", - "type": "tarball", - "url": "https://github.com/oxalica/rust-overlay/archive/14c48021a9a5fe6ea8ae6b21c15caa106afa9d19.tar.gz", - "url_template": "https://github.com///archive/.tar.gz" - } -} diff --git a/nix/sources.nix b/nix/sources.nix deleted 
file mode 100644 index 1938409ddd..0000000000 --- a/nix/sources.nix +++ /dev/null @@ -1,174 +0,0 @@ -# This file has been generated by Niv. - -let - - # - # The fetchers. fetch_ fetches specs of type . - # - - fetch_file = pkgs: name: spec: - let - name' = sanitizeName name + "-src"; - in - if spec.builtin or true then - builtins_fetchurl { inherit (spec) url sha256; name = name'; } - else - pkgs.fetchurl { inherit (spec) url sha256; name = name'; }; - - fetch_tarball = pkgs: name: spec: - let - name' = sanitizeName name + "-src"; - in - if spec.builtin or true then - builtins_fetchTarball { name = name'; inherit (spec) url sha256; } - else - pkgs.fetchzip { name = name'; inherit (spec) url sha256; }; - - fetch_git = name: spec: - let - ref = - if spec ? ref then spec.ref else - if spec ? branch then "refs/heads/${spec.branch}" else - if spec ? tag then "refs/tags/${spec.tag}" else - abort "In git source '${name}': Please specify `ref`, `tag` or `branch`!"; - in - builtins.fetchGit { url = spec.repo; inherit (spec) rev; inherit ref; }; - - fetch_local = spec: spec.path; - - fetch_builtin-tarball = name: throw - ''[${name}] The niv type "builtin-tarball" is deprecated. You should instead use `builtin = true`. - $ niv modify ${name} -a type=tarball -a builtin=true''; - - fetch_builtin-url = name: throw - ''[${name}] The niv type "builtin-url" will soon be deprecated. You should instead use `builtin = true`. - $ niv modify ${name} -a type=file -a builtin=true''; - - # - # Various helpers - # - - # https://github.com/NixOS/nixpkgs/pull/83241/files#diff-c6f540a4f3bfa4b0e8b6bafd4cd54e8bR695 - sanitizeName = name: - ( - concatMapStrings (s: if builtins.isList s then "-" else s) - ( - builtins.split "[^[:alnum:]+._?=-]+" - ((x: builtins.elemAt (builtins.match "\\.*(.*)" x) 0) name) - ) - ); - - # The set of packages used when specs are fetched using non-builtins. 
- mkPkgs = sources: system: - let - sourcesNixpkgs = - import (builtins_fetchTarball { inherit (sources.nixpkgs) url sha256; }) { inherit system; }; - hasNixpkgsPath = builtins.any (x: x.prefix == "nixpkgs") builtins.nixPath; - hasThisAsNixpkgsPath = == ./.; - in - if builtins.hasAttr "nixpkgs" sources - then sourcesNixpkgs - else if hasNixpkgsPath && ! hasThisAsNixpkgsPath then - import {} - else - abort - '' - Please specify either (through -I or NIX_PATH=nixpkgs=...) or - add a package called "nixpkgs" to your sources.json. - ''; - - # The actual fetching function. - fetch = pkgs: name: spec: - - if ! builtins.hasAttr "type" spec then - abort "ERROR: niv spec ${name} does not have a 'type' attribute" - else if spec.type == "file" then fetch_file pkgs name spec - else if spec.type == "tarball" then fetch_tarball pkgs name spec - else if spec.type == "git" then fetch_git name spec - else if spec.type == "local" then fetch_local spec - else if spec.type == "builtin-tarball" then fetch_builtin-tarball name - else if spec.type == "builtin-url" then fetch_builtin-url name - else - abort "ERROR: niv spec ${name} has unknown type ${builtins.toJSON spec.type}"; - - # If the environment variable NIV_OVERRIDE_${name} is set, then use - # the path directly as opposed to the fetched source. - replace = name: drv: - let - saneName = stringAsChars (c: if isNull (builtins.match "[a-zA-Z0-9]" c) then "_" else c) name; - ersatz = builtins.getEnv "NIV_OVERRIDE_${saneName}"; - in - if ersatz == "" then drv else - # this turns the string into an actual Nix path (for both absolute and - # relative paths) - if builtins.substring 0 1 ersatz == "/" then /. + ersatz else /. 
+ builtins.getEnv "PWD" + "/${ersatz}"; - - # Ports of functions for older nix versions - - # a Nix version of mapAttrs if the built-in doesn't exist - mapAttrs = builtins.mapAttrs or ( - f: set: with builtins; - listToAttrs (map (attr: { name = attr; value = f attr set.${attr}; }) (attrNames set)) - ); - - # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 - range = first: last: if first > last then [] else builtins.genList (n: first + n) (last - first + 1); - - # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 - stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); - - # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 - stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); - concatMapStrings = f: list: concatStrings (map f list); - concatStrings = builtins.concatStringsSep ""; - - # https://github.com/NixOS/nixpkgs/blob/8a9f58a375c401b96da862d969f66429def1d118/lib/attrsets.nix#L331 - optionalAttrs = cond: as: if cond then as else {}; - - # fetchTarball version that is compatible between all the versions of Nix - builtins_fetchTarball = { url, name ? null, sha256 }@attrs: - let - inherit (builtins) lessThan nixVersion fetchTarball; - in - if lessThan nixVersion "1.12" then - fetchTarball ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) - else - fetchTarball attrs; - - # fetchurl version that is compatible between all the versions of Nix - builtins_fetchurl = { url, name ? 
null, sha256 }@attrs: - let - inherit (builtins) lessThan nixVersion fetchurl; - in - if lessThan nixVersion "1.12" then - fetchurl ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) - else - fetchurl attrs; - - # Create the final "sources" from the config - mkSources = config: - mapAttrs ( - name: spec: - if builtins.hasAttr "outPath" spec - then abort - "The values in sources.json should not have an 'outPath' attribute" - else - spec // { outPath = replace name (fetch config.pkgs name spec); } - ) config.sources; - - # The "config" used by the fetchers - mkConfig = - { sourcesFile ? if builtins.pathExists ./sources.json then ./sources.json else null - , sources ? if isNull sourcesFile then {} else builtins.fromJSON (builtins.readFile sourcesFile) - , system ? builtins.currentSystem - , pkgs ? mkPkgs sources system - }: rec { - # The sources, i.e. the attribute set of spec name to spec - inherit sources; - - # The "pkgs" (evaluated nixpkgs) to use for e.g. non-builtin fetchers - inherit pkgs; - }; - -in -mkSources (mkConfig {}) // { __functor = _: settings: mkSources (mkConfig settings); } diff --git a/setup.cfg b/setup.cfg index 37e5444e52..bd3aeea86d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,13 +64,14 @@ demo = jupyter_client ipython doc = - sphinx - myst-parser>=0.13.7,<0.15.0 + sphinx>=4.4.0,<5 + myst-parser==0.17.0 + Jinja2==3.0.3 alabaster sphinxcontrib-napoleon nbsphinx ipython - docutils>=0.17.1 + docutils>=0.17.1,<0.18.0 storage = ipfshttpclient>=0.4.13 redis diff --git a/setup.py b/setup.py index 08d7dbc8f8..9384aba4b4 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,24 @@ DEBUG_BUILD = os.environ.get("SOURMASH_DEBUG") == "1" +NO_BUILD = os.environ.get("NO_BUILD") == "1" + + +def find_dylib(name, paths): + to_find = None + if sys.platform == 'darwin': + to_find = f'lib{name}.dylib' + elif sys.platform == 'win32': + to_find = f'{name}.dll' + else: + to_find = f'lib{name}.so' + + for path in paths.split(":"): + for filename in 
os.listdir(path): + if filename == to_find: + return os.path.join(path, filename) + + raise LookupError('dylib %r not found' % name) def build_native(spec): @@ -18,15 +36,21 @@ def build_native(spec): cmd.append("--release") target = "release" - build = spec.add_external_build(cmd=cmd, path=".") + if NO_BUILD: + dylib = lambda: find_dylib("sourmash", os.environ["DYLD_LIBRARY_PATH"]) + header_filename = lambda: "include/sourmash.h" + else: + build = spec.add_external_build(cmd=cmd, path=".") + dylib=lambda: build.find_dylib("sourmash", in_path="target/%s" % target) + header_filename=lambda: build.find_header("sourmash.h", in_path="include") rtld_flags = ["NOW"] if sys.platform == "darwin": rtld_flags.append("NODELETE") spec.add_cffi_module( module_path="sourmash._lowlevel", - dylib=lambda: build.find_dylib("sourmash", in_path="target/%s" % target), - header_filename=lambda: build.find_header("sourmash.h", in_path="include"), + dylib=dylib, + header_filename=header_filename, rtld_flags=rtld_flags, ) diff --git a/shell.nix b/shell.nix index f3c250cada..47458ad8c0 100644 --- a/shell.nix +++ b/shell.nix @@ -1,46 +1,10 @@ -let - sources = import ./nix/sources.nix; - rustPlatform = import ./nix/rust.nix { inherit sources; }; - pkgs = import sources.nixpkgs { overlays = [ (import sources.rust-overlay) ]; }; -in - with pkgs; - - pkgs.mkShell { - nativeBuildInputs = [ - clang_13 - ]; - - buildInputs = [ - rustPlatform.rust.cargo - openssl - pkg-config - - git - stdenv.cc.cc.lib - (python310.withPackages(ps: with ps; [ virtualenv tox setuptools ])) - (python39.withPackages(ps: with ps; [ virtualenv setuptools ])) - (python38.withPackages(ps: with ps; [ virtualenv setuptools ])) - - rust-cbindgen - - wasmtime - wasm-pack - nodejs-16_x - - py-spy - heaptrack - cargo-watch - cargo-limit - cargo-udeps - - llvmPackages_13.libclang - llvmPackages_13.libcxxClang - ]; - - BINDGEN_EXTRA_CLANG_ARGS = "-isystem ${llvmPackages_13.libclang.lib}/lib/clang/${lib.getVersion clang}/include"; 
- LIBCLANG_PATH = "${llvmPackages_13.libclang.lib}/lib"; - LD_LIBRARY_PATH = "${stdenv.cc.cc.lib}/lib64:$LD_LIBRARY_PATH"; - - # workaround for https://github.com/NixOS/nixpkgs/blob/48dfc9fa97d762bce28cc8372a2dd3805d14c633/doc/languages-frameworks/python.section.md#python-setuppy-bdist_wheel-cannot-create-whl - SOURCE_DATE_EPOCH = 315532800; # 1980 - } +(import + ( + fetchTarball { + url = "https://github.com/edolstra/flake-compat/archive/99f1c2157fba4bfe6211a321fd0ee43199025dbf.tar.gz"; + sha256 = "0x2jn3vrawwv9xp15674wjz9pixwjyj3j771izayl962zziivbx2"; + } + ) + { + src = ./.; + }).shellNix diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 0efe6dc1c2..736e506c20 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -42,7 +42,7 @@ serde = { version = "1.0.110", features = ["derive"] } serde_json = "1.0.53" primal-check = "0.3.1" thiserror = "1.0" -typed-builder = "0.9.0" +typed-builder = "0.10.0" twox-hash = "1.6.0" vec-collections = "0.3.4" piz = "0.4.0" diff --git a/src/sourmash/cli/sig/__init__.py b/src/sourmash/cli/sig/__init__.py index 52ac549401..4bb956e7b3 100644 --- a/src/sourmash/cli/sig/__init__.py +++ b/src/sourmash/cli/sig/__init__.py @@ -15,7 +15,9 @@ from . import fileinfo as summarize from . import grep from . import kmers +from . import check from . import intersect +from . import inflate from . import manifest from . import merge from . import rename diff --git a/src/sourmash/cli/sig/check.py b/src/sourmash/cli/sig/check.py new file mode 100644 index 0000000000..e218850d19 --- /dev/null +++ b/src/sourmash/cli/sig/check.py @@ -0,0 +1,66 @@ +"""check signature collections against a picklist""" + +usage=""" + + sourmash sig check --picklist ... -o miss.csv -m manifest.csv + +This will check the signature contents of against the given +picklist, optionally outputting the unmatched picklist rows to 'miss.csv' +and optionally outputting a manifest of the matched signatures to +'manifest.csv'. 
+ +By default, 'sig check' requires a pre-existing manifest for collections; +this prevents potentially slow manifest rebuilding. You +can turn this check off with '--no-require-manifest'. + +""" + +from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, + add_picklist_args, add_pattern_args) + + +def subparser(subparsers): + subparser = subparsers.add_parser('check', usage=usage) + subparser.add_argument('signatures', nargs='*') + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-d', '--debug', action='store_true', + help='provide debugging output' + ) + subparser.add_argument( + '-o', '--output-missing', metavar='FILE', + help='output picklist with remaining unmatched entries to this file', + ) + subparser.add_argument( + '-f', '--force', action='store_true', + help='try to load all files as signatures' + ) + subparser.add_argument( + '--from-file', + help='a text file containing a list of files to load signatures from' + ) + subparser.add_argument( + '-m', '--save-manifest-matching', + help='save a manifest of the matching entries to this file.' + ) + subparser.add_argument( + '--fail-if-missing', action='store_true', + help='exit with an error code (-1) if there are any missing picklist values.' 
+ ) + subparser.add_argument( + '--no-require-manifest', + help='do not require a manifest; generate dynamically if needed', + action='store_true' + ) + add_ksize_arg(subparser, 31) + add_moltype_args(subparser) + add_pattern_args(subparser) + add_picklist_args(subparser) + + +def main(args): + import sourmash + return sourmash.sig.__main__.check(args) diff --git a/src/sourmash/cli/sig/describe.py b/src/sourmash/cli/sig/describe.py index ca382732d8..79833da9a8 100644 --- a/src/sourmash/cli/sig/describe.py +++ b/src/sourmash/cli/sig/describe.py @@ -1,7 +1,7 @@ """show details of signature""" from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, - add_picklist_args) + add_picklist_args, add_pattern_args) def subparser(subparsers): @@ -11,6 +11,10 @@ def subparser(subparsers): '-q', '--quiet', action='store_true', help='suppress non-error output' ) + subparser.add_argument( + '-d', '--debug', action='store_true', + help='provide debugging output' + ) subparser.add_argument( '--csv', metavar='FILE', help='output information to a CSV file' @@ -26,6 +30,7 @@ def subparser(subparsers): add_ksize_arg(subparser, 31) add_moltype_args(subparser) add_picklist_args(subparser) + add_pattern_args(subparser) def main(args): diff --git a/src/sourmash/cli/sig/inflate.py b/src/sourmash/cli/sig/inflate.py new file mode 100644 index 0000000000..a467670d93 --- /dev/null +++ b/src/sourmash/cli/sig/inflate.py @@ -0,0 +1,30 @@ +"""borrow abundances from one signature => one or more other signatures""" + +from sourmash.cli.utils import (add_moltype_args, add_ksize_arg, + add_picklist_args) + + +def subparser(subparsers): + subparser = subparsers.add_parser('inflate') + subparser.add_argument('signature_from') + subparser.add_argument('other_sigs', nargs='+') + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-o', '--output', metavar='FILE', default='-', + help='output signature to this file 
(default stdout)' + ) + subparser.add_argument( + '-f', '--force', action='store_true', + help='try to load all files as signatures' + ) + add_ksize_arg(subparser, 31) + add_moltype_args(subparser) + add_picklist_args(subparser) + + +def main(args): + import sourmash + return sourmash.sig.__main__.inflate(args) diff --git a/src/sourmash/cli/sig/subtract.py b/src/sourmash/cli/sig/subtract.py index 483a6cc027..feeec38d69 100644 --- a/src/sourmash/cli/sig/subtract.py +++ b/src/sourmash/cli/sig/subtract.py @@ -19,6 +19,10 @@ def subparser(subparsers): '--flatten', action='store_true', help='remove abundance from signatures before subtracting' ) + subparser.add_argument( + '-A', '--abundances-from', metavar='FILE', + help='intersect with & take abundances from this signature' + ) add_ksize_arg(subparser, 31) add_moltype_args(subparser) diff --git a/src/sourmash/cli/sketch/__init__.py b/src/sourmash/cli/sketch/__init__.py index 81808fc327..22abf26ed1 100644 --- a/src/sourmash/cli/sketch/__init__.py +++ b/src/sourmash/cli/sketch/__init__.py @@ -10,6 +10,7 @@ from . import protein as aa from . import protein as prot from . import translate +from . 
import fromfile from ..utils import command_list from argparse import SUPPRESS, RawDescriptionHelpFormatter import os diff --git a/src/sourmash/cli/sketch/dna.py b/src/sourmash/cli/sketch/dna.py index ea4f45358f..1d82f9df65 100644 --- a/src/sourmash/cli/sketch/dna.py +++ b/src/sourmash/cli/sketch/dna.py @@ -38,7 +38,7 @@ def subparser(subparsers): ) subparser.add_argument( '--check-sequence', action='store_true', - help='complain if input sequence is invalid (NOTE: only checks DNA)' + help='complain if input sequence is invalid DNA' ) subparser.add_argument( '-p', '--param-string', default=[], diff --git a/src/sourmash/cli/sketch/fromfile.py b/src/sourmash/cli/sketch/fromfile.py new file mode 100644 index 0000000000..84291b2931 --- /dev/null +++ b/src/sourmash/cli/sketch/fromfile.py @@ -0,0 +1,78 @@ +"""create signatures from a CSV file""" + +usage=""" + + sourmash sketch fromfile --output-signatures -p <...> + +The 'sketch fromfile' command takes in a CSV file with list of names +and filenames to be used for building signatures. It is intended for +batch use, when building large collections of signatures. + +One or more parameter strings must be specified with '-p'. + +One or more existing collections of signatures can be provided via +'--already-done' and already-existing signatures (based on name and +sketch type) will not be recalculated or output. + +If a location is provided via '--output-signatures', signatures will be saved +to that location. 
+ +Please see the 'sketch' documentation for more details: + https://sourmash.readthedocs.io/en/latest/sourmash-sketch.html +""" + +import sourmash +from sourmash.logging import notify, print_results, error + +from sourmash import command_sketch + + +def subparser(subparsers): + subparser = subparsers.add_parser('fromfile', + usage=usage) + subparser.add_argument( + 'csvs', nargs='+', + help="input CSVs providing 'name', 'genome_filename', and 'protein_filename'" + ) + subparser.add_argument( + '-p', '--param-string', default=[], + help='signature parameters to use.', action='append', + ) + subparser.add_argument( + '--already-done', nargs='+', default=[], + help='one or more collections of existing signatures to avoid recalculating' + ) + subparser.add_argument( + '--license', default='CC0', type=str, + help='signature license. Currently only CC0 is supported.' + ) + subparser.add_argument( + '--check-sequence', action='store_true', + help='complain if input sequence is invalid (NOTE: only checks DNA)' + ) + file_args = subparser.add_argument_group('File handling options') + file_args.add_argument( + '-o', '--output-signatures', + help='output computed signatures to this file', + ) + file_args.add_argument( + '--force-output-already-exists', action='store_true', + help='overwrite/append to --output-signatures location' + ) + file_args.add_argument( + '--ignore-missing', action='store_true', + help='proceed with building possible signatures, even if some input files are missing' + ) + file_args.add_argument( + '--output-csv-info', + help='output information about what signatures need to be generated' + ) + file_args.add_argument( + '--output-manifest-matching', + help='output a manifest file of already-existing signatures' + ) + + +def main(args): + import sourmash.command_sketch + return sourmash.command_sketch.fromfile(args) diff --git a/src/sourmash/cli/sketch/protein.py b/src/sourmash/cli/sketch/protein.py index edc199b83c..24324ea905 100644 --- 
a/src/sourmash/cli/sketch/protein.py +++ b/src/sourmash/cli/sketch/protein.py @@ -36,10 +36,6 @@ def subparser(subparsers): '--license', default='CC0', type=str, help='signature license. Currently only CC0 is supported.' ) - subparser.add_argument( - '--check-sequence', action='store_true', - help='complain if input sequence is invalid' - ) subparser.add_argument( '-p', '--param-string', default=[], help='signature parameters to use.', action='append', diff --git a/src/sourmash/cli/sketch/translate.py b/src/sourmash/cli/sketch/translate.py index 79356bd5a0..df48d4818a 100644 --- a/src/sourmash/cli/sketch/translate.py +++ b/src/sourmash/cli/sketch/translate.py @@ -38,7 +38,7 @@ def subparser(subparsers): ) subparser.add_argument( '--check-sequence', action='store_true', - help='complain if input sequence is invalid' + help='complain if input sequence is invalid DNA' ) subparser.add_argument( '-p', '--param-string', default=[], diff --git a/src/sourmash/command_compute.py b/src/sourmash/command_compute.py index 1dda0bcccd..be0d87db00 100644 --- a/src/sourmash/command_compute.py +++ b/src/sourmash/command_compute.py @@ -14,6 +14,7 @@ from ._lowlevel import ffi, lib DEFAULT_COMPUTE_K = '21,31,51' +DEFAULT_MMHASH_SEED = 42 DEFAULT_LINE_COUNT = 1500 @@ -197,8 +198,13 @@ def _compute_individual(args, signatures_factory): if args.singleton: for n, record in enumerate(screed_iter): sigs = signatures_factory() - add_seq(sigs, record.sequence, - args.input_is_protein, args.check_sequence) + try: + add_seq(sigs, record.sequence, + args.input_is_protein, args.check_sequence) + except ValueError as exc: + error(f"ERROR when reading from '{filename}' - ") + error(str(exc)) + sys.exit(-1) set_sig_name(sigs, filename, name=record.name) save_sigs_to_location(sigs, save_sigs) @@ -211,7 +217,7 @@ def _compute_individual(args, signatures_factory): sigs = signatures_factory() # consume & calculate signatures - notify('... reading sequences from {}', filename) + notify(f'... 
reading sequences from {filename}') name = None for n, record in enumerate(screed_iter): if n % 10000 == 0: @@ -220,8 +226,13 @@ def _compute_individual(args, signatures_factory): elif args.name_from_first: name = record.name - add_seq(sigs, record.sequence, - args.input_is_protein, args.check_sequence) + try: + add_seq(sigs, record.sequence, + args.input_is_protein, args.check_sequence) + except ValueError as exc: + error(f"ERROR when reading from '{filename}' - ") + error(str(exc)) + sys.exit(-1) notify('...{} {} sequences', filename, n, end='') @@ -328,6 +339,85 @@ def __init__(self, ksizes, seed, protein, dayhoff, hp, dna, num_hashes, track_ab self.track_abundance = track_abundance self.scaled = scaled + @classmethod + def from_manifest_row(cls, row): + "convert a CollectionManifest row into a ComputeParameters object" + is_dna = is_protein = is_dayhoff = is_hp = False + if row['moltype'] == 'DNA': + is_dna = True + elif row['moltype'] == 'protein': + is_protein = True + elif row['moltype'] == 'hp': + is_hp = True + elif row['moltype'] == 'dayhoff': + is_dayhoff = True + else: + assert 0 + + if is_dna: + ksize = row['ksize'] + else: + ksize = row['ksize'] * 3 + + p = cls([ksize], DEFAULT_MMHASH_SEED, is_protein, is_dayhoff, is_hp, is_dna, + row['num'], row['with_abundance'], row['scaled']) + + return p + + def to_param_str(self): + "Convert object to equivalent params str." + pi = [] + + if self.dna: + pi.append("dna") + elif self.protein: + pi.append("protein") + elif self.hp: + pi.append("hp") + elif self.dayhoff: + pi.append("dayhoff") + else: + assert 0 # must be one of the previous + + if self.dna: + kstr = [f"k={k}" for k in self.ksizes] + else: + # for protein, divide ksize by three. 
+ kstr = [f"k={k//3}" for k in self.ksizes] + assert kstr + pi.extend(kstr) + + if self.num_hashes != 0: + pi.append(f"num={self.num_hashes}") + elif self.scaled != 0: + pi.append(f"scaled={self.scaled}") + else: + assert 0 + + if self.track_abundance: + pi.append("abund") + # noabund is default + + if self.seed != DEFAULT_MMHASH_SEED: + pi.append(f"seed={self.seed}") + # self.seed + + return ",".join(pi) + + def __repr__(self): + return f"ComputeParameters({self.ksizes}, {self.seed}, {self.protein}, {self.dayhoff}, {self.hp}, {self.dna}, {self.num_hashes}, {self.track_abundance}, {self.scaled})" + + def __eq__(self, other): + return (self.ksizes == other.ksizes and + self.seed == other.seed and + self.protein == other.protein and + self.dayhoff == other.dayhoff and + self.hp == other.hp and + self.dna == other.dna and + self.num_hashes == other.num_hashes and + self.track_abundance == other.track_abundance and + self.scaled == other.scaled) + @staticmethod def from_args(args): ptr = lib.computeparams_new() @@ -394,6 +484,16 @@ def dna(self): def dna(self, v): return self._methodcall(lib.computeparams_set_dna, v) + @property + def moltype(self): + if self.dna: moltype = 'DNA' + elif self.protein: moltype = 'protein' + elif self.hp: moltype = 'hp' + elif self.dayhoff: moltype = 'dayhoff' + else: assert 0 + + return moltype + @property def num_hashes(self): return self._methodcall(lib.computeparams_num_hashes) diff --git a/src/sourmash/command_sketch.py b/src/sourmash/command_sketch.py index 6b5c1c5b51..dd02dcb8e2 100644 --- a/src/sourmash/command_sketch.py +++ b/src/sourmash/command_sketch.py @@ -2,12 +2,23 @@ Functions implementing the 'sketch' subcommands and related functions. 
""" import sys +import os +from collections import defaultdict, Counter +import csv +import shlex +import screed + +import sourmash from .signature import SourmashSignature -from .logging import notify, error, set_quiet +from .logging import notify, error, set_quiet, print_results from .command_compute import (_compute_individual, _compute_merged, - ComputeParameters) + ComputeParameters, add_seq, set_sig_name, + DEFAULT_MMHASH_SEED) +from sourmash import sourmash_args from sourmash.sourmash_args import check_scaled_bounds, check_num_bounds +from sourmash.sig.__main__ import _summarize_manifest, _SketchInfo +from sourmash.manifest import CollectionManifest DEFAULTS = dict( dna='k=31,scaled=1000,noabund', @@ -66,14 +77,8 @@ def _parse_params_str(params_str): if len(item) < 6 or item[4] != '=': raise ValueError("seed takes a parameter, e.g. 'seed=42'") params['seed'] = int(item[5:]) - elif item == 'protein': - moltype = 'protein' - elif item == 'dayhoff': - moltype = 'dayhoff' - elif item == 'hp': - moltype = 'hp' - elif item == 'dna': - moltype = 'dna' + elif item in ('protein', 'dayhoff', 'hp', 'dna'): + moltype = item else: raise ValueError(f"unknown component '{item}' in params string") @@ -82,8 +87,7 @@ def _parse_params_str(params_str): class _signatures_for_sketch_factory(object): "Build sigs on demand, based on args input to 'sketch'." 
- def __init__(self, params_str_list, default_moltype, mult_ksize_by_3): - + def __init__(self, params_str_list, default_moltype): # first, set up defaults per-moltype defaults = {} for moltype, pstr in DEFAULTS.items(): @@ -94,7 +98,7 @@ def __init__(self, params_str_list, default_moltype, mult_ksize_by_3): # next, fill out params_list self.params_list = [] - self.mult_ksize_by_3 = mult_ksize_by_3 + self.mult_ksize_by_3 = True if params_str_list: # parse each params_str passed in, using default_moltype if none @@ -103,21 +107,25 @@ def __init__(self, params_str_list, default_moltype, mult_ksize_by_3): moltype, params = _parse_params_str(params_str) if moltype and moltype != 'dna' and default_moltype == 'dna': raise ValueError(f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'; maybe use 'sketch translate'?") - elif moltype == 'dna' and default_moltype != 'dna': + elif moltype == 'dna' and default_moltype and default_moltype != 'dna': raise ValueError(f"Incompatible sketch type ({default_moltype}) and parameter override ({moltype}) in '{params_str}'") elif moltype is None: + if default_moltype is None: + raise ValueError(f"No default moltype and none specified in param string") moltype = default_moltype self.params_list.append((moltype, params)) else: + if default_moltype is None: + raise ValueError(f"No default moltype and none specified in param string") # no params str? default to a single sig, using default_moltype. 
self.params_list.append((default_moltype, {})) - def get_compute_params(self): + def get_compute_params(self, *, split_ksizes=False): for moltype, params_d in self.params_list: # get defaults for this moltype from self.defaults: default_params = self.defaults[moltype] - def_seed = default_params.get('seed', 42) + def_seed = default_params.get('seed', DEFAULT_MMHASH_SEED) def_num = default_params.get('num', 0) def_abund = default_params['track_abundance'] def_scaled = default_params.get('scaled', 0) @@ -134,26 +142,33 @@ def get_compute_params(self): if not ksizes: ksizes = def_ksizes - if self.mult_ksize_by_3: + # 'command sketch' adjusts k-mer sizes by 3 if non-DNA sketch. + if self.mult_ksize_by_3 and not def_dna: ksizes = [ k*3 for k in ksizes ] - params_obj = ComputeParameters(ksizes, - params_d.get('seed', def_seed), - def_protein, - def_dayhoff, - def_hp, - def_dna, - params_d.get('num', def_num), - params_d.get('track_abundance', - def_abund), - params_d.get('scaled', def_scaled)) - - yield params_obj - - def __call__(self): + make_param = lambda ksizes: ComputeParameters(ksizes, + params_d.get('seed', def_seed), + def_protein, + def_dayhoff, + def_hp, + def_dna, + params_d.get('num', def_num), + params_d.get('track_abundance', + def_abund), + params_d.get('scaled', def_scaled)) + + if split_ksizes: + for ksize in ksizes: + params_obj = make_param([ksize]) + yield params_obj + else: + params_obj = make_param(ksizes) + yield params_obj + + def __call__(self, *, split_ksizes=False): "Produce a new set of signatures built to match the param strings." 
sigs = [] - for params in self.get_compute_params(): + for params in self.get_compute_params(split_ksizes=split_ksizes): sig = SourmashSignature.from_params(params) sigs.append(sig) @@ -214,8 +229,7 @@ def dna(args): try: signatures_factory = _signatures_for_sketch_factory(args.param_string, - 'dna', - mult_ksize_by_3=False) + 'dna') except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) @@ -231,6 +245,7 @@ def protein(args): """ # for protein: args.input_is_protein = True + args.check_sequence = False # provide good defaults for dayhoff/hp/protein! if args.dayhoff and args.hp: @@ -244,8 +259,7 @@ def protein(args): try: signatures_factory = _signatures_for_sketch_factory(args.param_string, - moltype, - mult_ksize_by_3=True) + moltype) except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) @@ -274,11 +288,310 @@ def translate(args): try: signatures_factory = _signatures_for_sketch_factory(args.param_string, - moltype, - mult_ksize_by_3=True) + moltype) except ValueError as e: error(f"Error creating signatures: {str(e)}") sys.exit(-1) _add_from_file_to_filenames(args) _execute_sketch(args, signatures_factory) + + +def _compute_sigs(to_build, output, *, check_sequence=False): + "actually build the signatures in 'to_build' and output them to 'output'" + save_sigs = sourmash_args.SaveSignaturesToLocation(output) + save_sigs.open() + + for (name, filename), param_objs in to_build.items(): + assert param_objs + + # now, set up to iterate over sequences. + with screed.open(filename) as screed_iter: + if not screed_iter: + error(f"ERROR: no sequences found in '{filename}'?!") + sys.exit(-1) + + # build the set of empty sigs + sigs = [] + + is_dna = param_objs[0].dna + for p in param_objs: + if p.dna: assert is_dna + sig = SourmashSignature.from_params(p) + sigs.append(sig) + + input_is_protein = not is_dna + + # read sequence records & sketch + notify(f'... 
reading sequences from {filename}') + for n, record in enumerate(screed_iter): + if n % 10000 == 0: + if n: + notify('\r...{} {}', filename, n, end='') + + try: + add_seq(sigs, record.sequence, input_is_protein, + check_sequence) + except ValueError as exc: + error(f"ERROR when reading from '{filename}' - ") + error(str(exc)) + sys.exit(-1) + + notify('...{} {} sequences', filename, n, end='') + + set_sig_name(sigs, filename, name) + for sig in sigs: + save_sigs.add(sig) + + notify(f'calculated {len(sigs)} signatures for {n+1} sequences in {filename}') + + + save_sigs.close() + notify(f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0.") + + +def _output_csv_info(filename, sigs_to_build): + "output information about what signatures to build, in CSV format" + output_n = 0 + with sourmash_args.FileOutputCSV(filename) as csv_fp: + w = csv.DictWriter(csv_fp, fieldnames=['filename', 'sketchtype', + 'output_index', 'name', + 'param_strs']) + w.writeheader() + + output_n = 0 + for (name, filename), param_objs in sigs_to_build.items(): + param_strs = [] + + # should all be the same! + if param_objs[0].dna: + assert all( ( p.dna for p in param_objs ) ) + sketchtype = "dna" + else: + assert not any( ( p.dna for p in param_objs ) ) + sketchtype = "protein" + + for p in param_objs: + param_strs.append(p.to_param_str()) + + row = dict(filename=filename, sketchtype=sketchtype, + param_strs="-p " + " -p ".join(param_strs), + name=name, output_index=output_n) + + w.writerow(row) + + output_n += 1 + + +def fromfile(args): + if args.license != 'CC0': + error('error: sourmash only supports CC0-licensed signatures. 
sorry!') + sys.exit(-1) + + if args.output_signatures and os.path.exists(args.output_signatures): + if not args.force_output_already_exists: + error(f"** ERROR: output location '{args.output_signatures}' already exists!") + error(f"** Not overwriting/appending.") + error(f"** Use --force-output-already-exists if you want to overwrite/append.") + sys.exit(-1) + + # now, create the set of desired sketch specs. + try: + # omit a default moltype - must be provided in param string. + sig_factory = _signatures_for_sketch_factory(args.param_string, None) + except ValueError as e: + error(f"Error creating signatures: {str(e)}") + sys.exit(-1) + + # take the signatures factory => convert into a bunch of ComputeParameters + # objects. + build_params = list(sig_factory.get_compute_params(split_ksizes=True)) + + # confirm that they do not adjust seed, which is not supported in + # 'fromfile' b/c we don't store that info in manifests. (see #1849) + for p in build_params: + if p.seed != DEFAULT_MMHASH_SEED: + error("** ERROR: cannot set 'seed' in 'sketch fromfile'") + sys.exit(-1) + + # cross-product all of the names in the input CSV file + # with the sketch spec(s) provided on the command line. + + to_build = defaultdict(list) + all_names = {} + total_rows = 0 + skipped_sigs = 0 + n_missing_name = 0 + n_duplicate_name = 0 + + for csvfile in args.csvs: + with open(csvfile, newline="") as fp: + r = csv.DictReader(fp) + + for row in r: + name = row['name'] + if not name: + n_missing_name += 1 + continue + + genome = row['genome_filename'] + proteome = row['protein_filename'] + total_rows += 1 + + if name in all_names: + n_duplicate_name += 1 + else: + all_names[name] = (genome, proteome) + + fail_exit = False + if n_duplicate_name: + error(f"** ERROR: {n_duplicate_name} entries have duplicate 'name' records. Exiting!") + fail_exit = True + + if n_missing_name: + error(f"** ERROR: {n_missing_name} entries have blank 'name's? 
Exiting!") + fail_exit = True + + if fail_exit: + sys.exit(-1) + + # load manifests from '--already-done' databases => turn into + # ComputeParameters objects, indexed by name. + + already_done = defaultdict(list) + already_done_rows = [] + for filename in args.already_done: + idx = sourmash.load_file_as_index(filename) + manifest = idx.manifest + assert manifest + + # for each manifest row, + for row in manifest.rows: + name = row['name'] + if name: + # build a ComputeParameters object for later comparison + p = ComputeParameters.from_manifest_row(row) + + # add to list for this name + already_done[name].append(p) + + # matching name? check if we already have sig. if so, store! + if name in all_names: + if p in build_params: + already_done_rows.append(row) + + already_done_manifest = CollectionManifest(already_done_rows) + if args.already_done: + notify(f"Loaded {len(already_done)} pre-existing names from manifest(s)") + notify(f"collected {len(already_done_rows)} rows for already-done signatures.") + + ## now check which are already done and track only those that are + ## need to be done. + + total_sigs = 0 + missing = defaultdict(list) + missing_count = 0 + for name, (genome, proteome) in all_names.items(): + plist = already_done.get(name, []) + + # check list of already done against build parameters + for p in build_params: + total_sigs += 1 + + # does this signature already exist? + if p not in plist: + # nope - figure out genome/proteome needed + filename = genome if p.dna else proteome + filetype = 'genome' if p.dna else 'proteome' + + if filename: + # add to build list + to_build[(name, filename)].append(p) + else: + notify(f"WARNING: fromfile entry '{name}' is missing a {filetype}") + missing[name].append(p) + missing_count += 1 + else: + skipped_sigs += 1 + + ## we now have 'to_build' which contains the things we can build, + ## and 'missing', which contains anything we cannot build. Report! 
+ + notify(f"Read {total_rows} rows, requesting that {total_sigs} signatures be built.") + + if already_done_manifest: + info_d = _summarize_manifest(already_done_manifest) + print_results('---') + print_results("summary of already-done sketches:") + + for ski in info_d['sketch_info']: + mh_type = f"num={ski['num']}" if ski['num'] else f"scaled={ski['scaled']}" + mh_abund = ", abund" if ski['abund'] else "" + + sketch_str = f"{ski['count']} sketches with {ski['moltype']}, k={ski['ksize']}, {mh_type}{mh_abund}" + + print_results(f" {sketch_str: <50} {ski['n_hashes']} total hashes") + + print_results('---') + + if args.output_manifest_matching: + already_done_manifest.write_to_filename(args.output_manifest_matching) + notify(f"output {len(already_done_manifest)} already-done signatures to '{args.output_manifest_matching}' in manifest format.") + + if missing: + error("** ERROR: we cannot build some of the requested signatures.") + error(f"** {missing_count} total signatures (for {len(missing)} names) cannot be built.") + if args.ignore_missing: + error("** (continuing past this error because --ignore-missing was set)") + else: + sys.exit(-1) + + notify(f"** {total_sigs - skipped_sigs} new signatures to build from {len(to_build)} files;") + if not to_build: + notify(f"** Nothing to build. 
Exiting!") + sys.exit(0) + + if skipped_sigs: + notify(f"** {skipped_sigs} already exist, so skipping those.") + else: + notify(f"** we found no pre-existing signatures that match.") + + ## first, print out a summary of to_build: + + print_results('---') + print_results("summary of sketches to build:") + + counter = Counter() + build_info_d = {} + for filename, param_objs in to_build.items(): + for p in param_objs: + moltype = p.moltype + assert len(p.ksizes) == 1 + ksize = p.ksizes[0] + if not p.dna: ksize //= 3 + + ski = _SketchInfo(ksize=ksize, moltype=p.moltype, + scaled=p.scaled, num=p.num_hashes, + abund=p.track_abundance) + counter[ski] += 1 + + for ski, count in counter.items(): + mh_type = f"num={ski.num}" if ski.num else f"scaled={ski.scaled}" + mh_abund = ", abund" if ski.abund else "" + + sketch_str = f"{count} sketches with {ski.moltype}, k={ski.ksize}, {mh_type}{mh_abund}" + + print_results(f" {sketch_str: <50}") + + print_results('---') + + ## now, onward ho - do we build anything, or output stuff, or just exit? + + if args.output_signatures: # actually compute + _compute_sigs(to_build, args.output_signatures, + check_sequence=args.check_sequence) + + if args.output_csv_info: # output info necessary to construct + _output_csv_info(args.output_csv_info, to_build) + + notify(f"** {total_sigs} total requested; output {total_sigs - skipped_sigs}, skipped {skipped_sigs}") diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index db8cb00c97..60f1940c36 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -25,10 +25,14 @@ ZipFileLinearIndex - simple on-disk storage of signatures. -class MultiIndex - in-memory storage and selection of signatures from multiple -index objects, using manifests. +MultiIndex - in-memory storage and selection of signatures from multiple +index objects, using manifests. All signatures are kept in memory. 
+ +StandaloneManifestIndex - load manifests directly, and do lazy loading of +signatures on demand. No signatures are kept in memory. LazyLoadedIndex - selection on manifests with loading of index on demand. +(Consider using StandaloneManifestIndex instead.) CounterGather - an ancillary class returned by the 'counter_gather()' method. """ @@ -37,8 +41,7 @@ class MultiIndex - in-memory storage and selection of signatures from multiple import sourmash from abc import abstractmethod, ABC from collections import namedtuple, Counter -import csv -from io import TextIOWrapper +from collections import defaultdict from ..search import make_jaccard_search_query, make_gather_query from ..manifest import CollectionManifest @@ -49,7 +52,12 @@ class MultiIndex - in-memory storage and selection of signatures from multiple IndexSearchResult = namedtuple('Result', 'score, signature, location') class Index(ABC): + # this will be removed soon; see sourmash#1894. is_database = False + + # 'manifest', when set, implies efficient selection and direct + # access to signatures. Signatures may be stored in the manifest + # or loaded on demand from disk depending on the class, however. manifest = None @abstractmethod @@ -71,7 +79,11 @@ def signatures_with_location(self): yield ss, self.location def _signatures_with_internal(self): - """Return an iterator of tuples (ss, location, internal_location). + """Return an iterator of tuples (ss, internal_location). + + Unlike 'signatures_with_location()', this iterator should return + _all_ signatures in the object, not just those that remain after + selection/filtering. This is an internal API for use in generating manifests, and may change without warning. @@ -341,7 +353,7 @@ def select(self, ksize=None, moltype=None, scaled=None, num=None, def select_signature(ss, *, ksize=None, moltype=None, scaled=0, num=0, containment=False, abund=None, picklist=None): - "Check that the given signature matches the specificed requirements." 
+ "Check that the given signature matches the specified requirements." # ksize match? if ksize and ksize != ss.minhash.ksize: return False @@ -600,7 +612,7 @@ def load(cls, location, traverse_yield_all=False, use_manifest=True): use_manifest=use_manifest) def _signatures_with_internal(self): - """Return an iterator of tuples (ss, location, internal_location). + """Return an iterator of tuples (ss, internal_location). Note: does not limit signatures to subsets. """ @@ -615,7 +627,7 @@ def _signatures_with_internal(self): self.traverse_yield_all: fp = zf.open(zipinfo) for ss in load_signatures(fp): - yield ss, zf.filename, zipinfo.filename + yield ss, zipinfo.filename def signatures(self): "Load all signatures in the zip file." @@ -888,14 +900,13 @@ def signatures_with_location(self): yield row['signature'], loc def _signatures_with_internal(self): - """Return an iterator of tuples (ss, parent, location) + """Return an iterator of tuples (ss, location) CTB note: here, 'internal_location' is the source file for the index. This is a special feature of this (in memory) class. """ - parent = self.parent for row in self.manifest.rows: - yield row['signature'], parent, row['internal_location'] + yield row['signature'], row['internal_location'] def __len__(self): @@ -927,8 +938,13 @@ def sigloc_iter(): for ss in idx.signatures(): yield ss, iloc - # build manifest; note, signatures are stored in memory. + # build manifest; note, ALL signatures are stored in memory. # CTB: could do this on demand? + # CTB: should we use get_manifest functionality? + # CTB: note here that the manifest is created by iteration + # *even if it already exists.* This could be changed to be more + # efficient... but for now, use StandaloneManifestIndex if you + # want to avoid this when loading from multiple files. manifest = CollectionManifest.create_manifest(sigloc_iter()) # create! 
@@ -941,6 +957,8 @@ def load_from_directory(cls, pathname, *, force=False): Takes directory path plus optional boolean 'force'. Attempts to load all files ending in .sig or .sig.gz, by default; if 'force' is True, will attempt to load _all_ files, ignoring errors. + + Will not load anything other than JSON signature files. """ from ..sourmash_args import traverse_find_sigs @@ -958,11 +976,11 @@ def load_from_directory(cls, pathname, *, force=False): rel = os.path.relpath(thisfile, pathname) source_list.append(rel) - except (IOError, sourmash.exceptions.SourmashError): + except (IOError, sourmash.exceptions.SourmashError) as exc: if force: continue # ignore error else: - raise # stop loading! + raise ValueError(exc) # stop loading! # did we load anything? if not, error if not index_list: @@ -1003,8 +1021,8 @@ def load_from_path(cls, pathname, force=False): def load_from_pathlist(cls, filename): """Create a MultiIndex from all files listed in a text file. - Note: this will load signatures from directories and databases, too, - if they are listed in the text file; it uses 'load_file_as_index' + Note: this will attempt to load signatures from each file, + including zip collections, etc; it uses 'load_file_as_index' underneath. """ from ..sourmash_args import (load_pathlist_from_file, @@ -1043,6 +1061,8 @@ class LazyLoadedIndex(Index): from disk every time they are needed (e.g. 'find(...)', 'signatures()'). Wrapper class; signatures dynamically loaded from disk; uses manifests. + + CTB: This may be redundant with StandaloneManifestIndex. """ def __init__(self, filename, manifest): "Create an Index with given filename and manifest." @@ -1135,3 +1155,126 @@ def select(self, **kwargs): new_manifest = manifest.select_to_manifest(**kwargs) return LazyLoadedIndex(self.filename, new_manifest) + + +class StandaloneManifestIndex(Index): + """Load a standalone manifest as an Index. 
+ + This class is useful for the situation where you have a directory + with many signature collections underneath it, and you don't want to load + every collection each time you run sourmash. + + Instead, you can run 'sourmash sig manifest -o mf.csv' to + output a manifest and then use this class to load 'mf.csv' directly. + Sketch type selection, picklists, and pattern matching will all work + directly on the manifest and will load signatures only upon demand. + + One feature of this class is that absolute paths to sketches in + the 'internal_location' field of the manifests will be loaded properly. + This permits manifests to be constructed for various collections of + signatures that reside elsewhere, and not just below a single directory + prefix. + + StandaloneManifestIndex does _not_ store signatures in memory. + + This class overlaps in concept with LazyLoadedIndex and behaves + identically when a manifest contains only rows from a single + on-disk Index object. However, unlike LazyLoadedIndex, this class + can be used to reference multiple on-disk Index objects. + + This class also overlaps in concept with MultiIndex when + MultiIndex.load_from_pathlist is used to load other Index + objects. However, this class does not store any signatures in + memory, unlike MultiIndex. + """ + is_database = True + + def __init__(self, manifest, location, *, prefix=None): + """Create object. 'location' is path of manifest file, 'prefix' is + prepended to signature paths when loading non-abspaths.""" + assert manifest is not None + self.manifest = manifest + self._location = location + self.prefix = prefix + + @classmethod + def load(cls, location, *, prefix=None): + """Load manifest file from given location. + + If prefix is None (default), it is automatically set from dirname. + Set prefix='' to avoid this, or provide an explicit prefix. 
+ """ + if not os.path.isfile(location): + raise ValueError(f"provided manifest location '{location}' is not a file") + + with open(location, newline='') as fp: + m = CollectionManifest.load_from_csv(fp) + + if prefix is None: + prefix = os.path.dirname(location) + + return cls(m, location, prefix=prefix) + + @property + def location(self): + "Return the path to this manifest." + return self._location + + def signatures_with_location(self): + "Return an iterator over all signatures and their locations." + for ss, loc in self._signatures_with_internal(): + yield ss, loc + + def signatures(self): + "Return an iterator over all signatures." + for ss, loc in self._signatures_with_internal(): + yield ss + + def _signatures_with_internal(self): + """Return an iterator over all sigs of (sig, internal_location) + + Note that this is implemented differently from most Index + objects in that it only lists subselected parts of the + manifest, and not the original manifest. This was done out of + convenience: we don't currently have access to the original + manifest in this class. + """ + # collect all internal locations + iloc_to_rows = defaultdict(list) + for row in self.manifest.rows: + iloc = row['internal_location'] + iloc_to_rows[iloc].append(row) + + # iterate over internal locations, selecting relevant sigs + for iloc, iloc_rows in iloc_to_rows.items(): + # prepend with prefix? + if not iloc.startswith('/') and self.prefix: + iloc = os.path.join(self.prefix, iloc) + + sub_mf = CollectionManifest(iloc_rows) + picklist = sub_mf.to_picklist() + + idx = sourmash.load_file_as_index(iloc) + idx = idx.select(picklist=picklist) + for ss in idx.signatures(): + yield ss, iloc + + def __len__(self): + "Number of signatures in this manifest (after any select)." + return len(self.manifest) + + def __bool__(self): + "Is this manifest empty?" 
+ return bool(self.manifest) + + def save(self, *args): + raise NotImplementedError + + def insert(self, *args): + raise NotImplementedError + + def select(self, **kwargs): + "Run 'select' on the manifest." + new_manifest = self.manifest.select_to_manifest(**kwargs) + return StandaloneManifestIndex(new_manifest, self._location, + prefix=self.prefix) diff --git a/src/sourmash/index/revindex.py b/src/sourmash/index/revindex.py index f4346f074c..8951dbc759 100644 --- a/src/sourmash/index/revindex.py +++ b/src/sourmash/index/revindex.py @@ -1,3 +1,7 @@ +""" +RevIndex - a rust-based reverse index by hashes. +""" + import weakref from sourmash.index import Index, IndexSearchResult diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py index fbd2fc8d8b..fb9119def4 100644 --- a/src/sourmash/lca/lca_db.py +++ b/src/sourmash/lca/lca_db.py @@ -58,6 +58,10 @@ class LCA_Database(Index): """ is_database = True + # we set manifest to None to avoid implication of fast on-disk access to + # sketches. This may be revisited later. + manifest = None + def __init__(self, ksize, scaled, moltype='DNA'): self.ksize = int(ksize) self.scaled = int(scaled) @@ -181,8 +185,9 @@ def signatures(self): yield v def _signatures_with_internal(self): + "Return all of the signatures in this LCA database." for idx, ss in self._signatures.items(): - yield ss, self.location, idx + yield ss, idx def select(self, ksize=None, moltype=None, num=0, scaled=0, abund=None, containment=False, picklist=None): diff --git a/src/sourmash/manifest.py b/src/sourmash/manifest.py index ca690a30f7..78c1a139ff 100644 --- a/src/sourmash/manifest.py +++ b/src/sourmash/manifest.py @@ -27,13 +27,25 @@ class CollectionManifest: def __init__(self, rows): "Initialize from an iterable of metadata dictionaries." 
- self.rows = tuple(rows) + self.rows = () + self._md5_set = set() - # build a fast lookup table for md5sums in particular - md5set = set() + self._add_rows(rows) + + def _add_rows(self, rows): + self.rows += tuple(rows) + + # maintain a fast lookup table for md5sums + md5set = self._md5_set for row in self.rows: md5set.add(row['md5']) - self._md5_set = md5set + + def __iadd__(self, other): + self._add_rows(other.rows) + return self + + def __add__(self, other): + return CollectionManifest(self.rows + other.rows) def __bool__(self): return bool(self.rows) @@ -44,6 +56,11 @@ def __len__(self): def __eq__(self, other): return self.rows == other.rows + @classmethod + def load_from_filename(cls, filename): + with open(filename, newline="") as fp: + return cls.load_from_csv(fp) + @classmethod def load_from_csv(cls, fp): "load a manifest from a CSV file." @@ -80,6 +97,10 @@ def load_from_csv(cls, fp): return cls(manifest_list) + def write_to_filename(self, filename): + with open(filename, "w", newline="") as fp: + return self.write_to_csv(fp, write_header=True) + @classmethod def write_csv_header(cls, fp): "write header for manifest CSV format" diff --git a/src/sourmash/minhash.py b/src/sourmash/minhash.py index 21b065aeb0..5e5a14c4cf 100644 --- a/src/sourmash/minhash.py +++ b/src/sourmash/minhash.py @@ -833,10 +833,15 @@ def to_frozen(self): return new_mh def inflate(self, from_mh): - "return a new MinHash object with abundances taken from 'from_mh'" + """return a new MinHash object with abundances taken from 'from_mh' + + note that this implicitly does an intersection: hashes that have + no abundance in 'from_mh' are set to abundance 0 and removed from + 'self'. 
+ """ if not self.track_abundance and from_mh.track_abundance: orig_abunds = from_mh.hashes - abunds = { h: orig_abunds[h] for h in self.hashes } + abunds = { h: orig_abunds.get(h, 0) for h in self.hashes } abund_mh = from_mh.copy_and_clear() @@ -937,4 +942,4 @@ def __setstate__(self, tup): def __copy__(self): return self - copy = __copy__ \ No newline at end of file + copy = __copy__ diff --git a/src/sourmash/picklist.py b/src/sourmash/picklist.py index f1949942f2..fc306782b6 100644 --- a/src/sourmash/picklist.py +++ b/src/sourmash/picklist.py @@ -62,6 +62,8 @@ def __init__(self, coltype, *, pickfile=None, column_name=None, valid_coltypes.update(self.supported_coltypes) if coltype not in valid_coltypes: raise ValueError(f"invalid picklist column type '{coltype}'") + self.orig_coltype = coltype + self.orig_colname = column_name # if we're using gather or prefetch or manifest, set column_name # automatically (after checks). @@ -226,6 +228,19 @@ def matches_manifest_row(self, row): return True return False + def matched_csv_row(self, row): + """did the given CSV row object match this picklist? + + This is used for examining matches/nomatches to original picklist file. 
+ """ + q = row[self.column_name] + q = self.preprocess_fn(q) + self.n_queries += 1 + + if q in self.found: + return True + return False + def filter(self, it): "yield all signatures in the given iterator that are in the picklist" for ss in it: diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index 74548264c4..d974c23c91 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -189,7 +189,7 @@ def _signatures_with_internal(self): """ for k in self.leaves(): ss = k.data - yield ss, self.location, k._path + yield ss, k._path def select(self, ksize=None, moltype=None, num=0, scaled=0, containment=False, abund=None, picklist=None): @@ -831,8 +831,12 @@ def load(cls, location, *, leaf_loader=None, storage=None, print_version_warning sbt_fn = os.path.join(dirname, sbt_name) if not sbt_fn.endswith('.sbt.json') and tempfile is None: sbt_fn += '.sbt.json' - with open(sbt_fn) as fp: - jnodes = json.load(fp) + + try: + with open(sbt_fn) as fp: + jnodes = json.load(fp) + except NotADirectoryError as exc: + raise ValueError(str(exc)) if tempfile is not None: tempfile.close() diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index 5c8106076d..fa46a7d209 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -202,9 +202,10 @@ def describe(args): """ provide basic info on signatures """ - set_quiet(args.quiet) + set_quiet(args.quiet, args.debug) moltype = sourmash_args.calculate_moltype(args) picklist = sourmash_args.load_picklist(args) + pattern_search = sourmash_args.load_include_exclude_db_patterns(args) _extend_signatures_with_from_file(args) # write CSV? @@ -214,11 +215,11 @@ def describe(args): csv_obj = sourmash_args.FileOutputCSV(args.csv) csv_fp = csv_obj.open() - # CTB: might want to switch to sourmash_args.FileOutputCSV here? 
w = csv.DictWriter(csv_fp, ['signature_file', 'md5', 'ksize', 'moltype', 'num', 'scaled', 'n_hashes', 'seed', - 'with_abundance', 'name', 'filename', 'license'], + 'with_abundance', 'name', 'filename', 'license', + 'sum_hashes'], extrasaction='ignore') w.writeheader() @@ -230,10 +231,12 @@ def describe(args): picklist=picklist, progress=progress, yield_all_files=args.force, - force=args.force) + force=args.force, + pattern=pattern_search) for sig, location in loader: # extract info, write as appropriate. + signature_file = location mh = sig.minhash ksize = mh.ksize moltype = mh.moltype @@ -241,6 +244,7 @@ def describe(args): num = mh.num seed = mh.seed n_hashes = len(mh) + sum_hashes = sum(mh.hashes.values()) with_abundance = 0 if mh.track_abundance: with_abundance = 1 @@ -262,6 +266,7 @@ def describe(args): md5: {md5} k={ksize} molecule={moltype} num={num} scaled={scaled} seed={seed} track_abundance={with_abundance} size: {n_hashes} +sum hashes: {sum_hashes} signature license: {license} ''', **locals()) @@ -282,7 +287,8 @@ def manifest(args): loader = sourmash_args.load_file_as_index(args.location, yield_all_files=args.force) except ValueError as exc: - error(f"Cannot open '{args.location}'.") + error(f"Cannot open '{args.location}' as a sourmash signature collection.") + error("Use -d/--debug for details.") sys.exit(-1) rebuild = True @@ -443,7 +449,7 @@ def intersect(args): """ intersect one or more signatures by taking the intersection of hashes. - This function always removes abundances. + This function always removes abundances unless -A specified. """ set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -468,24 +474,20 @@ def intersect(args): first_sig = sigobj mins = set(sigobj.minhash.hashes) else: - # check signature compatibility -- + # check signature compatibility -- if no ksize/moltype specified + # 'first_sig' may be incompatible with later sigs. 
if not sigobj.minhash.is_compatible(first_sig.minhash): error("incompatible minhashes; specify -k and/or molecule type.") sys.exit(-1) mins.intersection_update(sigobj.minhash.hashes) - if len(progress) == 0: - error("no signatures to merge!?") - sys.exit(-1) - # forcibly turn off track_abundance, unless --abundances-from set. - if not args.abundances_from: - intersect_mh = first_sig.minhash.copy_and_clear() - intersect_mh.track_abundance = False - intersect_mh.add_many(mins) - intersect_sigobj = sourmash.SourmashSignature(intersect_mh) - else: + intersect_mh = first_sig.minhash.copy_and_clear().flatten() + intersect_mh.add_many(mins) + + # borrow abundances from a signature? + if args.abundances_from: notify(f'loading signature from {args.abundances_from}, keeping abundances') abund_sig = sourmash.load_one_signature(args.abundances_from, ksize=args.ksize, @@ -493,16 +495,10 @@ def intersect(args): if not abund_sig.minhash.track_abundance: error("--track-abundance not set on loaded signature?! exiting.") sys.exit(-1) - intersect_mh = abund_sig.minhash.copy_and_clear() - abund_mins = abund_sig.minhash.hashes - # do one last intersection - mins.intersection_update(abund_mins) - abund_mins = { k: abund_mins[k] for k in mins } - - intersect_mh.set_abundances(abund_mins) - intersect_sigobj = sourmash.SourmashSignature(intersect_mh) + intersect_mh = intersect_mh.inflate(abund_sig.minhash) + intersect_sigobj = sourmash.SourmashSignature(intersect_mh) with FileOutput(args.output, 'wt') as fp: sourmash.save_signatures([intersect_sigobj], fp=fp) @@ -511,6 +507,53 @@ def intersect(args): sourmash_args.report_picklist(args, picklist) +def inflate(args): + """ + inflate one or more other signatures from the first. 
+ """ + set_quiet(args.quiet) + moltype = sourmash_args.calculate_moltype(args) + picklist = sourmash_args.load_picklist(args) + + inflate_sig = sourmash_args.load_query_signature(args.signature_from, + ksize=args.ksize, + select_moltype=moltype) + inflate_from_mh = inflate_sig.minhash + ksize = inflate_from_mh.ksize + moltype = inflate_from_mh.moltype + + if not inflate_from_mh.track_abundance: + error(f"ERROR: signature '{inflate_sig.name}' from ") + error(f"file '{args.signature_from}' has no abundances.") + sys.exit(-1) + + # start loading! + progress = sourmash_args.SignatureLoadingProgress() + loader = sourmash_args.load_many_signatures(args.other_sigs, + ksize=ksize, + moltype=moltype, + picklist=picklist, + progress=progress, + yield_all_files=args.force, + force=args.force) + + with sourmash_args.SaveSignaturesToLocation(args.output) as save_sigs: + for sigobj, sigloc in loader: + inflated_mh = sigobj.minhash.inflate(inflate_from_mh) + inflated_sigobj = sourmash.SourmashSignature(inflated_mh, + name=sigobj.name) + + save_sigs.add(inflated_sigobj) + + if len(progress) == 0: + error("no signatures to inflate!?") + sys.exit(-1) + + notify(f'loaded and intersected {len(save_sigs)} signatures') + if picklist: + sourmash_args.report_picklist(args, picklist) + + def subtract(args): """ subtract one or more signatures from another @@ -521,6 +564,9 @@ def subtract(args): from_sigfile = args.signature_from from_sigobj = sourmash.load_one_signature(from_sigfile, ksize=args.ksize, select_moltype=moltype) + if args.abundances_from: # it's ok to work with abund signatures if -A. 
+ args.flatten = True + from_mh = from_sigobj.minhash if from_mh.track_abundance and not args.flatten: error('Cannot use subtract on signatures with abundance tracking, sorry!') @@ -553,9 +599,22 @@ def subtract(args): error("no signatures to subtract!?") sys.exit(-1) - subtract_mh = from_sigobj.minhash.copy_and_clear() + # build new minhash with new mins + subtract_mh = from_sigobj.minhash.copy_and_clear().flatten() subtract_mh.add_many(subtract_mins) + # borrow abundances from somewhere? + if args.abundances_from: + notify(f'loading signature from {args.abundances_from}, keeping abundances') + abund_sig = sourmash.load_one_signature(args.abundances_from, + ksize=args.ksize, + select_moltype=moltype) + if not abund_sig.minhash.track_abundance: + error("--track-abundance not set on loaded signature?! exiting.") + sys.exit(-1) + + subtract_mh = subtract_mh.inflate(abund_sig.minhash) + subtract_sigobj = sourmash.SourmashSignature(subtract_mh) with FileOutput(args.output, 'wt') as fp: @@ -1135,6 +1194,35 @@ def kmers(args): _SketchInfo = namedtuple('_SketchInfo', 'ksize, moltype, scaled, num, abund') +def _summarize_manifest(manifest): + info_d = {} + + # use a namedtuple to track counts of distinct sketch types and n hashes + total_size = 0 + counter = Counter() + hashcounts = Counter() + for row in manifest.rows: + ski = _SketchInfo(ksize=row['ksize'], moltype=row['moltype'], + scaled=row['scaled'], num=row['num'], + abund=row['with_abundance']) + counter[ski] += 1 + hashcounts[ski] += row['n_hashes'] + total_size += row['n_hashes'] + + # store in info_d + info_d['total_hashes'] = total_size + sketch_info = [] + for ski, count in counter.items(): + sketch_d = dict(ski._asdict()) + sketch_d['count'] = count + sketch_d['n_hashes'] = hashcounts[ski] + sketch_info.append(sketch_d) + info_d['sketch_info'] = sketch_info + + return info_d + + +# NOTE: also aliased as 'summarize' def fileinfo(args): """ provide summary information on the given path (collection, index, etc.) 
@@ -1151,7 +1239,8 @@ def fileinfo(args): idx = sourmash_args.load_file_as_index(args.path, yield_all_files=args.force) except ValueError: - error(f"Cannot open '{args.path}'.") + error(f"Cannot open '{args.path}' as a sourmash signature collection.") + error("Use -d/--debug for details.") sys.exit(-1) print_bool = lambda x: "yes" if x else "no" @@ -1183,27 +1272,7 @@ def fileinfo(args): notify("** no manifest and cannot be generated; exiting.") sys.exit(0) - # use a namedtuple to track counts of distinct sketch types and n hashes - total_size = 0 - counter = Counter() - hashcounts = Counter() - for row in manifest.rows: - ski = _SketchInfo(ksize=row['ksize'], moltype=row['moltype'], - scaled=row['scaled'], num=row['num'], - abund=row['with_abundance']) - counter[ski] += 1 - hashcounts[ski] += row['n_hashes'] - total_size += row['n_hashes'] - - # store in info_d - info_d['total_hashes'] = total_size - sketch_info = [] - for ski, count in counter.items(): - sketch_d = dict(ski._asdict()) - sketch_d['count'] = count - sketch_d['n_hashes'] = hashcounts[ski] - sketch_info.append(sketch_d) - info_d['sketch_info'] = sketch_info + info_d.update(_summarize_manifest(manifest)) if text_out: print_results(f"total hashes: {info_d['total_hashes']}") @@ -1222,6 +1291,97 @@ def fileinfo(args): print(json.dumps(info_d)) +def check(args): + """ + check signature db(s) against a picklist. + """ + from sourmash.picklist import PickStyle + set_quiet(args.quiet, args.debug) + moltype = sourmash_args.calculate_moltype(args) + picklist = sourmash_args.load_picklist(args) + pattern_search = sourmash_args.load_include_exclude_db_patterns(args) + _extend_signatures_with_from_file(args) + + if not picklist: + error("** No picklist provided?! Exiting.") + sys.exit(-1) + + if picklist.pickstyle == PickStyle.EXCLUDE and args.output_missing: + error("** ERROR: Cannot use an 'exclude' picklist with '-o/--output-missing'") + sys.exit(-1) + + # require manifests? 
+ require_manifest = True + if args.no_require_manifest: + require_manifest = False + debug("sig check: manifest will not be required") + else: + debug("sig check: manifest required") + + total_manifest_rows = [] + + # start loading! + total_rows_examined = 0 + for filename in args.signatures: + idx = sourmash_args.load_file_as_index(filename, + yield_all_files=args.force) + + idx = idx.select(ksize=args.ksize, moltype=moltype) + + if idx.manifest is None and require_manifest: + error(f"ERROR on filename '{filename}'.") + error("sig check requires a manifest by default, but no manifest present.") + error("specify --no-require-manifest to dynamically generate one.") + sys.exit(-1) + + # has manifest, or ok to build (require_manifest=False) - continue! + manifest = sourmash_args.get_manifest(idx, require=True) + manifest_rows = manifest._select(picklist=picklist) + total_rows_examined += len(manifest) + total_manifest_rows += manifest_rows + + notify(f"loaded {total_rows_examined} signatures.") + + sourmash_args.report_picklist(args, picklist) + + # output picklist of non-matching in same format as input picklist + n_missing = len(picklist.pickset - picklist.found) + if args.output_missing and n_missing: + pickfile = picklist.pickfile + + # go through the input file and pick out missing rows. + n_input = 0 + n_output = 0 + with open(pickfile, newline='') as csvfp: + r = csv.DictReader(csvfp) + + with open(args.output_missing, "w", newline='') as outfp: + w = csv.DictWriter(outfp, fieldnames=r.fieldnames) + w.writeheader() + + for row in r: + n_input += 1 + if not picklist.matched_csv_row(row): + n_output += 1 + w.writerow(row) + notify(f"saved {n_output} non-matching rows of {n_input} picklist rows to '{args.output_missing}'") + elif args.output_missing: + notify(f"(no remaining picklist entries; not saving to '{args.output_missing}')") + + # save manifest of matching! 
+ if args.save_manifest_matching and total_manifest_rows: + mf = CollectionManifest(total_manifest_rows) + with open(args.save_manifest_matching, 'w', newline="") as fp: + mf.write_to_csv(fp, write_header=True) + notify(f"wrote {len(mf)} matching manifest rows to '{args.save_manifest_matching}'") + elif args.save_manifest_matching: + notify(f"(not saving matching manifest to '{args.save_manifest_matching}' because no matches)") + + if args.fail_if_missing and n_missing: + error("** ERROR: missing values, and --fail-if-missing requested. Exiting.") + sys.exit(-1) + + def main(arglist=None): args = sourmash.cli.get_parser().parse_args(arglist) submod = getattr(sourmash.cli.sig, args.subcmd) diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index a66709d8d9..dc001013e3 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -364,6 +364,12 @@ def _load_stdin(filename, **kwargs): return db +def _load_standalone_manifest(filename, **kwargs): + from sourmash.index import StandaloneManifestIndex + idx = StandaloneManifestIndex.load(filename) + return idx + + def _multiindex_load_from_pathlist(filename, **kwargs): "Load collection from a list of signature/database files" db = MultiIndex.load_from_pathlist(filename) @@ -416,6 +422,7 @@ def _load_zipfile(filename, **kwargs): # all loader functions, in order. _loader_functions = [ ("load from stdin", _load_stdin), + ("load from standalone manifest", _load_standalone_manifest), ("load from path (file or directory)", _multiindex_load_from_path), ("load from file list", _multiindex_load_from_pathlist), ("load SBT", _load_sbt), @@ -596,7 +603,8 @@ def open(self): return self.fp def close(self): - self.fp.close() + if self.fp is not None: # in case of stdout + self.fp.close() def __enter__(self): return self.open() @@ -758,6 +766,7 @@ def get_manifest(idx, *, require=True, rebuild=False): Retrieve a manifest for this idx, loaded with `load_file_as_index`. 
If a manifest exists and `rebuild` is False, return the manifest. + Even if a manifest exists and `rebuild` is True, rebuild the manifest. If a manifest does not exist or `rebuild` is True, try to build one. If a manifest cannot be built and `require` is True, error exit. @@ -775,16 +784,10 @@ def get_manifest(idx, *, require=True, rebuild=False): debug_literal(f"get_manifest: no manifest found / rebuild={rebuild}") - # CTB: CollectionManifest.create_manifest wants (ss, iloc). - # so this is an adaptor function! Might want to just change - # what `create_manifest` takes. - def manifest_iloc_iter(idx): - for (ss, loc, iloc) in idx._signatures_with_internal(): - yield ss, iloc - # need to build one... try: - m = CollectionManifest.create_manifest(manifest_iloc_iter(idx), + debug_literal("get_manifest: rebuilding manifest") + m = CollectionManifest.create_manifest(idx._signatures_with_internal(), include_signature=False) debug_literal("get_manifest: rebuilt manifest.") except NotImplementedError: diff --git a/tests/test-data/47.abunds.fa.sig b/tests/test-data/47.abunds.fa.sig deleted file mode 100644 index 74a9f495cb..0000000000 --- a/tests/test-data/47.abunds.fa.sig +++ /dev/null @@ -1 +0,0 @@ -[{"class":"sourmash_signature","email":"","filename":"47.fa","hash_function":"0.murmur64","license":"CC0","name":"NC_009665.1 Shewanella baltica OS185, complete 
genome","signatures":[{"ksize":31,"max_hash":18446744073709552,"md5sum":"09a08691ce52952152f0e866a59f6261","mins":[2925290528259,7397951382043,9478766578752,26390034908046,31811219567311,36191627174349,39112643786682,46822418898135,47180432856748,60017138985701,60046869099761,65325381835497,73805228897455,74037001801154,75800414195236,81855770871884,83631867214312,86442965329695,89858161015356,90806331781332,95108107091043,97258972896665,109728134835863,111162670259148,113585458770972,116166720583475,121382935674939,125296899385152,141176320451685,141284968207060,141805235471354,147190179068733,149024066888166,153783847123278,157255282423883,160902593196961,162823771630571,166163367169365,174979625787948,175032069345452,182141449646872,187503667710897,191814288543916,192890223167288,195186364664284,196037984804395,197033160819668,203335269479450,204822233856042,209233297808434,210990374921109,214600505227173,216861451361880,217827490079709,224612774123844,227683744949779,228540468204721,228644441858825,228848037454334,235478348737722,240579984608212,245029062906088,248581735034297,251186192464160,258255664267571,258924003299576,265587486568360,269368370464968,274235329360733,287831833139065,293035680952788,294558365931778,295357672887769,303548064148961,303884611876696,306041902505698,307504482597750,309969810458414,316701230964482,316897730501733,318378982650332,318568418472400,318769251839299,319335385321196,324290895668437,335605928681508,339214912158009,341001360876621,349347535027435,351258437206186,360700437330047,367327558614874,373471575728001,374188784960382,380562917179168,384016070832594,386412107814027,389279696836396,393985777467936,395356088720884,396054053894786,399215565767837,399215750881719,411030707384650,414934253467214,423759820188444,430191392037330,431262729545883,437506450440821,438105428895659,438530381378884,439044119332850,444273467310604,449680755457024,450506164772110,457138551847407,457671098462976,461477875054528,473505790593386,481815
788294090,487479264340595,489519873805078,494381455384554,495601542123242,500121418104925,502603922576313,506180131137999,506336140549160,516283812540815,518803929727716,536385923460112,536713413896697,537757852470225,538244971589768,540208451183188,540588787405694,542763181011925,549192277958979,550069279000761,553900351455263,554447489693319,559226934389812,561316274253850,569061433009767,578397933313938,578899728742280,582924953100697,583572058199369,589631402773527,595681341596523,615537076898013,626475972546369,632243908286808,639824119626438,645743921515803,648018730756195,654100189449365,668365295793413,670523964406925,671677182717796,671759739945458,676515376899555,677670347980377,684837528099741,687895771489510,693758846688308,694523064126211,697547171219962,698360853391060,698383699159430,699304671955329,703696716274708,706932232475763,708449170262947,726292867622433,726450649964317,727800693698567,728799639190186,734019394597526,735119835330596,737721455578775,738543439712395,741951415758063,748275069435017,750168693442959,763201112060730,763669867104092,763903450865190,767121298622699,767417571203746,770436202573059,771683466150501,772051111454828,772152509572841,787291725467630,798319271383660,802162977380527,806341566938246,813805466325024,815570804752811,816564335333987,817024725405204,817504754626588,821706687072387,826077010431743,828356750400476,831736232379626,843025850509368,843740928711723,845050451776051,852042280696332,857912135260852,871829709114624,873254290207218,875246525542985,889897273652095,894040289596463,897620767964532,904962988643425,905308801557271,906900833647951,909442865612931,912697620927191,913789208155712,916185332282483,917277762192278,917334002968300,919561883055202,920956096920505,929046426661708,930950142910172,933691189676382,934117578798841,936230738064974,938188383682602,941861412444067,942726201014166,945032973428091,947084478373286,948779805509636,951217347666850,955636489177710,961017555998937,961314440978493,964218
423186297,968212926455014,968926587713112,969379511837489,972618046502811,974637708612999,980196796037373,980565419407507,983225283458250,987541215674501,992940514834332,996549857630112,998926194132937,1014496787753945,1017704359447639,1020480845863237,1024292399670426,1024634573363382,1028460419483054,1033874047074353,1035843403340873,1037163054983442,1039558325527817,1045088944681707,1045785088974313,1048574231977270,1051002783372661,1056506578664023,1057491059487351,1059437143082343,1059853068042602,1060760398971021,1061967838052170,1066520357980609,1069224019506529,1071759691375436,1072369963153950,1080440645655398,1083957482733017,1085596610204486,1086288713384900,1088705827145973,1089204340626863,1090298523330765,1090505634288396,1093123453947031,1093780160574614,1094807962005299,1096801323900100,1103535113750718,1105423537109674,1110277142974534,1113000955148039,1118646614510530,1119281509125641,1119614160374606,1120783033143617,1129279349995602,1130113935525204,1130881986044393,1132820492214112,1144614443668767,1147223276986948,1147680055727668,1158195764117399,1163159397520386,1163303408022562,1164535774717695,1174386415542665,1182168703505980,1186019430315229,1190006012882786,1191391064481088,1200797929442729,1203248128742846,1208460365112124,1225631809302250,1234453656762891,1234698668275227,1237451114108962,1241245219164313,1241546710850109,1241668290204495,1242418821754022,1243711623939695,1244290020173228,1244346278691061,1245003263018464,1250484435790357,1256754510605581,1264563878337445,1269060350975578,1270154727600023,1271923497273997,1273115659423672,1280559509676354,1282472909138162,1285880210646676,1287498565406779,1291218968991828,1292246474868788,1292487278268025,1296553378083571,1300214247397513,1311078551896352,1311449533649890,1313826623773576,1314579091305857,1322276316890973,1325524051301607,1332430917176015,1332675238905364,1334939013056183,1337600826833551,1339674524726757,1341661245836409,1341942310569850,1344850241954264,1348761416973
437,1356904466129199,1369078449955986,1370039456672284,1370854092951821,1372351037556570,1376501003787476,1378471035008080,1383348406006914,1387085462947589,1387385057191781,1388584147493453,1390384276015810,1396964107951550,1401298565016323,1407230822931784,1410486644494794,1410786461048450,1414537954260326,1418743862991832,1420081602859846,1424366051167663,1431140791675340,1438960590550765,1443983103542619,1449677011803774,1457429906997387,1458641089226597,1458716224614631,1459144447544839,1461511802747479,1465076638017898,1465867789405739,1467827467674025,1468663744355213,1476939334625119,1480237325649862,1481088686107013,1483592564337201,1492314148312178,1498457281550692,1499617447616390,1503853002568292,1504262319315651,1505172916296130,1513668920373911,1519648405600154,1520931632741619,1521780633605083,1522237249746592,1522582599941917,1523518586763814,1528071377900249,1529728378502178,1531967467499308,1535306641925593,1535658178776979,1539307118095840,1546368847550532,1548019688923957,1552083355029650,1555637141656241,1555928090783844,1556284449775147,1558324681023092,1560969323307091,1569318833056381,1573222947937990,1584949879718000,1588978430427079,1591204462547614,1598261363578814,1600688746972553,1603093475242546,1605199952752847,1617237167349710,1618568234848372,1619286790649678,1621192910003941,1622309948672121,1628201100274523,1632271494883561,1633815225207084,1634399357702189,1637441524349088,1642637371934077,1643195637784435,1643598557356785,1645422696089427,1645673596073883,1645866259200502,1648716913052297,1652765950688817,1654033476941478,1658743399661231,1659819081077302,1662433005161059,1666913529898081,1679605700468270,1679921198649960,1680089532480362,1683724693448022,1689682641477370,1691706033392643,1696089597402537,1705950022227142,1714068118984789,1717459770518422,1718014979380734,1719690455811654,1734728075132632,1739172733710985,1745324659468599,1747303538361662,1749145577098552,1750021468273833,1750530525839386,1757623281396842,1758538
630442116,1761579455667380,1762621869823670,1766019454242846,1766154871452422,1768074570558590,1769506068128510,1770988073934927,1778201561133905,1778315567513725,1780288814569870,1781805678833298,1786019351090790,1787025898307575,1793358709247570,1801404378718274,1802232213372715,1804215890133513,1808123394894591,1808805306365691,1810435102767883,1813163351446427,1818925318022107,1819091566970620,1821246620845572,1825289420275521,1829618500803507,1831822327838518,1832408978761242,1835694527640110,1837046808494825,1840320929072049,1842060817177608,1844561134226776,1845664541012305,1846495837486874,1846814283210937,1848195902901531,1849342199305473,1852955637970413,1853064829868822,1854709332537365,1856791461736081,1860030910962345,1862153320764207,1862209616890144,1867578456400407,1870278489144074,1871450013370760,1880743049410508,1880811582956504,1885064900552256,1888527800896759,1899912419788159,1909513665427200,1909893462067689,1910324702460153,1920155014152585,1923031184773399,1923724551213831,1928488418125995,1935392806238480,1936981590066389,1937494292258243,1941935226774825,1942786308149620,1943419695090025,1947698435893922,1947827395290642,1948000063884420,1951286173673455,1957196594968485,1961156417600790,1968345824207972,1973565525696890,1976759223622041,1977637922131648,1977923456470816,1979621033784766,1979691191211071,1981752378561978,1987210877457747,1993564537623510,1996672784729607,1999133750243675,2011354377485272,2017517839581062,2021272596821928,2024291985865500,2030916441428059,2030943399237635,2035149501864507,2047630125224977,2052394950437991,2059880114534091,2060560658024761,2064432037950349,2083858695302000,2088434760658037,2092860563281190,2095578868362462,2097280377232511,2099121913442760,2104210209064238,2107076373938295,2108459225069649,2111395821264557,2122246048824157,2123504523298871,2125171930737142,2127588293738580,2135890156111278,2147601242872786,2147628766136779,2148277682163663,2148629935713334,2148648462894137,2154050039033300,2
154825108832254,2157543511093753,2159292319817060,2159391483345580,2163811550162994,2163911364872485,2164107595577716,2166610246026701,2169130162448361,2169401527323023,2182965638264818,2189902950844361,2191089458213993,2194321556975056,2197255584699767,2197550753498976,2197844428920029,2204899458948058,2207484772689862,2214585329667475,2216291576857764,2218560589085471,2222579004644118,2225440067596925,2231678593259696,2239881880935087,2246651203996116,2249382176770011,2251553784168898,2262343143065292,2269891656332884,2277357511613050,2282851679505524,2284008883123690,2294311150128150,2295851772366195,2296330477067902,2297300047218453,2299522719885844,2300003729256754,2305986746818130,2307128673346491,2309328595812376,2314348683023278,2325178911253636,2337363146012963,2338273922165178,2340650536569632,2341149645621931,2349169861378449,2350865952696907,2355373744763135,2359599974602456,2360988166250281,2364165589013103,2365100930739182,2365101583995089,2368070257601382,2379436665071024,2387981834215976,2391376217204289,2391657757985839,2398709852888712,2400026944838468,2402208725828096,2409550806440554,2410681029165949,2412591449989948,2413992919514685,2416809210551017,2420810333651625,2426508439798144,2431689886658063,2434636409776451,2434817960891416,2439360431069834,2442924938559564,2444743697540746,2447704465950372,2451185988965285,2459766139292236,2466530448132713,2466909570912171,2468169126671752,2469990435969385,2472082629869597,2484069800626695,2484707593134371,2486433068244510,2486783619425529,2489988128759413,2497895029394563,2498928723235105,2502877897637973,2507836460937176,2516300104537043,2529686136078992,2531603179656151,2535379300081535,2540748246632844,2542081767873586,2543849372306944,2545009932051689,2547409441873208,2556506799873846,2556532058925046,2570487229611126,2573496573602154,2573521798941261,2575271828359827,2583040424187016,2583468225494252,2583541506767529,2590282004204866,2594709561160407,2598736648640020,2600008000392449,260152604721
3631,2604919231758350,2606628075888049,2607585442845824,2608738783833234,2611080508323464,2611611650962181,2618806127233677,2620375519634887,2623788431218018,2625753537877756,2626910805885551,2633023374568506,2641610347651893,2642503504311045,2651833968467605,2658287974480506,2661391357250546,2662023298318235,2665833107218149,2668521248016496,2671313026437821,2675525837460390,2678336928677512,2680497019271975,2693519224664396,2696615422431379,2697093257227995,2703119946699707,2707831053578465,2708218678481553,2715915477263655,2729224055534831,2731006551655845,2732055421730493,2734700729661142,2738859769218570,2742018183825055,2743081343023861,2743888467937942,2751785297738513,2757079557005164,2758720834995819,2758979243701204,2764250636697336,2766608515295278,2771029262532041,2771167327169082,2774929020087483,2787100655005297,2789215679189400,2794721456334777,2796704110243902,2822847943723684,2823690427545053,2824112773494385,2828383286324950,2832223063283424,2839310794637108,2839525055055156,2840131111024087,2845086895593857,2848124500620503,2850392763711528,2851615637093918,2852640851512226,2858440556030254,2859863407402383,2860695977896818,2868567544019176,2868722569049791,2875394822256464,2875768840498356,2876952463837377,2877071122530509,2881697295591721,2885101058817579,2888998206990875,2889740392149462,2892701240258741,2893195916828713,2893601424052339,2896177891093468,2903470979230250,2904033648694661,2905361464861211,2908444575023598,2911124480176230,2911599257101941,2921971427799899,2927511611173972,2932335002147816,2942563058582163,2946143480195981,2946208695315985,2947035710205399,2948810955001129,2950953756094034,2952664814352903,2956213777269798,2956539890231005,2958358510714643,2959335482526692,2959566715003402,2965352172439193,2965439340704221,2965855909473064,2966360689949309,2973978384800223,2975093256580654,2975414282596751,2977316941548719,2977663445217111,2983750706789338,2984126693118897,2985749744661602,2993577870679042,2995240248615334,299721
6190337734,2997475303842149,2998782348202460,3002144816290295,3004601117971759,3005445693257298,3005543539398257,3006825969228148,3007008399287583,3007436553703536,3016063581505292,3024440815482041,3029401793589254,3035917551240430,3037029199949908,3037243067032125,3049610843618123,3052077662817141,3055458832740035,3061176921317878,3072032250423585,3074112617890076,3076040588704705,3079164924470365,3083176156972821,3086123334924126,3089319907683113,3098540992604022,3101693309309556,3103506635288743,3110882600220192,3112903104807973,3120960716235347,3125993645599853,3126462642335525,3127239755462313,3127515727740291,3132674664095953,3136116654223153,3138943638252170,3140104823595207,3153220014750330,3158074449437715,3160234728942373,3164386809673569,3173783571944417,3175607440806275,3180308189083804,3184825950572980,3190439500089370,3191603569657769,3192369183577062,3193298901760522,3193346797759720,3194861056078031,3196931220104868,3201112500492023,3202793460581380,3219171897424954,3219575037594274,3224535741992415,3225652520990690,3226382984631204,3230186294385431,3243947082373306,3244627180010006,3248922800662151,3253156487699363,3255370232763973,3257337304537355,3265267353090335,3272928147712512,3273345746404244,3275046616104436,3275876017733599,3276867668269439,3278668472321042,3278714300330291,3280780868469494,3280794856819360,3285404600033524,3289047034753180,3294550813104021,3298089165637310,3299027911208090,3306709449273253,3309878495036042,3312036573217165,3320827905894255,3323347352904912,3332103278994362,3333652145199727,3335087116356546,3336332604419491,3346073604734971,3346508186700073,3362515004859132,3365198599599379,3365586794581106,3366181769304978,3367829027870594,3369243531861603,3377357612999215,3378502887959344,3379556656256325,3380377839647911,3380683064402177,3381845747007120,3382728295376857,3383075891087465,3383302464154854,3383677243861212,3384640865212142,3390849838258698,3402608390039987,3404656276789459,3405149696809115,3405393044390619,
3408568306290700,3409525642139599,3409848562939689,3415139398115166,3415475306791216,3417433407494643,3418693183078260,3419312829124670,3421667659970361,3425688845571110,3430483417325813,3431010040648861,3433786393292948,3433893111687059,3434270543355054,3439118720682675,3443405929340821,3443814552613298,3444188614792340,3444648483822568,3452335392026500,3454079754241547,3458841677994973,3459090134521778,3463857637926984,3466754010283942,3468005713457978,3468306935523998,3469621823753300,3484417521606056,3484887355924665,3485163641925480,3485345404315595,3485684351025169,3486271024140478,3486481363201290,3490218326835149,3493226173405941,3495557286227599,3501438911492802,3502652434217727,3509064061394091,3509072379429744,3523958917267613,3531208508664527,3532553615695946,3535903464263126,3539565874451621,3539901139312850,3540376200850317,3541349497756661,3544819360646120,3549947024705822,3556230614643794,3561873326260814,3567631654480233,3569439133907194,3569585416963919,3580260758329980,3584444323393668,3586259833614913,3598107344839577,3599693059706844,3600497750427469,3609452627397093,3611915615904413,3613882437854401,3624379228154857,3628580997551890,3632115466215379,3633516585258144,3636013168823101,3644503150656777,3645611623206895,3648590415100172,3650411848640853,3651208352182968,3659962641982274,3663160485730853,3665217012698891,3670531861702815,3678142299238288,3679412885698189,3679723249740163,3688280883691690,3688327457349314,3690688866820810,3705003329123112,3718621708258333,3722526932992524,3723634868396071,3726938239845979,3728715138302811,3734192362762123,3737946633507459,3740458701179796,3742193997053523,3746284516790765,3750533866251628,3752117756365521,3756936323992755,3757543406733882,3762486477132181,3762751701280063,3767814128506980,3773610790058654,3776595480654768,3776811730885528,3779767620249001,3786173858770873,3794100680281451,3794855359477272,3797500278748845,3797572839534654,3801836630743327,3806047581097738,3809879441266392,38107993904
11918,3821170295134013,3821630362687996,3822296640796331,3823175201590864,3825757657900286,3827560739565438,3831533818899493,3835198273307888,3835558444026950,3837899280987896,3841522241190425,3842446209291097,3856495945466312,3856632581492180,3860116438298861,3862694398476978,3863189469668600,3869850399187705,3871935007496414,3872628172162502,3873324719285632,3878833882038024,3880243619746497,3880529063199350,3882311402640088,3887523678289264,3893564413662650,3899793396857493,3908511683767038,3911082399615065,3917275362273600,3921777467979712,3925362829074370,3928994435189027,3929160579967105,3929583967036139,3934142879673460,3941494939757571,3941525993199884,3942048398609850,3949319172964121,3949404714704001,3951447621965404,3968940236457600,3972341462705556,3973437909773411,3977362201597748,3980515012130917,3982398317594569,3983919240708090,3991304962417620,3996232480056804,4001338100305267,4003000776821491,4010634521845832,4011194441900352,4012917626427041,4018474899910568,4020678940249116,4020893102640326,4023604165179706,4026092267698298,4026688740814878,4034351439123543,4036574510586483,4037588403031850,4039922936524250,4039932863104502,4040392164753436,4041139896587433,4044662871102224,4049703973786608,4051543922389363,4052681338148215,4055364557376134,4062261174287869,4065415697051189,4071633959541762,4079083296648701,4086687744224011,4094432968332287,4097518435638924,4102324633120593,4103314051061542,4114248458913135,4122077681924969,4123781140489537,4132460402529320,4134892291048521,4135928574382122,4138840172908252,4139231196428117,4143693188392502,4150745968454974,4151459023776703,4154752706236746,4156105039099431,4157256439982237,4157669782790617,4166107478621219,4167383773226728,4167492383925201,4168181927338698,4175996866082730,4176623816804364,4183402718643845,4188349160298046,4195677986920473,4198198981311457,4199209071018538,4199346559716278,4199937026193161,4200142600556427,4201374728073667,4210305409366342,4211040452351221,4214771019264212,42256
32034684502,4234174233250830,4245383804030219,4245913779845337,4253320341425011,4255227426589464,4257521149254292,4264631484544901,4266389306044662,4267533238822472,4275580175408244,4289359014707288,4295704004397925,4300982165488644,4301841948469269,4302934567016197,4305626904573311,4312242836327385,4316505852417381,4320671705521862,4320948899808113,4329077654347637,4330412836235513,4332389830901236,4332584479772575,4335310014667247,4337120565239620,4340454370272718,4345864924315697,4346218796838410,4352222894063447,4354953199641044,4356160430961353,4357880027267574,4367125746948875,4369068622168572,4371927952461526,4374872954793723,4377274169565988,4382694020890333,4383134213334340,4387047294147332,4388622108830575,4393722386432944,4394935128907327,4405185846773600,4409442890926800,4418995118878418,4419676464130546,4421125787216995,4425233123315500,4430853131113411,4433648774646017,4434408204046953,4435949176623047,4437545167361411,4438317402421127,4438817177523704,4443045313246981,4444824473102486,4445273578631201,4454575698901762,4458728897870062,4461351844352989,4462449521144694,4464228910638153,4467196354294999,4474000580782956,4474046785524256,4474574743148389,4481408122328948,4486507708773899,4495575342843287,4496415696378542,4501876131664304,4508459014863643,4517133177825796,4525521793503217,4528035909846301,4532427908015373,4535830530899372,4538230925800141,4539716842839588,4551316076842289,4552805172103424,4561703129830313,4562273139429756,4562889929649950,4572477816005275,4573883165195550,4577060828696911,4577457918883209,4577958025008691,4580199770736665,4582265498314074,4585370103466467,4595548152987374,4602434211109390,4607450449118254,4620004533537053,4621113398888425,4622580510893583,4630992980149087,4634141648884370,4640301477916105,4641399081470667,4641458089041250,4641794721319090,4641810465552112,4645238665720809,4645621983164383,4646458337623997,4647556566493222,4651299169613798,4663702386772812,4665559789434328,4669236331860436,4671025914890237
,4671062938394354,4676205735481526,4681240613933899,4683821965014649,4689342516749982,4690119446223188,4694128337468791,4696124657031960,4696193088102148,4696638008353613,4698423835133356,4699216184918082,4704195358103927,4708213524509388,4721521787903217,4723274946162868,4729163773640834,4730341942998122,4750145321126258,4751422453008817,4755912323473330,4756833761182793,4758803188341003,4762437486337017,4762834014218571,4778822146835476,4784620939372924,4784854530655115,4785601845773156,4786683724318639,4788364915970531,4791650559342688,4792800308786051,4793495469956659,4794337453617434,4797047238512497,4800291638880957,4804645672015140,4806827593856676,4811729290308862,4813583810073804,4817878202402319,4819210711953623,4819297775674748,4819311564829320,4822240770685261,4823251614359045,4825955485244615,4828732317464211,4833444690765931,4836894122787451,4838221388703602,4848960069162027,4856643780511233,4859353321888294,4859467776000605,4861078197128753,4868100857196342,4869271395674487,4875972042816124,4879638248944748,4881531428270387,4894597037736842,4900275904853327,4902345078498684,4907673099841830,4912787048821119,4914549573455980,4930034114903088,4938885956719683,4938984906671371,4952413633841153,4957117546097581,4962024566226233,4964966617138828,4966376651170584,4973778510774167,4975540053830624,4978106676024424,4986110732910751,4995563329299788,4996212995257738,4999915977157470,5011891458604349,5012633125949878,5017724733800167,5018157783395788,5018781410893851,5019357482030347,5021445876086138,5030617336717801,5033368364296409,5038003571725954,5045163363224076,5047944681561823,5054098670441464,5057486321357458,5057790328506277,5059239413878415,5059653728314562,5075357793289723,5078143579563766,5082075970958360,5086177235816634,5090517135844571,5095238786157913,5095997925642684,5096759450835327,5097502836207144,5101214857653244,5102026127818781,5102867437873560,5108455108876502,5109339895416818,5112547786374962,5113517669186741,5113987619419017,5125513112
408495,5127124574162351,5127331696245969,5128951178677788,5132612621833970,5143420500944709,5145276514713692,5146830226631178,5148881101936222,5149206177910233,5149437337079666,5149863467137139,5150315306295015,5150426384165948,5152031727525643,5154406455748760,5156461422732999,5157379504662047,5161207050871469,5161248283909416,5164496015188591,5169183503623442,5170033828188437,5172451986737288,5173899903805393,5174341244024506,5174660761082943,5178681082547978,5185496711050665,5199397406461572,5199519828192191,5216652140931560,5222047064350262,5222282340592980,5225960701910860,5230003976759540,5231985318055496,5236341345649495,5238635836185856,5239269458643567,5242274139089145,5247586236105385,5251993594243967,5256157883002967,5260023793294245,5262424254200249,5262515077905251,5265232429826960,5273272937856228,5277050637122870,5284015351506042,5291703312055669,5298034705538719,5302053963299700,5304711911200062,5313230992694743,5314446308863251,5316323217920338,5318153535798629,5319128074583642,5326372158895078,5327554389775897,5328891577748554,5333028765846132,5333952601578012,5334935043856488,5335020091722251,5340420023836909,5345704507186657,5349788779068053,5351104001242138,5358493250319346,5361787265106398,5364846600059577,5367133911213099,5367718245082904,5370226527358712,5372175647130534,5375274967884629,5379968233240165,5380736206240325,5381546956953785,5383368125410553,5385746119086850,5386810090653851,5388894770243354,5390748522455977,5394080989067132,5396868798166091,5397913443318485,5398598214938688,5400291293650351,5403110884625308,5406614253824595,5408167860716448,5408684428107176,5410632742040879,5412454472585029,5415609710834006,5419089032931253,5425776155703968,5426343929555151,5430164168254949,5433304691996431,5435448562589939,5440154526785082,5442550985028476,5443519792343783,5444690183081885,5450650932615338,5455725933689601,5458279931832152,5465378018390073,5465468293521107,5470523383371739,5470969344336539,5475821882378207,5477177690474931,5482
250465546522,5484161477936466,5489284312856062,5489369658364069,5490569127868792,5491063328575234,5493319803023083,5493870604473986,5494363692102912,5495499148937672,5503103710464614,5503513389639664,5507615794296947,5508503680735494,5517653225284855,5521521718617004,5525271231853414,5525784499514800,5536775761553566,5543069752424250,5544364757593219,5545802799820933,5551558523406896,5554380825386759,5557919753549640,5559553479542169,5560825696398560,5570645491117548,5571611222021582,5575596617449768,5579977210132164,5592415623299140,5593426453232836,5595805533420086,5602700450924399,5603538322319793,5611751716804501,5613862108602356,5614909593083489,5620948070768841,5621064494334548,5624980176214575,5626090376879123,5630820724993122,5631339882742194,5637312677638017,5642756368786070,5646619733332345,5653701086020480,5656741558549497,5658909534123937,5663092617828839,5663224746690969,5663385121189069,5667246937392321,5680063203437155,5685769039380624,5697986357589265,5699319238678867,5710195316669727,5710364792755140,5712513634730123,5714049549411876,5717070125530008,5717707514404603,5718933939076632,5719318350374149,5722219703153811,5737673156168293,5740477961003848,5743439881123553,5746087812806105,5757334188672719,5759061891073762,5759605430401092,5760917932621246,5762846753375421,5768847482599071,5775786807466214,5778465954807704,5779727192598359,5782454578043069,5784207630429531,5787478509200132,5795310516601245,5799653338299710,5812168700056249,5813405909746709,5814659226632216,5815769481430106,5816254259189301,5817139504392687,5818962864882161,5820855454510934,5828565941868585,5832544070159164,5838532420889856,5842761102579491,5846508833190611,5848249740129972,5854937355756548,5856022939536906,5858993021903933,5870645665739004,5876393418760803,5876736261749158,5877001606837482,5879985416123329,5885444131631387,5891024028896298,5894685367317522,5898490390630341,5899953571983439,5903023130181011,5903484378223350,5903666264650934,5903694388453077,590592330710535
0,5908696090042283,5913584686996270,5915859656997442,5915879684950182,5916931928833529,5919430186483428,5924117520799744,5924879546681533,5932662790863585,5935312555770245,5935661430493388,5937817448042430,5947101870703205,5950972051903959,5951041949791953,5951726048170658,5972218700939363,5973250287141740,5973677832178283,5977031849078959,5990178361033769,5990570628261118,6001206595929325,6002276243709531,6006751473613523,6006897308518556,6011214467146689,6016555602340361,6023708485790729,6027671084996298,6028501758004444,6029374529344504,6030811056861793,6032097611334280,6035955206980948,6036393255326821,6043316342842623,6045838883225433,6052608603431831,6053804686685524,6053958863313339,6055268730727559,6056807484550007,6058584581621317,6059626128614092,6070104051324693,6083108464119662,6085590822787717,6090956327042002,6094860440146813,6095038202403332,6096229123772452,6097073203191647,6103891026698555,6104688884599994,6107758160260988,6122246130680759,6124004526649147,6125907283269891,6128284886205816,6132924004245246,6137419131615699,6137499190958511,6139205116246240,6142105213155394,6142688368084569,6142913125046520,6144042650109511,6144095475272536,6148853279972165,6157729055897116,6158322788243107,6160835787414693,6164759784469239,6167018513092476,6167607991565253,6171707826744820,6174144459043731,6177490881631784,6177692002705307,6180599532722384,6180617553423208,6181251707047392,6182491554240294,6190932940940803,6192358086666249,6194877677308546,6198857031919734,6200081358996517,6202082397019416,6205024632916134,6207302580763824,6211423761058583,6215284878845774,6215440151333948,6216154454451807,6218233647565127,6218512470330013,6220604711061324,6227071256308643,6233498546819624,6236798857143890,6243407719027967,6244874563473516,6245142031174040,6246838555534920,6249074081912395,6249758679502616,6250557402527662,6251868356478317,6256321719332215,6261686455216337,6264775741996340,6270623030455246,6274969421000503,6280006843403268,6281149158892909,629696174
9764111,6300447716613718,6304297838290460,6306042178881559,6307115079760207,6312245698368186,6322233760419356,6323173079753451,6324812402453371,6335023550154225,6353191312166053,6354692547077099,6357396963255601,6358389478584958,6359135369542064,6368768790664617,6371090203570186,6372329995436972,6373927709843793,6374693910197545,6379348598089064,6381541745622015,6385800661078082,6388937996347478,6390863793314128,6393248753656632,6407210526643342,6408730838629478,6417566655445436,6423382525818443,6425609470770570,6436083078959781,6436814879261326,6442916426075876,6445879733828998,6454320146259766,6454656458940756,6458237633544796,6459246249643680,6462957099049843,6466065562806689,6471689417982028,6482532068799143,6489722108699832,6490299073029283,6494216993344366,6499734594021057,6503186772282900,6504859169508928,6507830449842499,6510975929277435,6512429809706119,6516226128433302,6523391982496272,6534435207304569,6546595430555691,6546752598032205,6558599467268843,6559912537272461,6565025138801153,6568196183285198,6571097539012216,6578237361036784,6578503095620162,6585680325396501,6587654701564589,6595708386553174,6608001737567202,6616149821789620,6618251453481948,6618435915029628,6618676721046335,6621057137514784,6622760768363465,6623916460399609,6627687708278604,6627783782930560,6630093742634040,6633679848040650,6641201043777485,6643521507427133,6645310761621776,6648932781635955,6662203303202718,6668468822319197,6674364905825227,6678173806211118,6678617623247346,6682614209582006,6684865188797781,6688488067507556,6692574202860978,6699342662745446,6699601051457480,6699624106317412,6704989683409671,6712177752369021,6720191931854899,6721985110519865,6728831533279314,6729387689420763,6740158910360226,6740782983077394,6741215022206694,6742696713154438,6745753170262651,6747208404406959,6747763101536606,6749753901939374,6752595765235073,6754613815657166,6755291783706242,6759785636657525,6760588036724092,6771281046319400,6778747192742624,6785714707329795,6788002061659117,678
8203480253865,6793843162054529,6794299705676629,6801783177540135,6803312883106461,6805314972544938,6818759912474922,6820988811787355,6823062398472026,6826362904176932,6827299283549200,6828195113056916,6828673134192974,6830256824394694,6832757900847449,6839800914177389,6844288668932623,6844336660738179,6845320561422543,6867967645790541,6871074917650344,6871762569059759,6876239502460282,6878773843563416,6885815333874594,6888484602467342,6898143968669196,6900004638975216,6905914351227176,6910834673790415,6911812457892129,6914055104334094,6919747802868531,6923486337221090,6925667026147338,6933453950416148,6938107292471175,6938223955509899,6941024220883212,6942033581849643,6943458456583346,6945905024717815,6958904824641253,6965104736708795,6966317419189633,6967131174911621,6967523404032078,6968626913917078,6969723087551023,6969879544962764,6973244897917836,6976680325243925,6979025352783650,6979370520679168,6980494356161697,6984882368689931,6991429378659097,6993040182965693,6995106444038232,7013532370262403,7016766698233684,7021458253363648,7024829388289761,7037699445900528,7038117440889701,7047751781972594,7068328396622979,7078519684380004,7079136072514786,7079683617185872,7080593167563472,7081754833089042,7090263369022454,7090827224293435,7098844983687608,7103287349340715,7106775558992670,7107670335907433,7112480093601650,7115518871031565,7117602179959460,7117702213846209,7121896123546063,7132927597129919,7134741662059672,7138333125007121,7140267571474431,7142051916052761,7151557370156782,7151630939274842,7152477932457527,7159009930103737,7163383528300707,7163778957994361,7164516813412020,7165245447295611,7179002153816766,7179903040144596,7182233643190269,7182765199375506,7184665213216156,7186047534415508,7191096990307504,7192853768347715,7193706102549630,7194390208322086,7203674420028153,7205356147061987,7205440304046385,7212278254469221,7213120783173544,7222827358840359,7224545604825492,7225295067327053,7228437063271890,7229216536358274,7229486634256081,72321255127352
35,7235403866067319,7239097855225524,7242588962722849,7244564678730935,7244768482327881,7245566857344055,7246915842148466,7248035515958267,7248740619006619,7261781244132035,7262289934411894,7266821489213820,7276326602211605,7277092712154010,7277675196574359,7279500143289769,7285678364340995,7286552397490444,7289711990915466,7291166276647549,7291878183300562,7293014405240845,7293120299138230,7296021783720685,7302877510804560,7305136552146877,7308326077425136,7309526818453637,7312457538294268,7316330078533123,7326575126577204,7328477651553143,7334460645181314,7338010137201066,7344453486207090,7348071649723458,7351715416841733,7354010904401429,7358759012302734,7360258178944323,7363914809349690,7368773239687673,7368990891984284,7369067702745463,7370610419112862,7372069375963896,7372949260070506,7378192097899948,7384234111099756,7386045769570324,7390506088312346,7390710096067356,7398773968230003,7399651877743543,7407156702916619,7407505476310295,7412075044877324,7413103855739244,7416878995770443,7417889495517890,7417992892826458,7418488700207131,7425782918083640,7426542400535424,7427800180129173,7428977237077501,7430512069482015,7435984239692232,7436873913790602,7437474890639138,7443107233720726,7444471858317499,7445843328639391,7447921740402688,7457626872755122,7461417070760230,7462663569101078,7463631301411392,7464791273301861,7465698531247575,7465989092511044,7468955978108988,7470414970130956,7470691742535496,7472016146671178,7475500532623013,7476427212005998,7476626636382557,7490128336689868,7496125413626459,7496331600674721,7496586171914845,7497334675461455,7498030447626019,7498854721985035,7499741802632764,7501028258951772,7507971309059323,7513140878034397,7520995594935128,7522857939238615,7523568386509127,7524079580225271,7531837667095752,7535413758699104,7541987648743096,7547834066144263,7549509942679840,7558348891266390,7562422547591358,7567376160342247,7570266295972391,7570701292061586,7574807836993530,7575973317107614,7577088488345910,7577833848718658,75786385
79275105,7580092247778121,7583079410262922,7583396165831142,7583764379769101,7590281297167973,7594145655959215,7596032551054600,7597146399455249,7598973979549112,7607301189042080,7609709905585175,7610703887034738,7612217782008852,7612590563938043,7614314828758987,7626611098574312,7632486697445093,7635361684630569,7635781595842092,7637503540227229,7652643882172024,7654546417001702,7658477424630193,7658594690166121,7658756342775449,7659221163244052,7662988662313759,7670573068797315,7670620641239355,7672952384552853,7688048940346303,7689154854909634,7692157387739297,7692684970839720,7692715035163949,7692872154514462,7699490741028377,7700977357734436,7702985130021974,7704411344416172,7708512355032444,7710242527125752,7713386097094125,7716490464557575,7719367164738556,7721077927518745,7721609917312402,7724275439905728,7724711616367069,7727393378005013,7727688737658572,7728376014820122,7729310710317073,7734200858227759,7737983217239889,7738701740734266,7739863878221100,7748491852382750,7761354279867729,7764178055458929,7766279931264675,7771590301023685,7774873548808587,7775101063842022,7779960077213875,7786006720140585,7786623092023665,7788620682842041,7789614831618262,7790698030398764,7794053661517357,7797582785345941,7799750768370576,7800931705061909,7806696954752845,7807627836703959,7809275090934984,7810562577473506,7810998696754018,7812875292301653,7813351823453750,7815570269509758,7818433714447086,7818985241100641,7820811550237674,7826881780965015,7841588711354365,7852366441407035,7855477820333867,7858337898821330,7860940256961100,7862242994456228,7868991532284921,7875449163603356,7877526742599223,7884811158802680,7885098411306234,7885848475582109,7888074400901569,7888437547783278,7888500835141269,7899000176906699,7900170751256211,7900621082983922,7905781741383824,7915002755515393,7916680314419175,7929090546906102,7930649670973791,7931707661135896,7932664861586881,7936886244974173,7944913299381989,7945929831888496,7947631560228941,7950990573236144,7954932398971022,79
55707767338343,7957474102755475,7959145831679961,7961836057698682,7965048882911994,7965254194340254,7969662040440070,7980381451760704,7980587376998310,7980735420462798,7985722197062386,7987424295175338,7995443309794354,7997135431578468,8006897017337402,8006976419849250,8011722895901690,8020262137556052,8021771731265660,8022639767891247,8027888930993228,8028030091531706,8031622641498868,8032268170160689,8036544756403250,8042606792528101,8059389868173903,8059450627794768,8065220473196449,8065582408992410,8066856250710691,8071865664728388,8077418220136860,8082463447862295,8082820682426435,8089588938475723,8095523873680122,8097390569526238,8101747032613392,8105559582340911,8106928489710751,8111834837966998,8112104031271206,8114237531654532,8127670090034835,8129591252255075,8130207971385588,8136369282029634,8142129094140152,8142634926688230,8146423080936368,8147298843986804,8152440707511793,8154879940866529,8161574196354384,8170876711488279,8173481093695052,8174511269651377,8178398099602601,8180578302229082,8181586463868245,8192542128082353,8194052295501546,8194969512324171,8201561228078010,8205287009351885,8213078648151372,8214059341854817,8216440810041214,8217581100021235,8234783421929082,8243069565420263,8246362229814725,8246571670186657,8246716772071880,8249298655984594,8250387434460343,8251262500438279,8253292038013840,8255193598573136,8256150707384279,8262954804685181,8264880757955296,8269769863632137,8271331264772547,8279296087284122,8279996819873290,8282488428172923,8284234491989688,8289349890472704,8289498162240419,8302834672979549,8303400292309646,8309633984147819,8318696076968435,8322561813637125,8324890577048813,8325237800434916,8325244982386600,8325938551251775,8326972902154084,8332190045511090,8333273920187213,8333295411468735,8336642144860625,8336794237476945,8340676844945415,8342359556228636,8343138042400877,8343422431629489,8348222101988490,8348990845348274,8349203373840720,8349929839053237,8353187069342082,8355184882068744,8355216613777188,8357480319128
064,8358858858741708,8359032487697274,8359937481115835,8365483157606045,8369163766277845,8369986014094300,8370512453717737,8372233568621885,8372801112194483,8378780774433401,8384716609191558,8390262752827048,8391964943990114,8391968117889949,8392149294258272,8397902100674760,8401772412872195,8408375889672914,8411174366535457,8415585315618209,8416272151484023,8422456048756653,8425746898298470,8430090272302554,8437064789335853,8438071844029923,8440380356686069,8442951603563362,8444597753784738,8451164588916518,8452143133964194,8452834615819362,8457432590283505,8457870432601145,8460876401008247,8462195561662212,8462356309409364,8462779787380437,8463894151036866,8468076223787436,8469301843796323,8471378426717330,8471415745047933,8471563440277386,8472404382970836,8478129640091772,8478656700478130,8492172722223897,8498566054392581,8499229793648454,8506577707409417,8509132075144310,8512769287132428,8513163154527267,8513504634548185,8515344063519634,8520471745574237,8522204087666054,8524148859687867,8526397555774454,8529230741888138,8532559561743484,8532654039076026,8533878116076637,8533995416155386,8534972628296620,8535864863010513,8535971593214147,8536095899242944,8541155088407770,8542730575053594,8543114315775141,8544314610710198,8550168506511268,8552407797606671,8554633315621859,8555174066389539,8557956937889635,8558151645997922,8564419827076679,8573037870741832,8576579691621984,8581367096989732,8582261197731227,8584103263448365,8586129879796676,8588390118640027,8598659143101208,8599129846364328,8601522207106423,8602281558256184,8606118628054373,8609962853072397,8614121798757181,8617181526340432,8624540329478798,8626391389132139,8637210920266035,8642764308475012,8643781560678532,8649648448925106,8654788098657391,8656720607859172,8663480084138334,8668647436071212,8676906171988104,8681093783245916,8681657579012301,8681913385441783,8684040140251633,8684751768894594,8687233995374161,8689451052146568,8694259890202010,8707543506731647,8712178326749055,8713167337349649,8720400
968274939,8721624366660549,8723528441052177,8728082879135077,8729412146383037,8730722008863330,8734134339123262,8744943197261790,8752084776243496,8752229489182462,8756024741015291,8764086372954576,8765499574275666,8771763736993583,8774891003290900,8776098695299089,8781771689282057,8786064399236492,8788645315265601,8789521527155337,8800564096781294,8802869383602903,8804388443116965,8822012246032459,8824420613904824,8833617601200992,8839028706384124,8845997843748149,8848441586602708,8851257701148635,8853468904397509,8861373802714789,8863519477739503,8864760219124956,8867353885504610,8868704959256862,8869726068715730,8873178736038157,8877414428990774,8877639105413482,8888927845730161,8893272613048649,8895434307439131,8895481679956001,8896675958607703,8906632086442921,8909764344727962,8910696373745749,8914511413857104,8917771334029599,8922546229199542,8935766685372472,8941687890115326,8946727890515135,8953024300985451,8953097697609406,8973117667284369,8976689351398218,8979052050754475,8979138566806786,8981576765955511,8984458105294828,8989003157868761,8998231894652099,9003633319081270,9007488636990326,9014303703878842,9017342455231486,9017775639960192,9017891402481729,9021516015590569,9021894799811309,9026119584500749,9032187371857715,9032210357540524,9037381687162545,9038550674961458,9043118071475817,9045075196494312,9048587505711843,9051912387029910,9056929643995395,9060898928960346,9062131144374189,9064531090448266,9067993556866039,9069591997036415,9073037543243418,9073542014929237,9074741782974562,9074896498908993,9074954160428210,9079318768294079,9079888527353695,9086751601932227,9090741520266450,9091979750057327,9094268438464734,9095103196939357,9107447274695798,9108055709626815,9118451715638510,9125362263593744,9126355422536516,9129589011938145,9141233415372106,9145865355932391,9146655579064944,9150924106375181,9154695178495413,9154906598387783,9156606782616571,9157375841856013,9158676476944194,9162668962659557,9164560080137859,9164808695055580,9165398869818013,9
167897025495661,9169656716086699,9170930061820260,9173729001963953,9180092141515547,9180835981759675,9181392291811972,9182988335016139,9191103445945706,9191895644403376,9192998929861767,9211473363037424,9213354462894451,9216455003394783,9218987363983060,9220215058481045,9224473925896720,9230580606053228,9230781676144500,9233097697027556,9240969247386165,9241853489961590,9244558133126668,9247060266595808,9249469378126723,9262046728311117,9263681010079025,9271020411926998,9281037194537819,9285141224306324,9286405859692622,9287536154919442,9292441630319582,9305483767431545,9307808415233512,9310504704434237,9312863640688829,9321928567260274,9335430691435020,9349870750958121,9351366438163042,9352141537262568,9355129827150685,9355940792017453,9356671043680573,9358554525655764,9365839127076720,9369093634111987,9375641216209833,9380627712808545,9391486822118482,9394976106188444,9397328261168293,9403765537033865,9404565557844940,9408921495622508,9418107590281736,9419050885946372,9422324913293330,9423486081818391,9424880153466631,9431002060282822,9431492779408494,9435750849137196,9435999454995538,9436467803114520,9438440967152456,9449047384009255,9449340008831532,9449825490510575,9450124549058915,9450266223912705,9451923415826938,9453754181386129,9455474915077424,9461773699588349,9474415802799799,9475829378737099,9475995871994700,9481386469401705,9484371784536628,9484984344438553,9488440660460028,9494545505111169,9499991472364593,9500280517945270,9506279993556395,9512690141526979,9517376601859417,9535833980442651,9538837225746487,9543648898717657,9547557360432895,9566613425099599,9570293654144253,9571330378157840,9573924489710217,9575165150956720,9575447421008044,9575885834850839,9580743881484339,9583968797309156,9584561427927733,9592438802141140,9595887445842995,9602902468421116,9606089004061910,9612716915528679,9615883008659902,9618201274077735,9624534057788017,9624758856235873,9627779818273370,9630291603264391,9635050901818979,9638553113056923,9640256664119083,964136220870
7358,9645470246174746,9654460153869349,9655820024254396,9657079142409314,9659932051785010,9662025225596157,9667241625101916,9667276386502943,9670575633086786,9672577363633156,9677328620414229,9678605646922309,9680945785313483,9681067738687093,9681267064784409,9684623279089548,9692893983377486,9703534916274044,9704279295656371,9710300281092436,9713887926847430,9722172985687510,9725792147964753,9730102559873491,9731503574801034,9732040155273334,9734113097684625,9745464335501377,9759300027118446,9765786817226776,9774084603677009,9775954892341561,9781535106471005,9782799358625264,9787218473221709,9788457577027091,9790881050593378,9791716416162907,9793594547272040,9797573432707706,9799889278686304,9800030295823990,9800643300496312,9801531586161304,9806868721015896,9810257422545913,9810585298035378,9818968963003545,9827735758375406,9830504103227308,9839088699938530,9840344155855293,9842146939578796,9847076554159479,9849134001046702,9852441706161363,9855206514878035,9858589834827721,9861618221073016,9868175191633702,9870764755744667,9884359216488201,9885568741169094,9888383442614567,9889475178157085,9897712283656982,9905482398851907,9905509887050233,9906275973443018,9914537510086498,9916047888280925,9925919096295581,9927737657495008,9929573922212347,9929764483111561,9936648632787612,9936930189944836,9942546535848415,9943414292728350,9943562503792795,9943864011523125,9944926015830475,9949019217971450,9954313852436822,9956729180121161,9957485611486827,9959245907790498,9959649353025290,9963611887803061,9965461136179694,9971538903925806,9972659184707003,9977300414299533,9978079501560780,9984981814399679,9987074046553413,9988183784680681,9989307766906661,9992489863586669,9994394125866496,9999648366321789,10000031604968879,10002844747369936,10004925495910091,10008184940193864,10008461597296659,10010370056943249,10012185312140615,10013671898596744,10015492885655362,10023685800384052,10024272312740748,10024518507785908,10032518948661767,10035990167276835,10037675774604732,10038212
235930891,10039680011817758,10040986745269183,10043143509796825,10059920635183403,10060828780412828,10063209359600748,10063600315370974,10065868826751899,10069449569085214,10079621948289706,10079780537607131,10083178186730993,10083835921342635,10087308692864238,10097389948237264,10099505447983235,10114762120457964,10122602940701424,10127093259012746,10137337268168058,10147088443674066,10154032807160608,10154307535853042,10155542068394802,10160572363193486,10168672086600643,10171123087736692,10175620188550308,10176491974309391,10180241838937158,10185718177412770,10194722954029343,10196461247193047,10198150880968885,10199671620517651,10199695570402626,10213139479075307,10213463533232206,10216483252367678,10217695934166737,10224107621925080,10224341861439251,10225134693123796,10228698111458386,10232055143615071,10240074124610978,10243488805517527,10246709309375903,10250309444905037,10255002992865388,10263727029912731,10264525977886481,10266632870176478,10266820110531598,10268623122698602,10270163349933746,10271469102327032,10272466256560721,10275743124733814,10276224552844118,10279570843072650,10280390537061924,10281974844357620,10296558578829119,10297624439979852,10300473582041388,10303678734793518,10306671241348195,10308330126619188,10309932015401873,10313330243851850,10322521592618940,10324994061564475,10327639436128444,10327836487278240,10328062775184436,10333141638123601,10334247661646447,10336137590649056,10338920736989409,10338954267321910,10343961504376443,10344652626799903,10345084432193915,10349133099508569,10353863721850444,10359698556732429,10362411781950181,10363323817948865,10363856866900979,10364382029087166,10372218357405989,10378769233943499,10383366177443313,10389930038194229,10391227460972667,10392794184418963,10398456973956573,10400718449268838,10401822723036473,10402496249924470,10405138061495149,10414467140029313,10416168489938655,10418627906817111,10419717389280422,10420088526584073,10427907045176189,10429449856647102,10429527060520295,1043574283
0395507,10437768643316041,10438557857523653,10438647586807590,10440006848965726,10440487445875223,10440675814381911,10440838933113584,10442812714871140,10444872472907940,10444918775580154,10446053077534203,10447329823526441,10447915458986158,10449478084214979,10452337317481683,10455776599349428,10466519528405133,10467269725152768,10469454767792558,10470445288089220,10475593353115645,10479056751944619,10481951551463793,10482250308230673,10483827694271696,10488747371854077,10500385026427457,10500488552967312,10501260356111404,10512322942799654,10523297170953308,10524197477213077,10526510768148749,10536793015345856,10543343012752730,10545121874221728,10548308070444431,10548369311321575,10552704438057457,10563044468102777,10564172672934646,10569936885011978,10570501131760442,10572007743658085,10573848693130061,10576973213646289,10578269597246868,10583112117834044,10584639348411680,10594696458041961,10595951863277111,10608578244765171,10614584966626439,10620592192554759,10620798042888920,10620903315872377,10621082248937070,10628643234912171,10628921785417252,10629062237330307,10633505584587868,10635387052790493,10636120887409717,10646006703435010,10647477117223054,10650028403614625,10651121348848499,10651345084987832,10654625803270603,10658173942822862,10662504984834470,10666212884489150,10674748481093869,10682941113641784,10687323304253849,10688559968805974,10691866174146243,10694995464922729,10695927691488062,10696210215422820,10699195330093884,10706304398299584,10706909043141392,10711714669403874,10718007421668166,10729128988836847,10731904283058389,10739494197398891,10739801112151611,10754362437493971,10756057668895469,10760825923948907,10766441205077631,10772046172473065,10774595446367799,10784226394558794,10788763405851951,10789995657189753,10791861050542069,10802291531494117,10808364573742279,10818898059592134,10824560278958441,10827594971106799,10829139547484424,10839803231136688,10840559858292177,10842762916923867,10845347885148728,10846960440347385,108472531046
07377,10848123266416937,10852276831524456,10854428106437565,10861241406654155,10862767928581833,10885189706501902,10885610118151807,10887994298217623,10889705602128169,10889922295232389,10891169429104356,10894930198422418,10898755272190895,10899505158719250,10908847810397854,10913442039163653,10914509218958331,10917306751752594,10921844952698928,10923207650640427,10926914968716545,10928023009400926,10929092524133184,10932202328627689,10936661551264740,10948651072438721,10951948612195804,10957392874098981,10962850107056207,10965247315180552,10967958009290469,10972909371352109,10973378500551496,10974341394948959,10975117317621830,10984973432275198,10987375778918246,10989202769093484,10990409005986868,10991558669034873,10992487444525627,10993385229040135,10995462961190157,10997692366332970,10998901653014457,11001665093098228,11008808435120331,11014427382218185,11019805073365185,11022642525947194,11023763375301913,11024320739845292,11024882796236184,11026937591363346,11027888013301225,11029825398794377,11038025471456068,11042536781194006,11054833551017746,11056592382175061,11060604597813501,11062402375665545,11065372708374227,11067445870108559,11069851171800453,11078922436292795,11080633936367688,11084732592274012,11086682643753664,11090552698103596,11090658585225857,11093188409391605,11095383304270179,11097675961341227,11099369753166044,11099551546210604,11106506387649741,11107790185802013,11123159825527527,11127177251318180,11129601052279134,11129859357638708,11135583018648230,11140988948695005,11145709417931599,11150199869893669,11151387183049239,11151502397313142,11154110766156613,11155371761778170,11157295858466191,11158629464450723,11159414148224323,11164777782395177,11169430954279703,11175202072568888,11175852637265303,11182614329697619,11184082345585609,11189705813167959,11200002087496543,11206266985990044,11212811284757912,11221071832453748,11221944462355990,11226519618035312,11228541607320093,11242771850103375,11246366304663746,11247749010895948,11248516680815
322,11253108954825869,11254820718151050,11259273102107656,11261264491503353,11265599260848364,11266044641422248,11273050277652437,11280555107389759,11282775695475018,11287600375177114,11287893566206082,11289098740261755,11294033441021320,11294773174907559,11296245208210814,11296470298526751,11305713023756983,11314863622778589,11315258511556799,11318554733251064,11318702654300182,11319884533641300,11327586528497295,11327962158491035,11328275638112363,11330273278512837,11332552015667182,11332697629166595,11339380783297842,11340864658831256,11341499658993269,11345417090314925,11345675657880008,11345676553858105,11347392098829511,11350409656754127,11351342558936179,11356129989349558,11357460555425130,11358086643159577,11364648346737734,11364824056279735,11375384258325108,11383750361347134,11385168267308423,11389085993650118,11390806798887033,11397120253984795,11401700519195899,11407990305495344,11409131295056209,11409539605925674,11412775300232418,11415931058237717,11417029279499404,11418837401792416,11419050457279467,11425460679750797,11425662918976551,11425674126078597,11426226096713572,11428009680185526,11428645865887160,11438749233694215,11438964462131562,11439459859045383,11441352104880750,11449215149184077,11451628260579985,11452907797520228,11454513203644820,11456881578527630,11457346667741182,11459692638425608,11461197052190285,11462322406268747,11463847343590781,11467023328231824,11475621626604237,11478518643790981,11479308399516360,11480479501736039,11481618683495992,11482045627548411,11484846169745779,11495135828909763,11495940916242717,11496597527405109,11501735487559917,11504102260549809,11519358680969077,11520326123589592,11524872224376445,11528838411980010,11529868184218561,11530775986224529,11532404505521434,11540894335577353,11542654016596156,11544013273650036,11549882026188551,11555345985992577,11557042562885581,11560241639867312,11564144512113061,11568498682605843,11568678400586201,11571509198989020,11578506049932280,11579799804334255,1158013750298710
3,11581727638901896,11582679596601249,11583117656203215,11583507699894777,11585101265495681,11591047076552913,11597838643220140,11599758357243289,11602974466808884,11606641527777687,11611075089640896,11611748354430947,11612441214023420,11614076397936832,11617014574952198,11619486235348603,11620240919983985,11630644392646970,11635654976144153,11647860207129349,11653989895623444,11654096880041734,11657305088193972,11658683525550614,11664834793123518,11668903396015121,11672731802674267,11675090294053441,11675693720283985,11677165015385087,11678549904666632,11682179864909511,11684720239124533,11686779979541908,11687293075071490,11688194680736119,11688237614883154,11692359880827672,11693102982874445,11698756388262452,11706304165327067,11708423865084678,11713690607980814,11715198530711638,11715865254416537,11719221642455460,11720785116417048,11720873374212079,11724825796758899,11730215861616220,11730467845423531,11731164734577661,11732952721643258,11734286258484747,11738267971902102,11741745751488248,11741795538258227,11742706162990999,11752793473682764,11753431031818860,11760357200965436,11763018637241008,11764356908364626,11764874035248246,11765704631176088,11770706878359270,11776764748856507,11776942132964186,11777247958775743,11779851101536854,11780837550785665,11784146238668811,11785019194973166,11785179977172715,11786257969749074,11788283962253789,11790027713938832,11794754868481503,11801008269183730,11805477304749933,11805878919584950,11806503727533719,11807259505447243,11807669581624359,11808975572599296,11811220919611763,11811523671191877,11812158072536409,11823032149508223,11832905268834899,11837866263209531,11840295016246440,11840740382069149,11851393982879753,11852586198115184,11853010088136902,11863824190392357,11865542872711201,11867361302652812,11867622238392053,11867650483762221,11868412894656304,11870483176868986,11898179901887368,11902610012478374,11905938593275593,11909747036535415,11913133672980273,11913579970744326,11915166616058954,11917437865006221,
11921035710134156,11923308867485120,11926548936220126,11926805367659349,11927305572643559,11933874408252082,11937995919116415,11939403507435534,11941423373859996,11943720753021897,11944383058348300,11944816331130113,11947591791853532,11949017351281018,11950260926652836,11950767340516423,11952970464557834,11957777053959725,11958250094792465,11958326503192345,11958460182520420,11971876905384523,11975100060815344,11982172621189391,11983457002283111,12003675199241448,12005848174188578,12006522182584979,12008453650739367,12010943352799406,12015639579945326,12017040715051459,12017280196275791,12020813104002531,12021776657102177,12028404727423728,12031148585055714,12035410219539172,12038634615780325,12038984699485340,12044509607056046,12044968098240593,12048218812219413,12048806622801494,12058410162198406,12063913410743326,12064775457651800,12066617975736949,12068867551957576,12071821708133242,12078782315854672,12082781112646069,12085458622840725,12093768254366446,12100943786761249,12103342077182428,12107118790447412,12110485781570692,12116763440852785,12120377787831548,12125020327675301,12128081806219839,12130816680520314,12135336896340352,12142933439996132,12151932314546584,12155534792479002,12157910100394424,12160896977780321,12177544020169884,12179235809448305,12180040689184058,12180082671639635,12186122156905100,12191783234987303,12192518732233721,12194188433406465,12200556590308310,12206961056205598,12213487168281093,12216273376124566,12217037909052392,12226080030988795,12245236639338607,12247196809844638,12248146350462714,12262311222789758,12270437462627588,12270468054721839,12272177606302533,12272459725983251,12278383271194356,12278465206346582,12283300256073245,12288007619457690,12291976925956833,12293363361122131,12309693411990179,12312452990243375,12313776804764473,12315740660505432,12317737462653854,12318972117511665,12321231790044265,12331039831738345,12331300696945968,12332506101262263,12334476628613713,12334531906613812,12339633893332240,12340540257772691,12
345475198663391,12347889475047477,12347909851317459,12360123775491680,12365750013844755,12370916479710467,12371561019916631,12371619531441513,12373603008943996,12376979801050250,12383499878769639,12385506900879288,12386587160860313,12388152182711242,12390527382979238,12399457458248220,12399946676042065,12400047520221171,12403402985696867,12414844391454468,12418785272430115,12419850567303984,12423854238100384,12427118539066615,12438014718231534,12442226783163244,12453429222313446,12454152316786214,12456127570195279,12456974565702299,12462037008975433,12466553139749819,12468960135907940,12474505023309244,12474737975133205,12475509972236592,12482618753698829,12484792688815182,12493121285865448,12493531949000940,12495098079276198,12496980807002515,12502404720988619,12505394079431787,12516465480475702,12520510172302125,12524882028709030,12534442305927340,12537207717978996,12537660041547670,12544032455101983,12547299118310242,12548400145943534,12549910268086697,12559911253268655,12561212829273378,12562621625488566,12567396522160703,12568269490941488,12574953991866005,12576978802157283,12579381981407506,12581297128593136,12582417504397196,12593140979984180,12595025894277088,12595568697579144,12598104253633453,12603845745843686,12606498330358134,12606901469342342,12612529707466049,12615183521565762,12626861709059593,12627315009999764,12632687195472462,12639293891627838,12639418814809649,12640167527828266,12640428361474198,12640818042674893,12640871580590329,12640969010574572,12643494240426710,12652136957477942,12652319841467466,12653336178984584,12669546391130781,12673433235167362,12675636205599614,12678598650222967,12686784777563923,12692097685841819,12694087866558340,12695944838331405,12696892670705054,12701707285742221,12702962657957826,12704004077140774,12708456140034254,12714535709904775,12715273171560431,12717959780809396,12726377376546299,12726436436675201,12727033270826653,12727451946669105,12728089463396868,12732375258004085,12733418262925777,12738924440303227,1274
3533106187231,12753329851400984,12755303856704861,12771267826669947,12771598737920523,12781473711050942,12783187525335089,12784227033504239,12784609326080007,12786352863464886,12801400431375545,12801458013719616,12805335924853732,12807665721211120,12809522469790774,12809632657832569,12811342685032672,12812182344672176,12818427882691906,12819996417836695,12823282271321919,12829072084255416,12830088505582363,12833367241791767,12834860563514747,12841765441041049,12846385534139025,12848207419401226,12855844760930756,12859899907121954,12865701070922173,12868140593650867,12868775358248804,12875124642594600,12876334808558683,12880367229968159,12881424980533677,12882980822794716,12885114452454742,12887418415101226,12897909619055754,12900385538356311,12906788489234465,12908599797018073,12916112665929539,12918769262556623,12926924362852450,12932353138648641,12933527428631618,12934033598192671,12934350686066194,12935326619280462,12940588789639096,12946489524267653,12947522776480207,12955628525207272,12962843219163460,12963051823146860,12965554585695816,12965962908737325,12969038927449240,12974390471676060,12979430248880345,12979490664124573,12981848516554289,12985447175738467,12985493722134421,12986155313057080,12988371356512075,12993874964553291,13006729868315951,13013340987113008,13035980728818146,13036179358537986,13036645177838759,13036712601500114,13039280823665945,13045554401715049,13045600513120125,13052620397276151,13054236155028234,13056240047123862,13072501768493061,13072617923580734,13078916872324125,13080772514935469,13084504306404264,13086192967607976,13098251820355349,13099158956896694,13104310576837531,13104339456644663,13105958370560896,13106875818164772,13111138396854003,13111577005851999,13112638257280506,13114710504214063,13121300725611891,13121909629776355,13122025064383559,13127052312166869,13131340694012302,13136566433737611,13154101095403837,13154198718450725,13154555158306370,13161267336530233,13164318985726929,13165906763126222,13167979789890447,131681
10604938660,13169610591191174,13170738127259843,13174101013456127,13175983907317617,13181966685315968,13184920558769854,13185428376339990,13189380579282347,13192157399149307,13198474033954862,13199022280418750,13199996563430952,13200108351974202,13201807925580353,13204068426067383,13204462037481647,13208555149000976,13211066578415956,13211782471313872,13212061616770885,13212165395243922,13224389449907250,13233545463558187,13238503442128074,13241104178731895,13241168864292142,13241450032861794,13248655970715829,13254044124605936,13254407272590223,13257737390535834,13262295991057322,13265037473491891,13267936932487740,13268996306113479,13270587658638306,13275333794285672,13276579099396774,13277158900551235,13277894633414167,13278754087006669,13280579253065923,13298750975340353,13302754854003883,13314547425279234,13315440480385562,13334201253531043,13341251444506381,13344119991216847,13347417735653001,13350157202852563,13353560065022737,13357601360650186,13359331313196874,13371096247956838,13372563355342461,13381234858826658,13385015528950604,13386085962502095,13392333467703344,13392648285436965,13394339089063032,13394551832655184,13402693157045406,13411243783632080,13411581109556372,13412383321315676,13413384478433494,13414514599248518,13423145336408826,13423985507124709,13428472113899924,13441217894362634,13441764089885110,13442145389950277,13442492834263372,13446542370234933,13448612638765991,13452686859689485,13466915673127210,13472766006472691,13479031376600774,13485627420255013,13489343668600089,13494212405375710,13495705064166062,13498151694534672,13501680776202759,13502956667923325,13505729133720187,13508489645714817,13509178757772730,13511175763453019,13516547793277979,13517706451256090,13519875476747795,13521772043996142,13526044181112873,13532220158545162,13544313444890724,13545457425167339,13546292946364632,13552576650306647,13560284735647415,13568144225858544,13570582888476349,13572007215372631,13579292169199361,13581522598681686,13584670639804403,13588669
826245287,13594750552645450,13604060080584622,13606027345535991,13617631557607803,13618539821196872,13621326447410048,13621397819727220,13630191002096322,13634793775983156,13639851440986869,13641546138989675,13641606518127186,13649712080213198,13650051593032688,13651091819607497,13654427643263871,13655297823287389,13656122678588204,13669670567123455,13679106791368898,13682988765515845,13683384323434122,13685822158536416,13689486854312196,13692915679570837,13693027758544966,13697542413338103,13698474516655449,13698940649521784,13699244375297918,13700132194249732,13700395889419752,13701739894308740,13702229819892376,13704275954624893,13704352361096222,13706325468204188,13706670591280801,13707739578231116,13711378433916464,13713126833679041,13714567785435526,13725507831112260,13732875764071009,13741640976152344,13751009984842370,13751355444615875,13752515134941761,13752528537770243,13754252433328047,13758183684987635,13761510571679661,13763495093063765,13763737248840023,13769064518094184,13769616521296557,13772233025470893,13779343273469876,13782374358651901,13784328184108875,13796714897651986,13797271737130311,13806888829912490,13810189191668783,13819216056468458,13830139162136642,13834956579150593,13836967928987258,13841506126069216,13844723334748155,13851419674057285,13851804964013356,13852009236228299,13855148698755421,13860370948717274,13860591920719937,13863190387760047,13866281554758808,13869499215642030,13872276184090322,13883742410074464,13885432559815038,13892745650858214,13893729230139344,13894176996423815,13899662982568281,13903640450103285,13915571727313717,13915751473207815,13924477533272165,13927268834765068,13927696780643085,13930075501042467,13931574119523082,13932773453065067,13944345475258344,13944744739592850,13946019636961185,13947359127055804,13948733294751772,13948849305450763,13950003362814608,13951639188915106,13952318012300341,13957499193946800,13957891625399498,13963970693218754,13965952880287033,13965992746518202,13968491336408470,1396906589
4497400,13982564979542470,13988336481457298,13988537842078882,13996482893584239,14008782205566643,14010800531316406,14012374992832944,14015507053297472,14020512831650303,14021767660954154,14022387662308266,14028827379125338,14033262874705267,14036809823113558,14039082912681460,14041235400538814,14050877619339410,14053671836796726,14059703515141024,14066003779541480,14068336590289394,14068514727840577,14070452446274375,14073196557139903,14077628043807881,14079413477735954,14082060161725832,14086400253454584,14089660453835508,14093701509746096,14093782787475587,14104609562657279,14105474512409513,14110176260429603,14110846566699118,14111754638843301,14113954118968165,14115700381311298,14116084203345559,14116757201838538,14126627189476844,14129104187791374,14134252052809090,14136333045989523,14141721173215486,14142180399257488,14144286736225168,14147641612048676,14155975414531428,14159319817299730,14166972826657757,14167756848708160,14168406404233927,14175692216668231,14177050809138713,14182196956346963,14182395945348617,14182522370557503,14183668773352145,14187963108760817,14188374719646123,14194895524216878,14199253731869253,14200690083582668,14206687970696716,14209853920811090,14210270522420558,14215487446995196,14221732530161551,14226640214180071,14228466984618789,14228747898918402,14232996386333408,14233668407191507,14240099294539079,14240931409922495,14241368478491777,14245988562300971,14249502033018713,14249687565938747,14250698500890187,14253371408889777,14257631509009617,14258964967231840,14262886155232322,14267007162424724,14270247164026926,14271254001084489,14274483320198377,14274835544974226,14282336913974875,14284383355599952,14286491830383354,14291469615410543,14292177888930623,14292472000295936,14295837452356585,14298173613618098,14301257753026067,14306645039357579,14310015895186965,14312644574968166,14314489483212131,14318196470777373,14324162382494344,14327796466197481,14328346609794800,14330165162804822,14331141876120768,14334872702153449,143355042920
83940,14341688237268043,14355465370177113,14356635961829550,14358501668762854,14358712927579207,14371667519001998,14373358653447165,14382653933328395,14389328610898075,14396743849723041,14403258144398461,14412375459733818,14419304139943345,14419408012320654,14419921321665188,14420517295463922,14421526836577881,14422482515005653,14428672766558377,14428963789076396,14433829777128924,14434491585844546,14441663522114382,14447076836883551,14453826393896267,14458414754313519,14460517698166413,14462888447275103,14463327792076139,14466639418787131,14473045986857112,14473621415900765,14474428436715615,14499310299171024,14502523747605405,14503354640357478,14503374543528179,14503421058685682,14506805831023499,14532022272038796,14533166699323342,14533912823231087,14540918644501586,14545143474688100,14546173469934987,14549609943559194,14552911878947037,14559345024889908,14572798606348625,14577681860555800,14584989944454173,14588926759145188,14591042865966381,14604151534357376,14606919734830311,14607574529846282,14609506155169308,14611213899966353,14622201375555281,14630990679638089,14631058148494236,14642640937827937,14643031895991669,14651923318081732,14656739959494770,14657604036440419,14660498729179366,14669778228653280,14674786028678808,14679329027774886,14680624844186736,14686694622392691,14689336054562019,14690256368300896,14693014696087872,14696133465591728,14697396774309339,14699312209892116,14710079784099482,14714972951681498,14718737202293924,14723600578234977,14727357539437919,14745825891890849,14753483946970413,14755563140273075,14755965578235618,14763159151224600,14764626632313599,14767178168674763,14773705053327044,14774424971091441,14775120243637151,14776126161075338,14777180456645831,14782806494910422,14784179132609967,14789280323954198,14792259226342467,14796095030944430,14802524498404394,14805119697076881,14808357837092866,14812335544631245,14814818422854695,14814866777217318,14816837023868332,14818468476875375,14819019180401048,14819207660612212,14821775114466
739,14822972850892928,14831940328373824,14832874204975299,14833648039540838,14838200055945191,14841572903295226,14844521188313039,14847956454072103,14848728342207420,14862317369956557,14875244788896295,14877010452958052,14878500342798560,14880698080993082,14883492855733875,14884715585223534,14885001023521200,14885277650679824,14885514754372569,14890608256545870,14891249907937290,14894841507741510,14896432925297083,14899776943325306,14912124553248599,14912527776416275,14914637529083995,14927475356531675,14935912020884502,14936652050795629,14939491797730666,14939629686310269,14940182819303564,14944948556154171,14945635561516349,14948084549356910,14948446551537791,14949218635729694,14950087955881572,14952347763435515,14959164006691718,14961785319983417,14969572759318726,14971749865458474,14972120173118520,14977645952278382,14981979822436646,14988165815323633,14988877042506897,14993137347899829,14993931438103578,14995330188668762,14995498956830547,14996000077091154,14999867324523228,15002117713677743,15005212809692919,15006561064342947,15014220467166698,15015143178733208,15015832834168182,15016747917580341,15017581694255009,15023369839042379,15025234321810668,15027872686689687,15027965624651774,15028626060466021,15029223449524697,15029421099140855,15030279194177507,15035193036474483,15036546311204155,15040592624783964,15045563863877798,15047851634879837,15049206174008406,15058256367816154,15061551128577232,15064441595557983,15066002393956736,15067340188537424,15068416282122043,15068737776024543,15075955407954330,15081726781697934,15083561199479055,15093027538264168,15094447720794002,15100586527842091,15100895887756881,15100923564154126,15113939304708511,15124747624370744,15125038963644248,15125825668976374,15129837287101752,15132962157165167,15142340753501357,15142771204200772,15143222831944000,15144509730846110,15144908199448686,15144983031311469,15146266898753650,15149385225563368,15151128984083655,15152827565413011,15155535079044825,15163502675118430,1516360800708454
8,15164714333985887,15166301729740851,15168611383865707,15169763864701641,15186748056330061,15188528526614937,15193432864725218,15194881156695303,15198814250439862,15201662707583210,15205461449086049,15212200426217138,15213176306218798,15216511057457596,15223439233922134,15223468402959842,15227318255741609,15231273354612283,15232762341410261,15235610343655303,15244577355919854,15245960594532078,15247433843914559,15249106376606939,15250808075404806,15250896481067742,15258347128439790,15259612081001691,15264731568598059,15269511085038233,15270167427090968,15275297064012400,15281435895520542,15285891219756998,15286093688825701,15289862125703132,15290148594297788,15293123874907838,15298758428575449,15302533346414874,15303593678697230,15304616612479818,15304832061714010,15304847733090522,15305964656672387,15320795701669972,15326286490335430,15329295210127478,15332234480025938,15334331246873691,15345070383303245,15353149026108168,15353232670298897,15355335603796124,15356453151175679,15373930811061691,15377210030466896,15378726728075414,15379533653675249,15379705621247200,15383101065751713,15389713624455313,15390882193918153,15397171895399395,15402636785814262,15407310719467201,15407689229789196,15408822912493961,15409962731524859,15413173941024438,15413515751683038,15416018244148923,15419969270171951,15423153342341016,15428180744186198,15430571383432346,15431617843729266,15433887937403854,15437233971050551,15450715294654043,15452893020203867,15453212547751568,15461080320924835,15462712554318219,15467422080353387,15469490756626048,15484285765017800,15484872489499858,15485469980475937,15485540626374727,15486304467213949,15489723346328738,15490588941327161,15493669981706537,15494125152072718,15500366938118891,15503520406029992,15505519386765147,15507002730562560,15507596083915355,15510353559011411,15514535598456693,15515999929535991,15522061334524465,15529140775061671,15530230344187268,15544311106200778,15544548900805204,15545602100255398,15550480026583451,15557041324109368,
15559289591149294,15561564170785191,15561952874606530,15564933328360346,15569147522045783,15572863590946325,15576367136356714,15577133638656672,15578855835529187,15584544164633761,15585305674396933,15586021008360415,15586572984402845,15591835587885800,15595116715517183,15600022327961011,15605001570186520,15607870388183703,15607922224614041,15614292833802726,15618067512755640,15624018957338891,15627340304611938,15638216907788849,15638858384127971,15642870227425833,15646298279506026,15651638904781067,15652623828656547,15659367639512437,15663152139200473,15663518386052835,15668753861818902,15669831866387505,15678947651292970,15682293644901354,15682550662690976,15682651083715691,15684129061048451,15684714219805662,15687701212595998,15689677439468810,15695115813510797,15695286443123085,15700741802774887,15701006667851545,15702846415375903,15703609923212388,15703962508930357,15721982912314664,15743385922200831,15748859641437205,15754569858261273,15755103109741170,15755440794212771,15757615898310353,15760850159052851,15762209549139285,15763329664260405,15763347842500031,15764824185348051,15765990586747192,15766276644999494,15768790414358471,15778030936010985,15778073642134042,15783285537872233,15783457873235222,15783601313856694,15786952156025848,15789976943224909,15810883487535993,15812065825937453,15815973513115164,15816359902190124,15818396444990604,15822569260611075,15827802457455271,15828816982020122,15830964443377814,15840334179132579,15843352451900485,15843600775555820,15846238303690403,15863820783079073,15866236940451977,15867439138551076,15872362195905201,15875675232290550,15878101641545005,15878818841668229,15879020355620882,15879801226947188,15882817785917695,15884400676036182,15887607544869391,15890892365015163,15892366657661960,15892644929711368,15895961844448293,15896204034306061,15897401962344850,15904895288630879,15911275554215588,15911665623700279,15915302121110625,15920977380773424,15922176557778000,15927341566519732,15931653898476584,15934933731048989,15
939128572044103,15944489120018696,15954518713119826,15962749062304179,15964603511322079,15966264651417527,15979755963656160,15989521035310443,15992440720931469,16003094114347164,16004817238396021,16011025198707647,16012437984901399,16018234336000847,16033495765775843,16038301793476863,16041339465547053,16049606674888102,16049891952869470,16053420407977591,16060832251885914,16062107659438725,16069046551508727,16069174976182272,16074486970716033,16080740588460730,16083877262816687,16090383828959085,16093240548996996,16098091349807463,16098172275019426,16099819217713851,16108376586576329,16125621753305299,16126978195698598,16127193854760523,16134101469312376,16140981467932346,16144336314735217,16145343445599154,16149410584199209,16150040833847011,16152783913026412,16158129834797633,16160771084575630,16176635623269431,16178760128679429,16181654573567669,16184318280736106,16188822750519283,16190571409547135,16191892773280294,16195655847208731,16198734954314842,16211596379781820,16219147118653087,16220746069769206,16225653430698144,16233112123839300,16233654384438564,16235839769165041,16240762741306135,16250616875540489,16263334721549532,16263780136827362,16266712848564625,16270015344285096,16271083150992386,16280927247624124,16281915533551056,16289174359879745,16289936972512243,16293042607710037,16299677703726115,16307595056946704,16309137054392802,16310681306790082,16315681986373983,16317875420686187,16322691175661967,16327416205924614,16330104654201385,16333887903434801,16336789007686054,16339994308760050,16341765762990956,16347036755860703,16349055180152841,16349624325715789,16356347789934648,16359131609074861,16362066780194864,16369009637282998,16369281391114209,16369945946899217,16372297199748151,16373987141658826,16375988586939943,16377409889399924,16379852130875662,16383306457978105,16386085273604579,16389519573583307,16394940965979110,16396880383990556,16398154649330003,16398733126926655,16400068126248119,16411790303732795,16417759312392243,16419509766610091,1641
9901644544820,16422440467412931,16426184098291933,16428705781520359,16432971637241688,16433327633528711,16435742390682375,16437965595938221,16445322365770696,16447014000822092,16448682699618749,16448829682806061,16452048761762229,16457674212534286,16458193998413945,16460001490402451,16461454427883020,16462708921584182,16465114761860846,16465741257369090,16467984634116524,16473606709399091,16482765139128458,16483944183054084,16492537636527687,16492753413249082,16495982586896225,16499668866369559,16512946688413824,16514937777288269,16515317531532600,16515515620455784,16515763893503398,16517530335877295,16517537506431523,16520680878383534,16522040144005904,16536209470153361,16549193848756349,16551638786295881,16553191501416196,16555675508895642,16556142033974396,16557075823478085,16560847038118314,16561729019896847,16568638052909005,16569637258340730,16573238078421961,16574236353677299,16578960500697920,16582822217918014,16589279792859879,16591958210106118,16594902459898391,16605131007727492,16606580432110149,16614318900004042,16615202652663010,16618755587989745,16626806031696348,16629787832374700,16631291003094984,16649312379995411,16650212270002998,16650283216078512,16651090716262974,16652944391354801,16653981254978641,16654463028053744,16654924393162193,16660067927175777,16662173317527607,16664243102641776,16666335390036133,16681703184890456,16682373593295548,16682578612688855,16685180422791776,16686094229531741,16690146533382363,16698836185067210,16705001871343701,16708636667018243,16710176517292164,16711961207359232,16724720235210872,16725294155137383,16727032133352391,16727688366022871,16729903330833289,16730086424088046,16736716353986757,16737373768108148,16738231483083309,16739182735043548,16741586841667302,16747302145863252,16753484567013429,16755872335309848,16759630605474370,16761517688376276,16762470025066864,16783509929261859,16784815745535597,16787458448627887,16788378238902865,16788943958964328,16792514152922390,16804992154815880,16810005023919202,168204
70058192790,16827645998999199,16830315094868687,16834728273804865,16834972450595855,16837630758069802,16838337071218776,16843019140494565,16845510952053197,16850886838136612,16854336811041325,16858777046733835,16862894270178946,16867020968629490,16867864576520806,16868617407655959,16868967869968917,16870410864242800,16873469111493195,16879575300669180,16880613203851258,16886381613931407,16887131664281014,16888158543374642,16890451337099671,16893613954731672,16895785673304080,16897929991993805,16899560454680533,16913880033664423,16915167008196857,16919853192670914,16923906246184599,16936593144852542,16942918462021910,16945631443389494,16947503499298974,16954247022723534,16956760996707076,16966208347117891,16967956019390437,16972536001685219,16974976960555336,16974996106586527,16976062143580028,16977273505567584,16981100707531732,16983782183294117,16986912710655502,16987865244712954,16989134716990479,16989387007759710,16999582099196194,17000582419359490,17001105496098528,17001625874486973,17004131992558114,17008056242821339,17011749486628658,17014229777897844,17018029384954439,17019281824197645,17022002414706187,17023649823777761,17027024912981538,17027708820603609,17027724390296374,17028905071969344,17033234045807732,17039633394164779,17041956186643831,17043911086147728,17044227183900592,17045020641718652,17047039786296372,17048296154567372,17053515690985016,17057095725127500,17065736283826551,17066204322105669,17066614080750842,17066890141401799,17069702342298005,17070031206981939,17072355840387059,17086180968300670,17092924242292896,17104993056562977,17105192154942325,17109970886679733,17115701285838058,17117697441697646,17125687975796961,17127731933707424,17128001588707458,17138834595066375,17143215310364176,17148449573094945,17150277478457455,17156795393603011,17157898465093855,17158828067013858,17159011651238619,17164804932271093,17171938449805958,17172705597475016,17173738199873813,17179028413037838,17184647258662518,17189985997758617,17192344156604394,17196706
052792205,17207135836467323,17215506044805401,17219161189129714,17220596644754506,17233175158773315,17246076311509922,17255087609386479,17255995767294112,17259853567766215,17264169344327606,17264493723089078,17270538843510702,17271660415976271,17276987541026097,17282090993143952,17283049372339150,17291560172494026,17291979671712321,17292310452468705,17293149261210524,17296137849004163,17296192463290079,17304784741854216,17308433563518540,17313632977406292,17318960805851472,17320236078883475,17321015553228474,17321174101274260,17321666593584496,17323038726279631,17329308502917698,17329472456864413,17334589990132684,17334864272182912,17341517372421961,17351099007094397,17353053843653375,17366561361017324,17374440012919698,17378407166143928,17383067517360975,17391219400155350,17391436242706241,17393048485135831,17395137850335402,17396392214119358,17398282123693718,17400913153559823,17407246841014078,17413919734842125,17420499051739414,17421399034054265,17422062281526649,17423105453982540,17424289896774390,17425090194197669,17425614571955386,17432341969660318,17435530177003750,17439323243822026,17440523469292889,17441789247813820,17466786275120017,17470441567829100,17476597996427511,17476827515285484,17479085198776952,17484120239214373,17485776963997171,17486966299030886,17486988765689599,17488816545423819,17496728290093656,17497098710311670,17499228377170813,17500477193853245,17517159540441318,17523212990532497,17523973238515804,17527155984508354,17527974390318800,17541513425567569,17544483234277015,17554939460224349,17558580292594243,17559431629269263,17561448044068445,17562043899076409,17564200744376749,17568398735638434,17572077476907682,17575183081748977,17575560135721806,17578541772101901,17582290334182417,17587104794159329,17593208073575946,17596039699851909,17599396608009249,17600395630340514,17600607133597719,17602282637407016,17608993167118462,17612331367634278,17612661692978454,17613143173683718,17620186225453340,17622683372375560,17624682438741740,1763727650
6512333,17638727474061409,17641686744036386,17644039113805944,17645312266322218,17650070599794961,17653795308183431,17653847556747406,17657497846454029,17659159129272154,17659621059389953,17660743105223332,17661459152369712,17675621085369441,17676740506186425,17688285802217756,17696080779036912,17697163249179507,17697682504000309,17707842984156022,17709737359906993,17710801222146557,17712409569723362,17712893886367554,17714216772557464,17716382248321539,17720864955352360,17725080861093763,17729422012525851,17732456419233492,17741767011633464,17742935181641766,17746510075374564,17747383804315050,17756105008438482,17759977925515375,17760075502845602,17762550181642638,17767687613087632,17767859597836083,17769865028645273,17770964005963097,17770981472959907,17778432694523101,17780417672471981,17780633004635622,17781695742071988,17794405366215460,17798330281681371,17800329108711822,17805181966455568,17806610508162479,17807775056747143,17810904352266449,17811294904961592,17812394543477172,17816589074280916,17820414995546733,17822270975275609,17824058019135158,17824692487924143,17829073738411995,17829961694570784,17837924490120715,17838730974038612,17840541840864217,17843709229402181,17844481483540230,17848382043638116,17851847109708456,17853025423149005,17859332251377907,17861897896064008,17862392392198693,17863284124973089,17864179561603861,17864592832666958,17868918610462163,17871331243165860,17873251325535856,17875253398653485,17877847214982476,17881080524524475,17889149793462925,17892154693253238,17893885025675099,17896653342000979,17903487860911815,17903975998539248,17905797190497744,17905846546240170,17906626648911419,17919141917693070,17922025141068374,17927103791988031,17933163310113598,17937781070124082,17948193446220343,17961387047243191,17965956572473636,17970314287831337,17970920336945917,17981163734458850,17983136277324499,17988399353053093,17988976441880620,17994870382615510,18001307487480335,18001311042217802,18006237324341425,18006674318531347,180090421151
64135,18014189215438894,18014657110806736,18020974854298469,18021886555673319,18026004208911747,18026232516319011,18028316562390501,18028401974332903,18032295415917214,18033529755426165,18041417715473006,18042108482583522,18051621313730146,18055041696562277,18055087872299409,18055587125968368,18061732028717577,18063228282000881,18065517227719762,18068141364358640,18069471533986377,18075555160804033,18076181993827292,18082663742446595,18089241107640661,18096458958758462,18097497168766291,18099088172925428,18100450631735810,18103384897215847,18129411267420255,18131898498754493,18133412567042816,18136432192167355,18138184127031654,18138810838881859,18139979326512529,18143945517551573,18148249253291380,18162150874995819,18163443951512529,18176398171978336,18179155755591764,18179306742222141,18195325370888256,18196224826654508,18198481066177520,18200162616918760,18200433284258315,18201109647268708,18210889187587826,18214396067041321,18215366819351334,18215593970961402,18215688955701607,18218596306288915,18219379302426418,18219480778356908,18219492177590913,18223532024009924,18229457516235802,18230262104505569,18235829425519784,18241422988672837,18244762343715926,18257416541434878,18258026885319486,18258589056361348,18258704607599735,18265102179546859,18275143099845195,18277496942328603,18277532214379777,18306497644679138,18307189866355692,18308565966347727,18309896670692432,18310224044923199,18310260983673763,18311678202467591,18312928450191138,18316793768442045,18320199194413158,18320706645413560,18321247174629916,18325796935244911,18330874810048231,18332434291015304,18336172353658611,18337191336507655,18340192768781464,18354768007590930,18354809741903140,18361245509159168,18362561943328805,18366897023001689,18367494137613346,18377045322936582,18379087767248948,18389507703197237,18390199754916829,18395298635755745,18396190684827929,18400962357421119,18402740084326073,18403600751028920,18410807087565130,18411318682491459,18412761413118797,18417094802826718,18417594946606
594,18423285819437783,18429692224833089,18434322404441043,18434966080784818,18435861506577465,18436307626060546,18438159643459969,18440518503779835,18441289041766013,18444487060015322,18444666364288446],"molecule":"DNA","num":0,"seed":42,"abundances":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]}],"version":0.4}] \ No newline at end of file diff --git a/tests/test-data/gather/salmonella-picklist-diffcolumn.csv b/tests/test-data/gather/salmonella-picklist-diffcolumn.csv new file mode 100644 index 0000000000..34c79d3615 --- /dev/null +++ b/tests/test-data/gather/salmonella-picklist-diffcolumn.csv @@ -0,0 +1,26 @@ +name2 +"NOT THERE" +"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" +"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" +"NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome" +"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" +"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" +"NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome" +"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" +"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" +"NC_006905.1 Salmonella enterica subsp. enterica serovar Choleraesuis str. SC-B67, complete genome" +NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome +NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. 
P125109 complete genome +NC_011294.1 Salmonella enterica subsp. enterica serovar Enteritidis str. P125109 complete genome +NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome +NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome +NC_011274.1 Salmonella enterica subsp. enterica serovar Gallinarum str. 287/91 complete genome +"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" +"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" +"NC_006511.1 Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150, complete genome" +"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" +"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" +"NC_011080.1 Salmonella enterica subsp. enterica serovar Newport str. SL254, complete genome" +"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome" +"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. CT18, complete genome" +"NC_003198.1 Salmonella enterica subsp. enterica serovar Typhi str. 
CT18, complete genome" diff --git a/tests/test-data/scaled/mf.csv b/tests/test-data/scaled/mf.csv new file mode 100644 index 0000000000..e3ff4d09e7 --- /dev/null +++ b/tests/test-data/scaled/mf.csv @@ -0,0 +1,17 @@ +# SOURMASH-MANIFEST-VERSION: 1.0 +internal_location,md5,md5short,ksize,moltype,num,scaled,n_hashes,with_abundance,name,filename +all.lca.json,455c2f95f2d0a95e176870659119f170,455c2f95,31,DNA,0,10000,93,0,, +all.lca.json,684aa226f843eaa7e1e40fc5603d5f2a,684aa226,31,DNA,0,10000,48,0,, +all.lca.json,7f7835d2dd27ba703e843eee4757f3c2,7f7835d2,31,DNA,0,10000,8,0,, +all.lca.json,7ffcfaa4027d4153a991b6bd78cf39fe,7ffcfaa4,31,DNA,0,10000,45,0,, +all.lca.json,d84ef28f610b1783f801734699cf7e40,d84ef28f,31,DNA,0,10000,45,0,, +genome-s10+s11.fa.gz.sig,455c2f95f2d0a95e176870659119f170,455c2f95,31,DNA,0,10000,93,0,,../genome-s10+s11.fa.gz +genome-s11.fa.gz.sig,7ffcfaa4027d4153a991b6bd78cf39fe,7ffcfaa4,31,DNA,0,10000,45,0,,../genome-s11.fa.gz +all.sbt.zip,684aa226f843eaa7e1e40fc5603d5f2a,684aa226,31,DNA,0,10000,48,0,,../genome-s10.fa.gz +all.sbt.zip,7f7835d2dd27ba703e843eee4757f3c2,7f7835d2,31,DNA,0,10000,8,0,,../genome-s10-small.fa.gz +all.sbt.zip,7ffcfaa4027d4153a991b6bd78cf39fe,7ffcfaa4,31,DNA,0,10000,45,0,,../genome-s11.fa.gz +all.sbt.zip,455c2f95f2d0a95e176870659119f170,455c2f95,31,DNA,0,10000,93,0,,../genome-s10+s11.fa.gz +all.sbt.zip,d84ef28f610b1783f801734699cf7e40,d84ef28f,31,DNA,0,10000,45,0,,../genome-s12.fa.gz +genome-s10-small.fa.gz.sig,7f7835d2dd27ba703e843eee4757f3c2,7f7835d2,31,DNA,0,10000,8,0,,../genome-s10-small.fa.gz +genome-s12.fa.gz.sig,d84ef28f610b1783f801734699cf7e40,d84ef28f,31,DNA,0,10000,45,0,,../genome-s12.fa.gz +genome-s10.fa.gz.sig,684aa226f843eaa7e1e40fc5603d5f2a,684aa226,31,DNA,0,10000,48,0,,../genome-s10.fa.gz diff --git a/tests/test-data/scaled/pathlist.txt b/tests/test-data/scaled/pathlist.txt new file mode 100644 index 0000000000..32b8b3bacd --- /dev/null +++ b/tests/test-data/scaled/pathlist.txt @@ -0,0 +1,7 @@ +all.lca.json 
+all.sbt.zip +genome-s10+s11.fa.gz.sig +genome-s10-small.fa.gz.sig +genome-s10.fa.gz.sig +genome-s11.fa.gz.sig +genome-s12.fa.gz.sig diff --git a/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz b/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz new file mode 100644 index 0000000000..e052b274e2 Binary files /dev/null and b/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz differ diff --git a/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz b/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz new file mode 100644 index 0000000000..5406c2c63b Binary files /dev/null and b/tests/test-data/sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz differ diff --git a/tests/test-data/sketch_fromfile/salmonella-badseq.csv b/tests/test-data/sketch_fromfile/salmonella-badseq.csv new file mode 100644 index 0000000000..d1ffecfd2f --- /dev/null +++ b/tests/test-data/sketch_fromfile/salmonella-badseq.csv @@ -0,0 +1,2 @@ +ident,full_ident,name,genome_filename,protein_filename +GCA_903797575,GCA_903797575.1,GCA_903797575 Salmonella enterica,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz, diff --git a/tests/test-data/sketch_fromfile/salmonella-dna-protein.zip b/tests/test-data/sketch_fromfile/salmonella-dna-protein.zip new file mode 100644 index 0000000000..5fd26246a0 Binary files /dev/null and b/tests/test-data/sketch_fromfile/salmonella-dna-protein.zip differ diff --git a/tests/test-data/sketch_fromfile/salmonella-missing.csv b/tests/test-data/sketch_fromfile/salmonella-missing.csv new file mode 100644 index 0000000000..b6ef55bcec --- /dev/null +++ b/tests/test-data/sketch_fromfile/salmonella-missing.csv @@ -0,0 +1,2 @@ +ident,full_ident,name,genome_filename,protein_filename +GCA_903797575,GCA_903797575.1,GCA_903797575 Salmonella enterica,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz, diff --git 
a/tests/test-data/sketch_fromfile/salmonella-mult.csv b/tests/test-data/sketch_fromfile/salmonella-mult.csv new file mode 100644 index 0000000000..251e324a1c --- /dev/null +++ b/tests/test-data/sketch_fromfile/salmonella-mult.csv @@ -0,0 +1,3 @@ +ident,full_ident,name,genome_filename,protein_filename +GCA_903797575,GCA_903797575.1,GCA_903797575 Salmonella enterica,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz +xxGCA_903797575,xxGCA_903797575.1,xxGCA_903797575 Salmonella enterica,sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_genomic.fna.gz,sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_protein.faa.gz diff --git a/tests/test-data/sketch_fromfile/salmonella-noname.csv b/tests/test-data/sketch_fromfile/salmonella-noname.csv new file mode 100644 index 0000000000..b464244315 --- /dev/null +++ b/tests/test-data/sketch_fromfile/salmonella-noname.csv @@ -0,0 +1,2 @@ +ident,full_ident,name,genome_filename,protein_filename +GCA_903797575,GCA_903797575.1,,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz diff --git a/tests/test-data/sketch_fromfile/salmonella.csv b/tests/test-data/sketch_fromfile/salmonella.csv new file mode 100644 index 0000000000..5c1fc10508 --- /dev/null +++ b/tests/test-data/sketch_fromfile/salmonella.csv @@ -0,0 +1,2 @@ +ident,full_ident,name,genome_filename,protein_filename +GCA_903797575,GCA_903797575.1,GCA_903797575 Salmonella enterica,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz,sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz diff --git a/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_genomic.fna.gz b/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_genomic.fna.gz new file mode 100644 index 0000000000..e052b274e2 Binary files /dev/null and b/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_genomic.fna.gz differ diff --git 
a/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_protein.faa.gz b/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_protein.faa.gz new file mode 100644 index 0000000000..5406c2c63b Binary files /dev/null and b/tests/test-data/sketch_fromfile/xxGCA_903797575.1_PARATYPHIC668_protein.faa.gz differ diff --git a/tests/test-data/v6.sbt.zip.mf.csv b/tests/test-data/v6.sbt.zip.mf.csv new file mode 100644 index 0000000000..533b262399 --- /dev/null +++ b/tests/test-data/v6.sbt.zip.mf.csv @@ -0,0 +1,9 @@ +# SOURMASH-MANIFEST-VERSION: 1.0 +internal_location,md5,md5short,ksize,moltype,num,scaled,n_hashes,with_abundance,name,filename +6d6e87e1154e95b279e5e7db414bc37b,6d6e87e1154e95b279e5e7db414bc37b,6d6e87e1,31,DNA,500,0,500,0,,SRR2255622_1.fastq.gz +60f7e23c24a8d94791cc7a8680c493f9,60f7e23c24a8d94791cc7a8680c493f9,60f7e23c,31,DNA,500,0,500,0,,SRR2060939_1.fastq.gz +0107d767a345eff67ecdaed2ee5cd7ba,0107d767a345eff67ecdaed2ee5cd7ba,0107d767,31,DNA,500,0,500,0,,SRR453566_1.fastq.gz +f71e78178af9e45e6f1d87a0c53c465c,f71e78178af9e45e6f1d87a0c53c465c,f71e7817,31,DNA,500,0,500,0,,SRR2241509_1.fastq.gz +f0c834bc306651d2b9321fb21d3e8d8f,f0c834bc306651d2b9321fb21d3e8d8f,f0c834bc,31,DNA,500,0,500,0,,SRR453569_1.fastq.gz +4e94e60265e04f0763142e20b52c0da1,4e94e60265e04f0763142e20b52c0da1,4e94e602,31,DNA,500,0,500,0,,SRR2060939_2.fastq.gz +b59473c94ff2889eca5d7165936e64b3,b59473c94ff2889eca5d7165936e64b3,b59473c9,31,DNA,500,0,500,0,,SRR453570_1.fastq.gz diff --git a/tests/test_bugs.py b/tests/test_bugs.py index dea352db0a..e0f3c5daf0 100644 --- a/tests/test_bugs.py +++ b/tests/test_bugs.py @@ -3,7 +3,7 @@ @utils.in_tempdir def test_bug_803(c): # can we do a 'sourmash search' on an LCA database and a query with abundance? 
- query = utils.get_test_data('47.abunds.fa.sig') + query = utils.get_test_data('track_abund/47.fa.sig') lca_db = utils.get_test_data('lca/47+63.lca.json') c.run_sourmash('search', query, lca_db, '--ignore-abundance') diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py index 9d5b1eaa0b..b09d05076b 100644 --- a/tests/test_cmd_signature.py +++ b/tests/test_cmd_signature.py @@ -563,6 +563,121 @@ def test_sig_intersect_8_multisig(c): assert not len(actual_intersect_sig.minhash) +def test_sig_inflate_1(runtmp): + # basic inflate test - inflate 47 flat with 47 abund + sig47_flat = utils.get_test_data('47.fa.sig') + sig47_abund = utils.get_test_data('track_abund/47.fa.sig') + runtmp.run_sourmash('sig', 'inflate', sig47_abund, sig47_flat) + + # stdout should be new signature + out = runtmp.last_result.out + + actual_inflate_sig = sourmash.load_one_signature(out) + actual_inflate_mh = actual_inflate_sig.minhash + + # should be identical to track_abund sig + sig47 = sourmash.load_one_signature(sig47_abund) + mh47 = sig47.minhash + + assert actual_inflate_sig.name == sig47.name + assert actual_inflate_mh == mh47 + + +def test_sig_inflate_2(runtmp): + # use abundances from sig #47 + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + runtmp.run_sourmash('sig', 'inflate', sig47, sig63) + + # stdout should be new signature + out = runtmp.last_result.out + + actual_inflate_sig = sourmash.load_one_signature(out) + + # actually do an inflation ourselves for the test + mh47 = sourmash.load_one_signature(sig47).minhash + mh63 = sourmash.load_one_signature(sig63).minhash + mh47_abunds = mh47.hashes + mh63_mins = set(mh63.hashes.keys()) + + # get the set of mins that are in common + mh63_mins.intersection_update(mh47_abunds) + + # take abundances from mh47 & create new sig + mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + test_mh = mh47.copy_and_clear() + test_mh.set_abundances(mh47_abunds) + + 
print(actual_inflate_sig.minhash) + print(out) + + assert actual_inflate_sig.minhash == test_mh + + +def test_sig_inflate_3(runtmp): + # should fail on flat first sig + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.run_sourmash('sig', 'inflate', sig63, sig47) + + assert 'has no abundances' in runtmp.last_result.err + + +def test_sig_inflate_4_picklist(runtmp): + # try out picklists + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + sig47_flat = utils.get_test_data('47.fa.sig') + + ss63 = sourmash.load_one_signature(sig63, ksize=31) + + picklist = _write_file(runtmp, 'pl.csv', ['md5', ss63.md5sum()]) + + print(ss63.md5sum()) + + + runtmp.run_sourmash('sig', 'inflate', sig47, sig63, sig47_flat, + '--picklist', f'pl.csv:md5:md5') + + # stdout should be new signature + out = runtmp.last_result.out + err = runtmp.last_result.err + + actual_inflate_sig = sourmash.load_one_signature(out) + + # actually do an inflation ourselves for the test + mh47 = sourmash.load_one_signature(sig47).minhash + mh63 = sourmash.load_one_signature(sig63).minhash + mh47_abunds = mh47.hashes + mh63_mins = set(mh63.hashes.keys()) + + # get the set of mins that are in common + mh63_mins.intersection_update(mh47_abunds) + + # take abundances from mh47 & create new sig + mh47_abunds = { k: mh47_abunds[k] for k in mh63_mins } + test_mh = mh47.copy_and_clear() + test_mh.set_abundances(mh47_abunds) + + print(actual_inflate_sig.minhash) + print(out) + + assert actual_inflate_sig.minhash == test_mh + + +def test_sig_inflate_5_bad_moltype(runtmp): + # should fail when no signatures match moltype + sig47 = utils.get_test_data('track_abund/47.fa.sig') + prot = utils.get_test_data('prot/protein.zip') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.run_sourmash('sig', 'inflate', sig47, prot) + + assert 'no signatures to inflate' in 
runtmp.last_result.err + + @utils.in_tempdir def test_sig_subtract_1(c): # subtract of 63 from 47 @@ -583,6 +698,73 @@ def test_sig_subtract_1(c): assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) +def test_sig_subtract_1_abund(runtmp): + # subtract 63 from 47, with abundances borrowed from 47 + + c = runtmp + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('track_abund/63.fa.sig') + c.run_sourmash('sig', 'subtract', sig47, sig63, '-A', sig47) + + # stdout should be new signature + out = c.last_result.out + + test1_sig = sourmash.load_one_signature(sig47) + test2_sig = sourmash.load_one_signature(sig63) + actual_subtract_sig = sourmash.load_one_signature(out) + assert actual_subtract_sig.minhash.track_abundance + + mins = set(test1_sig.minhash.hashes.keys()) + mins -= set(test2_sig.minhash.hashes.keys()) + + assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) + + distinct_abunds = set() + actual_sub_hashes = actual_subtract_sig.minhash.hashes + sig47_hashes = test1_sig.minhash.hashes + for h in mins: + assert actual_sub_hashes[h] == sig47_hashes[h] + distinct_abunds.add(sig47_hashes[h]) + + # this is really just to make sure that we have a sketch with some + # abundances in it... 
+ assert max(distinct_abunds) > 1 + + +def test_sig_subtract_1_abund_is_flat(runtmp): + # subtract 63 from 47, with abundances borrowed from 47 + + c = runtmp + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('track_abund/63.fa.sig') + sig47_flat = utils.get_test_data('47.fa.sig') + + with pytest.raises(SourmashCommandFailed): + c.run_sourmash('sig', 'subtract', sig47, sig63, '-A', sig47_flat) + + +def test_sig_subtract_1_flatten(runtmp): + # subtract 63 from 47, with abund signatures originally and --flatten + + c = runtmp + sig47 = utils.get_test_data('track_abund/47.fa.sig') + sig63 = utils.get_test_data('track_abund/63.fa.sig') + c.run_sourmash('sig', 'subtract', sig47, sig63, '--flatten') + + # stdout should be new signature + out = c.last_result.out + + test1_sig = sourmash.load_one_signature(sig47) + test2_sig = sourmash.load_one_signature(sig63) + actual_subtract_sig = sourmash.load_one_signature(out) + assert not actual_subtract_sig.minhash.track_abundance + + mins = set(test1_sig.minhash.hashes.keys()) + mins -= set(test2_sig.minhash.hashes.keys()) + + assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins) + + @utils.in_tempdir def test_sig_subtract_1_multisig(c): # subtract of everything from 47 @@ -638,6 +820,17 @@ def test_sig_subtract_4_ksize_succeed(c): assert 'loaded and subtracted 1 signatures' in c.last_result.err +def test_sig_subtract_5_bad_moltype(runtmp): + # should fail when no matching sigs + sig47 = utils.get_test_data('47.fa.sig') + prot = utils.get_test_data('prot/protein.zip') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.run_sourmash('sig', 'subtract', '-k', '31', sig47, prot) + + assert 'no signatures to subtract' in runtmp.last_result.err + + def test_sig_rename_1(runtmp): c = runtmp @@ -2800,7 +2993,7 @@ def test_sig_describe_1_hp(c): c.run_sourmash('sig', 'describe', computed_sig) out = c.last_result.out - print(c.last_result) + print(c.last_result.out) # Add final 
trailing slash for this OS testdata_dirname = os.path.dirname(testdata) + os.sep @@ -2814,6 +3007,7 @@ def test_sig_describe_1_hp(c): md5: e45a080101751e044d6df861d3d0f3fd k=7 molecule=protein num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2823,6 +3017,7 @@ def test_sig_describe_1_hp(c): md5: c027e96c3379d38942639219daa24fdc k=7 molecule=dayhoff num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2841,6 +3036,7 @@ def test_sig_describe_1_hp(c): md5: 1136a8a68420bd93683e45cdaf109b80 k=21 molecule=DNA num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2850,6 +3046,7 @@ def test_sig_describe_1_hp(c): md5: 4244d1612598af044e799587132f007e k=10 molecule=protein num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2859,6 +3056,7 @@ def test_sig_describe_1_hp(c): md5: 396dcb7c1875f48ca31e0759bec72ee1 k=10 molecule=dayhoff num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2868,6 +3066,7 @@ def test_sig_describe_1_hp(c): md5: 4c43878296459783dbd6a4a071ab7e9d k=10 molecule=hp num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 --- @@ -2877,6 +3076,7 @@ def test_sig_describe_1_hp(c): md5: 71f7c111c01785e5f38efad45b00a0e1 k=30 molecule=DNA num=500 scaled=0 seed=42 track_abundance=0 size: 500 +sum hashes: 500 signature license: CC0 """.splitlines() @@ -2982,6 +3182,29 @@ def test_sig_describe_1_zipfile(c): assert line.strip() in out +def test_sig_describe_1_sig_abund(runtmp): + # check output of sig describe on a sketch with abundances + c = runtmp + + sigfile = utils.get_test_data('track_abund/47.fa.sig') + c.run_sourmash('sig', 'describe', sigfile) + + out = c.last_result.out + print(c.last_result.out) + + expected_output = """\ +signature: NC_009665.1 Shewanella baltica OS185, complete 
genome +source file: podar-ref/47.fa +md5: 09a08691ce52952152f0e866a59f6261 +k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=1 +size: 5177 +sum hashes: 5292 +signature license: CC0 +""".splitlines() + for line in expected_output: + assert line.strip() in out + + @utils.in_thisdir def test_sig_describe_stdin(c): sig = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') @@ -3045,6 +3268,37 @@ def test_sig_describe_2_csv(runtmp): assert n == 2 +def test_sig_describe_2_csv_abund(runtmp): + # output info in CSV spreadsheet, for abund sig + c = runtmp + + sig47 = utils.get_test_data('track_abund/47.fa.sig') + c.run_sourmash('sig', 'describe', sig47, '--csv', 'out.csv') + + with open(c.output('out.csv'), 'rt') as fp: + r = csv.DictReader(fp) + + n = 0 + + rows = list(r) + assert len(rows) == 1 + row = rows[0] + + assert row['signature_file'] == sig47 + assert row['md5'] == "09a08691ce52952152f0e866a59f6261" + assert row['ksize'] == "31" + assert row['moltype'] == "DNA" + assert row['num'] == "0" + assert row['scaled'] == "1000" + assert row['n_hashes'] == "5177" + assert row['seed'] == "42" + assert row['with_abundance'] == "1" + assert row['name'] == "NC_009665.1 Shewanella baltica OS185, complete genome" + assert row['filename'] == "podar-ref/47.fa" + assert row['license'] == "CC0" + assert row['sum_hashes'] == "5292" + + def test_sig_describe_2_csv_as_picklist(runtmp): # generate an output CSV from describe and then use it as a manifest # pickfile @@ -3074,6 +3328,79 @@ def test_sig_describe_2_csv_as_picklist(runtmp): assert line.strip() in out +def test_sig_describe_2_include_db_pattern(runtmp): + # test sig describe --include-db-pattern + c = runtmp + + allzip = utils.get_test_data('prot/all.zip') + + c.run_sourmash('sig', 'describe', allzip, + '--include-db-pattern', 'os185') + + out = c.last_result.out + print(c.last_result) + + expected_output = """\ +signature: NC_009665.1 Shewanella baltica OS185, complete genome 
+source file: 47.fa +md5: 09a08691ce52952152f0e866a59f6261 +k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=0 +size: 5177 +signature license: CC0 +""".splitlines() + for line in expected_output: + assert line.strip() in out + + +def test_sig_describe_2_exclude_db_pattern(runtmp): + # test sig describe --exclude-db-pattern + c = runtmp + + allzip = utils.get_test_data('prot/all.zip') + + c.run_sourmash('sig', 'describe', allzip, '--dna', '-k', '31', + '--exclude-db-pattern', 'os223') + + out = c.last_result.out + print(c.last_result) + + expected_output = """\ +signature: NC_009665.1 Shewanella baltica OS185, complete genome +source file: 47.fa +md5: 09a08691ce52952152f0e866a59f6261 +k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=0 +size: 5177 +signature license: CC0 +""".splitlines() + for line in expected_output: + assert line.strip() in out + + +def test_sig_describe_3_manifest_works(runtmp): + # test on a manifest with relative paths, in proper location + mf = utils.get_test_data('scaled/mf.csv') + runtmp.sourmash('sig', 'describe', mf, '--csv', 'out.csv') + + out = runtmp.last_result.out + print(out) + + with open(runtmp.output('out.csv'), newline='') as fp: + r = csv.reader(fp) + rows = list(r) + assert len(rows) == 16 # 15 signatures, plus head + + +def test_sig_describe_3_manifest_fails_when_moved(runtmp): + # test on a manifest with relative paths, when in wrong place; + # should fail, because actual signatures cannot be loaded now. + # note: this tests lazy loading. 
+ mf = utils.get_test_data('scaled/mf.csv') + shutil.copyfile(mf, runtmp.output('mf.csv')) + + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sig', 'describe', 'mf.csv') + + @utils.in_tempdir def test_sig_overlap(c): # get overlap details @@ -3264,13 +3591,20 @@ def test_sig_manifest_6_pathlist(runtmp): assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + # note: the manifest output for pathlists will contain the locations + # used in the pathlist. This is required by StandaloneManifestIndex. + for row in manifest.rows: + iloc = row['internal_location'] + print(iloc) + assert iloc.startswith('/'), iloc + def test_sig_manifest_does_not_exist(runtmp): with pytest.raises(SourmashCommandFailed): runtmp.run_sourmash('sig', 'manifest', 'does-not-exist', '-o', 'out.csv') - assert "Cannot open 'does-not-exist'." in runtmp.last_result.err + assert "Cannot open 'does-not-exist' as a sourmash signature collection." in runtmp.last_result.err def test_sig_manifest_7_allzip_1(runtmp): @@ -3856,3 +4190,311 @@ def test_sig_kmers_2_hp(runtmp): check_mh2.add_hash(int(row['hashval'])) assert check_mh.similarity(mh) == 1.0 assert check_mh2.similarity(mh) == 1.0 + + +def test_sig_check_1(runtmp): + # basic check functionality + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, + "--picklist", f"{picklist}::manifest", + "-m", "mf.csv") + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + # all should match. 
+ with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 24 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 24 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 3 + assert 11 in ksizes + assert 21 in ksizes + assert 31 in ksizes + + +def test_sig_check_1_nofail(runtmp): + # basic check functionality with --fail-if-missing + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, + "--picklist", f"{picklist}::manifest", + "-m", "mf.csv", '--fail-if-missing') + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + # all should match. + with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 24 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 24 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 3 + assert 11 in ksizes + assert 21 in ksizes + assert 31 in ksizes + + +def test_sig_check_1_no_picklist(runtmp): + # basic check functionality + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sig', 'check', *sigfiles) + + assert "No picklist provided?! Exiting." 
in str(exc) + + +@pytest.mark.parametrize("column, coltype", + (('md5', 'md5'), + ('md5', 'md5prefix8'), + ('name', 'name'), + ('name', 'ident'), + ('name', 'identprefix'), + )) +def test_sig_check_1_column(runtmp, column, coltype): + # basic check functionality for various columns/coltypes + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, + "--picklist", f"{picklist}:{column}:{coltype}", + "-m", "mf.csv", + "-o", "missing.csv") + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + # all should match. + with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 24 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 24 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 3 + assert 11 in ksizes + assert 21 in ksizes + assert 31 in ksizes + + +def test_sig_check_1_diff_col_name(runtmp): + # 'sig check' with 'name2' column instead of default name + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist-diffcolumn.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, + "--picklist", f"{picklist}:name2:name", + "-o", "missing.csv", + '-m', 'mf.csv') + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + missing_csv = runtmp.output('missing.csv') + assert os.path.exists(missing_csv) + + # should be 24 matching manifest rows + with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 24 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 24 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 3 + assert 11 in ksizes + assert 21 in ksizes + assert 31 in ksizes + + # should be one non-matching picklist row + 
with open(missing_csv, newline='') as fp: + rows = list(csv.reader(fp)) + assert len(rows) == 2 # header row + data row + assert rows[1][0] == 'NOT THERE' + + +def test_sig_check_1_diff_col_name_exclude(runtmp): + # 'sig check' with 'name2' column, :exclude picklist + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist-diffcolumn.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, + "--picklist", f"{picklist}:name2:name:exclude", + '-m', 'mf.csv') + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + # should be 12 matching manifest rows + with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 12 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 12 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 3 + assert 11 in ksizes + assert 21 in ksizes + assert 31 in ksizes + + +def test_sig_check_1_ksize(runtmp): + # basic check functionality with selection for ksize + sigfiles = glob.glob(utils.get_test_data('gather/GCF*.sig')) + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', *sigfiles, '-k', '31', + "--picklist", f"{picklist}::manifest", + "-m", "mf.csv") + + out_mf = runtmp.output('mf.csv') + assert os.path.exists(out_mf) + + # 8 of the 24 should match. 
+ with open(out_mf, newline='') as fp: + mf = CollectionManifest.load_from_csv(fp) + assert len(mf) == 8 + + idx = sourmash.load_file_as_index(out_mf) + siglist = list(idx.signatures()) + assert len(siglist) == 8 + ksizes = set([ ss.minhash.ksize for ss in siglist ]) + assert len(ksizes) == 1 + assert 31 in ksizes + + +def test_sig_check_2_output_missing(runtmp): + # output missing all as identical to input picklist + sigfiles = utils.get_test_data('gather/combined.sig') + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', sigfiles, + "--picklist", f"{picklist}::manifest", + "-o", "missing.csv", "-m", "mf.csv") + + out_csv = runtmp.output('missing.csv') + assert os.path.exists(out_csv) + + mf_csv = runtmp.output('mf.csv') + assert not os.path.exists(mf_csv) + assert "not saving matching manifest" in runtmp.last_result.err + + # everything is missing with 'combined.sig' + with open(out_csv, newline='') as fp: + r = csv.DictReader(fp) + rows = list(r) + + assert len(rows) == 24 + + +def test_sig_check_2_output_missing_error_exit(runtmp): + # output missing all as identical to input picklist + sigfiles = utils.get_test_data('gather/combined.sig') + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + # should error exit... + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sig', 'check', sigfiles, + "--picklist", f"{picklist}::manifest", + "-o", "missing.csv", '--fail') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + # ...and also output stuff! 
+ out_csv = runtmp.output('missing.csv') + assert os.path.exists(out_csv) + + # everything is missing with 'combined.sig' + with open(out_csv, newline='') as fp: + r = csv.DictReader(fp) + rows = list(r) + + assert len(rows) == 24 + + +@pytest.mark.parametrize("column, coltype", + (('md5', 'md5'), + ('md5', 'md5prefix8'), + ('name', 'name'), + ('name', 'ident'), + ('name', 'identprefix'), + )) +def test_sig_check_2_output_missing_column(runtmp, column, coltype): + # output missing all as identical to input picklist + sigfiles = utils.get_test_data('gather/combined.sig') + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + runtmp.sourmash('sig', 'check', sigfiles, + "--picklist", f"{picklist}::manifest", + "-o", "missing.csv") + + out_csv = runtmp.output('missing.csv') + assert os.path.exists(out_csv) + + # everything is missing with 'combined.sig' + with open(out_csv, newline='') as fp: + r = csv.DictReader(fp) + rows = list(r) + + assert len(rows) == 24 + + +def test_sig_check_2_output_missing_exclude(runtmp): + # 'exclude' with '-o' shouldn't work + sigfiles = utils.get_test_data('gather/combined.sig') + picklist = utils.get_test_data('gather/salmonella-picklist.csv') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sig', 'check', sigfiles, + "--picklist", f"{picklist}:name:name:exclude", + "-o", "missing.csv") + + assert "** ERROR: Cannot use an 'exclude' picklist with '-o/--output-missing'" in str(exc) + + +def test_check_3_no_manifest(runtmp): + # fail check when no manifest, by default + sbt = utils.get_test_data('v6.sbt.zip') + picklist = utils.get_test_data('v6.sbt.zip.mf.csv') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.run_sourmash('sig', 'check', sbt, + '--picklist', f"{picklist}::manifest") + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + err = runtmp.last_result.err + assert "sig check requires a manifest by default, but no manifest present." 
in err + + +def test_check_3_no_manifest_ok(runtmp): + # generate manifest if --no-require-manifest + sbt = utils.get_test_data('v6.sbt.zip') + picklist = utils.get_test_data('v6.sbt.zip.mf.csv') + + runtmp.run_sourmash('sig', 'check', sbt, "--no-require-manifest", + '--picklist', f"{picklist}::manifest") + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + assert "for given picklist, found 7 matches to 7 distinct values" in runtmp.last_result.err diff --git a/tests/test_cmd_signature_fileinfo.py b/tests/test_cmd_signature_fileinfo.py index 534360712b..ee90fc7ba4 100644 --- a/tests/test_cmd_signature_fileinfo.py +++ b/tests/test_cmd_signature_fileinfo.py @@ -63,7 +63,7 @@ def test_fileinfo_1_sig_summarize(runtmp): def test_fileinfo_1_sig_abund(runtmp): # get basic info on a signature with abundance - sig47 = utils.get_test_data('47.abunds.fa.sig') + sig47 = utils.get_test_data('track_abund/47.fa.sig') shutil.copyfile(sig47, runtmp.output('sig47.sig')) runtmp.run_sourmash('sig', 'fileinfo', 'sig47.sig') @@ -329,4 +329,38 @@ def test_sig_fileinfo_does_not_exist(runtmp): with pytest.raises(SourmashCommandFailed): runtmp.run_sourmash('sig', 'fileinfo', 'does-not-exist') - assert "Cannot open 'does-not-exist'." in runtmp.last_result.err + assert "Cannot open 'does-not-exist' as a sourmash signature collection" in runtmp.last_result.err + + +def test_sig_fileinfo_8_manifest_works(runtmp): + # test on a manifest with relative paths, in proper location + mf = utils.get_test_data('scaled/mf.csv') + runtmp.sourmash('sig', 'fileinfo', mf) + + out = runtmp.last_result.out + print(out) + + assert '15 sketches with DNA, k=31, scaled=10000 717 total hashes' in out + assert 'num signatures: 15' in out + assert 'has manifest? yes' in out + assert 'is database? 
yes' in out + assert 'path filetype: StandaloneManifestIndex' in out + + +def test_sig_fileinfo_8_manifest_works_when_moved(runtmp): + # test on a manifest with relative paths, when in wrong place + # note: this works, unlike 'describe', because all the necessary info + # for 'fileinfo' is in the manifest. + mf = utils.get_test_data('scaled/mf.csv') + shutil.copyfile(mf, runtmp.output('mf.csv')) + + runtmp.sourmash('sig', 'fileinfo', 'mf.csv') + + out = runtmp.last_result.out + print(out) + + assert '15 sketches with DNA, k=31, scaled=10000 717 total hashes' in out + assert 'num signatures: 15' in out + assert 'has manifest? yes' in out + assert 'is database? yes' in out + assert 'path filetype: StandaloneManifestIndex' in out diff --git a/tests/test_index.py b/tests/test_index.py index ea9cc01630..35e31e0714 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -6,26 +6,27 @@ import os import zipfile import shutil -import copy import sourmash from sourmash import index from sourmash import load_one_signature, SourmashSignature from sourmash.index import (LinearIndex, ZipFileLinearIndex, make_jaccard_search_query, CounterGather, - LazyLinearIndex, MultiIndex) + LazyLinearIndex, MultiIndex, + StandaloneManifestIndex) from sourmash.index.revindex import RevIndex -from sourmash.sbt import SBT, GraphFactory, Leaf -from sourmash.sbtmh import SigLeaf +from sourmash.sbt import SBT, GraphFactory from sourmash import sourmash_args from sourmash.search import JaccardSearch, SearchType from sourmash.picklist import SignaturePicklist, PickStyle from sourmash_tst_utils import SourmashCommandFailed +from sourmash.manifest import CollectionManifest import sourmash_tst_utils as utils def test_simple_index(n_children): + # test basic SBT functionality factory = GraphFactory(5, 100, 3) root = SBT(factory, d=n_children) @@ -89,6 +90,7 @@ def test_simple_index(n_children): def test_linear_index_search(): + # test LinearIndex searching - all in memory sig2 = 
utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -133,7 +135,7 @@ def test_linear_index_search(): def test_linear_index_prefetch(): - # prefetch does basic things right: + # check that prefetch does basic things right: sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -166,7 +168,7 @@ def test_linear_index_prefetch(): def test_linear_index_prefetch_empty(): - # check that an exception is raised upon for an empty database + # check that an exception is raised upon for an empty LinearIndex sig2 = utils.get_test_data('2.fa.sig') ss2 = sourmash.load_one_signature(sig2, ksize=31) @@ -218,6 +220,7 @@ def minhash(self): def test_linear_index_gather(): + # test LinearIndex gather sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -243,7 +246,7 @@ def test_linear_index_gather(): def test_linear_index_search_subj_has_abundance(): - # check that signatures in the index are flattened appropriately. + # check that search signatures in the index are flattened appropriately. queryfile = utils.get_test_data('47.fa.sig') subjfile = utils.get_test_data('track_abund/47.fa.sig') @@ -260,7 +263,7 @@ def test_linear_index_search_subj_has_abundance(): def test_linear_index_gather_subj_has_abundance(): - # check that signatures in the index are flattened appropriately. + # check that target signatures in the index are flattened appropriately. queryfile = utils.get_test_data('47.fa.sig') subjfile = utils.get_test_data('track_abund/47.fa.sig') @@ -278,7 +281,8 @@ def test_linear_index_gather_subj_has_abundance(): def test_index_search_subj_scaled_is_lower(): - # check that subject sketches are appropriately downsampled + # check that subject sketches are appropriately downsampled for scaled + # sketches. 
sigfile = utils.get_test_data('scaled100/GCF_000005845.2_ASM584v2_genomic.fna.gz.sig.gz') ss = sourmash.load_one_signature(sigfile) @@ -300,7 +304,8 @@ def test_index_search_subj_scaled_is_lower(): def test_index_search_subj_num_is_lower(): - # check that subject sketches are appropriately downsampled + # check that subject sketches are appropriately downsampled for num + # sketches sigfile = utils.get_test_data('num/47.fa.sig') ss = sourmash.load_one_signature(sigfile, ksize=31) @@ -322,7 +327,7 @@ def test_index_search_subj_num_is_lower(): def test_index_search_query_num_is_lower(): - # check that query sketches are appropriately downsampled + # check that query sketches are appropriately downsampled for num. sigfile = utils.get_test_data('num/47.fa.sig') qs = sourmash.load_one_signature(sigfile, ksize=31) @@ -405,7 +410,7 @@ def test_linear_index_search_abund_downsample_subj(): def test_linear_index_search_abund_requires_threshold(): - # test Index.search_abund + # test that Index.search_abund requires a 'threshold' sig47 = utils.get_test_data('track_abund/47.fa.sig') sig63 = utils.get_test_data('track_abund/63.fa.sig') @@ -423,7 +428,7 @@ def test_linear_index_search_abund_requires_threshold(): def test_linear_index_search_abund_query_flat(): - # test Index.search_abund + # test that Index.search_abund requires an abund query sig sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('track_abund/63.fa.sig') @@ -441,7 +446,7 @@ def test_linear_index_search_abund_query_flat(): def test_linear_index_search_abund_subj_flat(): - # test Index.search_abund + # test Index.search_abund requires an abund subj sig47 = utils.get_test_data('track_abund/47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -459,6 +464,7 @@ def test_linear_index_search_abund_subj_flat(): def test_linear_index_save(runtmp): + # test save output from LinearIndex => JSON sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = 
utils.get_test_data('63.fa.sig') @@ -489,6 +495,7 @@ def test_linear_index_save(runtmp): def test_linear_index_load(runtmp): + # test .load class method of LinearIndex sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -497,8 +504,6 @@ def test_linear_index_load(runtmp): ss47 = sourmash.load_one_signature(sig47) ss63 = sourmash.load_one_signature(sig63) - from sourmash import save_signatures - filename = runtmp.output('foo') with open(filename, 'wt') as fp: sourmash.save_signatures([ss2, ss47, ss63], fp) @@ -511,6 +516,7 @@ def test_linear_index_load(runtmp): def test_linear_index_save_load(runtmp): + # LinearIndex save/load round trip sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -635,7 +641,7 @@ def test_linear_gather_threshold_5(): def test_linear_index_multik_select(): - # this loads three ksizes, 21/31/51 + # test that LinearIndx can load multiple (three) ksizes, 21/31/51 sig2 = utils.get_test_data('2.fa.sig') siglist = sourmash.load_file_as_signatures(sig2) @@ -683,7 +689,7 @@ def test_linear_index_moltype_select(): def test_linear_index_picklist_select(): - # test select with a picklist + # test LinearIndex.select with a picklist # this loads three ksizes, 21/31/51 sig2 = utils.get_test_data('2.fa.sig') @@ -732,8 +738,9 @@ def test_linear_index_picklist_select_exclude(): assert ksizes == set([21,51]) -@utils.in_tempdir -def test_index_same_md5sum_fsstorage(c): +def test_index_same_md5sum_fsstorage(runtmp): + # check SBT directory 'save' with two signatures that have identical md5 + c = runtmp testdata1 = utils.get_test_data('img/2706795855.sig') testdata2 = utils.get_test_data('img/638277004.sig') @@ -746,8 +753,9 @@ def test_index_same_md5sum_fsstorage(c): assert len(glob.glob(storage + "/*")) == 4 -@utils.in_tempdir -def test_index_same_md5sum_sbt_zipstorage(c): +def 
test_index_same_md5sum_sbt_zipstorage(runtmp): + # check SBT zipfile 'save' with two signatures w/identical md5 + c = runtmp testdata1 = utils.get_test_data('img/2706795855.sig') testdata2 = utils.get_test_data('img/638277004.sig') @@ -774,9 +782,10 @@ def test_zipfile_does_not_exist(runtmp): assert "ERROR: Error while reading signatures from 'no-exist.zip'." in str(exc) -@utils.in_thisdir -def test_zipfile_protein_command_search(c): +def test_zipfile_protein_command_search(runtmp): # test command-line search/gather of zipfile with protein sigs + c = runtmp + sigfile1 = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/protein.zip') @@ -788,9 +797,10 @@ def test_zipfile_protein_command_search(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_hp_command_search(c): +def test_zipfile_hp_command_search(runtmp): # test command-line search/gather of zipfile with hp sigs + c = runtmp + sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/hp.zip') @@ -802,9 +812,10 @@ def test_zipfile_hp_command_search(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_dayhoff_command_search(c): +def test_zipfile_dayhoff_command_search(runtmp): # test command-line search/gather of zipfile with dayhoff sigs + c = runtmp + sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/dayhoff.zip') @@ -816,9 +827,10 @@ def test_zipfile_dayhoff_command_search(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_protein_command_search_combined(c): +def test_zipfile_protein_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with protein sigs + c = runtmp + sigfile1 
= utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/all.zip') @@ -830,9 +842,10 @@ def test_zipfile_protein_command_search_combined(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_hp_command_search_combined(c): +def test_zipfile_hp_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with hp sigs + c = runtmp + sigfile1 = utils.get_test_data('prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/all.zip') @@ -844,9 +857,10 @@ def test_zipfile_hp_command_search_combined(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_dayhoff_command_search_combined(c): +def test_zipfile_dayhoff_command_search_combined(runtmp): # test command-line search/gather of combined zipfile with dayhoff sigs + c = runtmp + sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/all.zip') @@ -858,9 +872,10 @@ def test_zipfile_dayhoff_command_search_combined(c): assert 'the recovered matches hit 100.0% of the query' in c.last_result.out -@utils.in_thisdir -def test_zipfile_dayhoff_command_search_protein(c): +def test_zipfile_dayhoff_command_search_protein(runtmp): # test command-line search/gather of protein sigs in zipfile + c = runtmp + # with dayhoff query sigfile1 = utils.get_test_data('prot/dayhoff/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig') db_out = utils.get_test_data('prot/protein.zip') @@ -1053,6 +1068,7 @@ def test_zipfile_API_signatures_select_twice(use_manifest): def test_zipfile_API_save(): + # ZipFileLinearIndex.save is not implemented. 
zipfile_db = utils.get_test_data('prot/all.zip') zipidx = ZipFileLinearIndex.load(zipfile_db) @@ -1062,6 +1078,7 @@ def test_zipfile_API_save(): def test_zipfile_API_insert(): + # ZipFileLinearIndex.insert is not implemented. zipfile_db = utils.get_test_data('prot/all.zip') zipidx = ZipFileLinearIndex.load(zipfile_db) @@ -1072,6 +1089,7 @@ def test_zipfile_API_insert(): def test_zipfile_API_location(use_manifest): + # test ZipFileLinearIndex.location property zipfile_db = utils.get_test_data('prot/all.zip') zipidx = ZipFileLinearIndex.load(zipfile_db, use_manifest=use_manifest) @@ -1080,6 +1098,7 @@ def test_zipfile_API_location(use_manifest): def test_zipfile_load_file_as_signatures(use_manifest): + # make sure that ZipFileLinearIndex.signatures works, and is generator from types import GeneratorType zipfile_db = utils.get_test_data('prot/all.zip') @@ -1098,6 +1117,7 @@ def test_zipfile_load_file_as_signatures(use_manifest): def test_zipfile_load_file_as_signatures_traverse_yield_all(use_manifest): + # test with --force, which loads all files from types import GeneratorType zipfile_db = utils.get_test_data('prot/all.zip') @@ -1113,9 +1133,10 @@ def test_zipfile_load_file_as_signatures_traverse_yield_all(use_manifest): assert len(sigs) == 8 -@utils.in_tempdir -def test_zipfile_load_database_fail_if_not_zip(c): +def test_zipfile_load_database_fail_if_not_zip(runtmp): # fail _load_database if not .zip + c = runtmp + zipfile_db = utils.get_test_data('prot/all.zip') badname = c.output('xyz.nada') shutil.copyfile(zipfile_db, badname) @@ -1127,6 +1148,7 @@ def test_zipfile_load_database_fail_if_not_zip(c): def test_multi_index_search(): + # test MultiIndex.search sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -1181,6 +1203,7 @@ def test_multi_index_search(): def test_multi_index_gather(): + # test MultiIndex.gather sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') 
sig63 = utils.get_test_data('63.fa.sig') @@ -1211,6 +1234,7 @@ def test_multi_index_gather(): def test_multi_index_signatures(): + # test MultiIndex.signatures sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -1236,11 +1260,14 @@ def test_multi_index_signatures(): def test_multi_index_create(): + # test MultiIndex constructor mi = MultiIndex(None, None, prepend_location=False) assert len(mi) == 0 def test_multi_index_create_prepend(): + # test MultiIndex constructor - location must be specified if + # 'prepend_location is True with pytest.raises(ValueError): mi = MultiIndex(None, None, prepend_location=True) @@ -1271,7 +1298,7 @@ def test_multi_index_load_from_directory(): # also check internal locations and parent value -- assert mi.parent.endswith('prot/protein') - ilocs = [ x[2] for x in mi._signatures_with_internal() ] + ilocs = [ x[1] for x in mi._signatures_with_internal() ] assert endings[0] in ilocs, ilocs assert endings[1] in ilocs, ilocs @@ -1285,9 +1312,23 @@ def test_multi_index_load_from_directory_2(): assert len(sigs) == 7 -@utils.in_tempdir -def test_multi_index_load_from_directory_3(c): - # check that force works ok on a directory +def test_multi_index_load_from_directory_3_simple_bad_file(runtmp): + # check that force=False fails properly when confronted with non-JSON + # files. + c = runtmp + + with open(runtmp.output('badsig.sig'), 'wt') as fp: + fp.write('bad content.') + + with pytest.raises(ValueError): + mi = MultiIndex.load_from_directory(runtmp.location, force=False) + + +def test_multi_index_load_from_directory_3(runtmp): + # check that force=False fails properly when confronted with non-JSON + # files that are legit sourmash files... 
+ c = runtmp + dirname = utils.get_test_data('prot') count = 0 @@ -1299,13 +1340,15 @@ def test_multi_index_load_from_directory_3(c): shutil.copyfile(fullname, copyto) count += 1 - with pytest.raises(sourmash.exceptions.SourmashError): + with pytest.raises(ValueError): mi = MultiIndex.load_from_directory(c.location, force=False) -@utils.in_tempdir -def test_multi_index_load_from_directory_3_yield_all_true(c): +def test_multi_index_load_from_directory_3_yield_all_true(runtmp): # check that force works ok on a directory w/force=True + # Note here that only .sig/.sig.gz files are loaded. + c = runtmp + dirname = utils.get_test_data('prot') count = 0 @@ -1323,9 +1366,10 @@ def test_multi_index_load_from_directory_3_yield_all_true(c): assert len(sigs) == 8 -@utils.in_tempdir -def test_multi_index_load_from_directory_3_yield_all_true_subdir(c): - # check that force works ok on subdirectories +def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp): + # check that force works ok on subdirectories. + # Note here that only .sig/.sig.gz files are loaded. + c = runtmp dirname = utils.get_test_data('prot') target_dir = c.output("some_subdir") @@ -1342,13 +1386,17 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(c): mi = MultiIndex.load_from_directory(c.location, force=True) + locations = set([ row['internal_location'] for row in mi.manifest.rows ]) + print(locations) + sigs = list(mi.signatures()) assert len(sigs) == 8 -@utils.in_tempdir -def test_multi_index_load_from_directory_3_sig_gz(c): +def test_multi_index_load_from_directory_3_sig_gz(runtmp): # check that we find .sig.gz files, too + c = runtmp + dirname = utils.get_test_data('prot') count = 0 @@ -1370,11 +1418,12 @@ def test_multi_index_load_from_directory_3_sig_gz(c): assert len(sigs) == 6 -@utils.in_tempdir -def test_multi_index_load_from_directory_3_check_traverse_fn(c): +def test_multi_index_load_from_directory_3_check_traverse_fn(runtmp): # test the actual traverse function... 
eventually this test can be # removed, probably, as we consolidate functionality and test MultiIndex # better. + c = runtmp + dirname = utils.get_test_data('prot') files = list(sourmash_args.traverse_find_sigs([dirname])) assert len(files) == 7, files @@ -1384,12 +1433,14 @@ def test_multi_index_load_from_directory_3_check_traverse_fn(c): def test_multi_index_load_from_directory_no_exist(): + # raise ValueError on files that don't exist in load_from_directory dirname = utils.get_test_data('does-not-exist') with pytest.raises(ValueError): mi = MultiIndex.load_from_directory(dirname, force=True) def test_multi_index_load_from_file_path(): + # test that MultiIndex.load_from_path works fine sig2 = utils.get_test_data('2.fa.sig') mi = MultiIndex.load_from_path(sig2) @@ -1398,19 +1449,23 @@ def test_multi_index_load_from_file_path(): def test_multi_index_load_from_file_path_no_exist(): + # test that load_from_path fails on non-existent files filename = utils.get_test_data('does-not-exist') with pytest.raises(ValueError): mi = MultiIndex.load_from_directory(filename, force=True) def test_multi_index_load_from_pathlist_no_exist(): + # test that load_from_pathlist fails on non-existent files dirname = utils.get_test_data('does-not-exist') with pytest.raises(ValueError): mi = MultiIndex.load_from_pathlist(dirname) -@utils.in_tempdir -def test_multi_index_load_from_pathlist_1(c): +def test_multi_index_load_from_pathlist_1(runtmp): + # test functionality of MultiIndex.load_from_pathlist with .sig files + c = runtmp + dirname = utils.get_test_data('prot') files = list(sourmash_args.traverse_find_sigs([dirname])) assert len(files) == 7, files @@ -1427,10 +1482,13 @@ def test_multi_index_load_from_pathlist_1(c): assert mi.location == file_list -@utils.in_tempdir -def test_multi_index_load_from_pathlist_2(c): +def test_multi_index_load_from_pathlist_2(runtmp): + # create a pathlist file with _all_ files under dir, and try to load it. 
+ # this will fail on one of several CSV or .sh files in there. + # CTB note: if you create extra files under this directory, # it will fail :) + c = runtmp dirname = utils.get_test_data('prot') files = list(sourmash_args.traverse_find_sigs([dirname], True)) assert len(files) == 20, files # check there aren't extra files in here! @@ -1440,13 +1498,17 @@ def test_multi_index_load_from_pathlist_2(c): with open(file_list, 'wt') as fp: print("\n".join(files), file=fp) - with pytest.raises(ValueError): + with pytest.raises(ValueError) as exc: mi = MultiIndex.load_from_pathlist(file_list) + print(str(exc)) + assert 'Error while reading signatures from' in str(exc) + -@utils.in_tempdir -def test_multi_index_load_from_pathlist_3_zipfile(c): +def test_multi_index_load_from_pathlist_3_zipfile(runtmp): # can we load zipfiles in a pathlist? yes please. + c = runtmp + zipfile = utils.get_test_data('prot/all.zip') file_list = c.output('filelist.txt') @@ -1483,6 +1545,7 @@ def collect(self, score, match): def test_linear_index_gather_ignore(): + # do we properly ignore exact matches in 'search' for LinearIndex? sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -1513,6 +1576,7 @@ def is_found(ss, xx): def test_lca_index_gather_ignore(): + # do we properly ignore exact matches in gather on an LCA DB? from sourmash.lca import LCA_Database sig2 = utils.get_test_data('2.fa.sig') @@ -1548,6 +1612,7 @@ def is_found(ss, xx): def test_sbt_index_gather_ignore(): + # do we properly ignore exact matches in gather on an SBT? sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -1985,6 +2050,7 @@ def test_counter_gather_add_after_consume(): def test_counter_gather_consume_empty_intersect(): + # check that consume works fine when there is an empty signature. 
query_mh = sourmash.MinHash(n=0, ksize=31, scaled=1) query_mh.add_many(range(0, 20)) query_ss = SourmashSignature(query_mh, name='query') @@ -2292,6 +2358,8 @@ def test_lazy_index_5_len(): def test_lazy_index_wraps_multi_index_location(): + # check that 'location' works fine when MultiIndex is wrapped by + # LazyLinearIndex. sigdir = utils.get_test_data('prot/protein/') sigzip = utils.get_test_data('prot/protein.zip') siglca = utils.get_test_data('prot/protein.lca.json.gz') @@ -2388,7 +2456,9 @@ def test_lazy_loaded_index_3_find(runtmp): x = list(x) assert len(x) == 0 + def test_revindex_index_search(): + # confirm that RevIndex works sig2 = utils.get_test_data("2.fa.sig") sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") @@ -2433,6 +2503,7 @@ def test_revindex_index_search(): def test_revindex_gather(): + # check that RevIndex.gather works. sig2 = utils.get_test_data("2.fa.sig") sig47 = utils.get_test_data("47.fa.sig") sig63 = utils.get_test_data("63.fa.sig") @@ -2458,6 +2529,7 @@ def test_revindex_gather(): def test_revindex_gather_ignore(): + # check that RevIndex gather ignores things properly. sig2 = utils.get_test_data('2.fa.sig') sig47 = utils.get_test_data('47.fa.sig') sig63 = utils.get_test_data('63.fa.sig') @@ -2485,3 +2557,253 @@ def is_found(ss, xx): assert not is_found(ss47, results) assert not is_found(ss2, results) assert is_found(ss63, results) + + +def test_standalone_manifest_signatures(runtmp): + # build a StandaloneManifestIndex and test 'signatures' method. + + ## first, build a manifest in memory using MultiIndex + sig47 = utils.get_test_data('47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + ss47 = sourmash.load_one_signature(sig47) + ss63 = sourmash.load_one_signature(sig63) + + lidx1 = LinearIndex.load(sig47) + lidx2 = LinearIndex.load(sig63) + + mi = MultiIndex.load([lidx1, lidx2], [sig47, sig63], "") + + ## got a manifest! 
ok, now test out StandaloneManifestIndex + mm = StandaloneManifestIndex(mi.manifest, None) + + siglist = [ ss for ss in mm.signatures() ] + assert len(siglist) == 2 + assert ss47 in siglist + assert ss63 in siglist + + +def test_standalone_manifest_signatures_prefix(runtmp): + # try out 'prefix' for StandaloneManifestIndex + + ## first, build a manifest in memory using MultiIndex + sig47 = utils.get_test_data('47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + ss47 = sourmash.load_one_signature(sig47) + ss63 = sourmash.load_one_signature(sig63) + + lidx1 = LinearIndex.load(sig47) + lidx2 = LinearIndex.load(sig63) + mi = MultiIndex.load([lidx1, lidx2], [sig47, sig63], "") + + # ok, now remove the abspath prefix from iloc + for row in mi.manifest.rows: + row['internal_location'] = os.path.basename(row['internal_location']) + + ## this should succeed! + mm = StandaloneManifestIndex(mi.manifest, None, + prefix=utils.get_test_data('')) + + assert len(list(mm.signatures())) == 2 + + +def test_standalone_manifest_signatures_prefix_fail(runtmp): + # give StandaloneManifest the wrong prefix + + ## first, build a manifest in memory using MultiIndex + sig47 = utils.get_test_data('47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + ss47 = sourmash.load_one_signature(sig47) + ss63 = sourmash.load_one_signature(sig63) + + lidx1 = LinearIndex.load(sig47) + lidx2 = LinearIndex.load(sig63) + print('XXX', lidx1.location) + + mi = MultiIndex.load([lidx1, lidx2], [sig47, sig63], "") + + # remove prefix from manifest + for row in mi.manifest.rows: + row['internal_location'] = os.path.basename(row['internal_location']) + + ## got a manifest! 
ok, now test out StandaloneManifestIndex + mm = StandaloneManifestIndex(mi.manifest, None, prefix='foo') + + # should fail + with pytest.raises(ValueError) as exc: + list(mm.signatures()) + + assert "Error while reading signatures from 'foo/47.fa.sig'" in str(exc) + + +def test_standalone_manifest_load_from_dir(runtmp): + # test loading a mf with relative directory paths from test-data + mf = utils.get_test_data('scaled/mf.csv') + idx = sourmash.load_file_as_index(mf) + + siglist = list(idx.signatures()) + assert len(siglist) == 15 + + assert idx # should be 'True' + assert len(idx) == 15 + + with pytest.raises(NotImplementedError): + idx.insert() + + with pytest.raises(NotImplementedError): + idx.save('foo') + + assert idx.location == mf + + +def test_standalone_manifest_lazy_load(runtmp): + # check that it's actually doing lazy loading + orig_sig47 = utils.get_test_data('47.fa.sig') + sig47 = runtmp.output('47.fa.sig') + + # build an external manifest + shutil.copyfile(orig_sig47, sig47) + + # this is an abspath to sig47 + runtmp.sourmash('sig', 'manifest', sig47, '-o', 'mf.csv') + + # should work to get signatures: + idx = StandaloneManifestIndex.load(runtmp.output('mf.csv')) + + siglist = list(idx.signatures()) + assert len(siglist) == 1 + + # now remove! + os.unlink(sig47) + + # can still access manifest... + assert len(idx) == 1 + + # ...but we should get an error when we call signatures. + with pytest.raises(ValueError): + list(idx.signatures()) + + # but put it back, and all is forgiven. yay! 
+ shutil.copyfile(orig_sig47, sig47) + x = list(idx.signatures()) + assert len(x) == 1 + + +def test_standalone_manifest_lazy_load_2_prefix(runtmp): + # check that it's actually doing lazy loading; supply explicit prefix + orig_sig47 = utils.get_test_data('47.fa.sig') + sig47 = runtmp.output('47.fa.sig') + + # build an external manifest + # note, here use a relative path to 47.fa.sig; the manifest will contain + # just '47.fa.sig' as the location + shutil.copyfile(orig_sig47, sig47) + runtmp.sourmash('sig', 'manifest', '47.fa.sig', '-o', 'mf.csv') + + # should work to get signatures: + idx = StandaloneManifestIndex.load(runtmp.output('mf.csv'), + prefix=runtmp.output('')) + + siglist = list(idx.signatures()) + assert len(siglist) == 1 + + # now remove! + os.unlink(sig47) + + # can still access manifest... + assert len(idx) == 1 + + # ...but we should get an error when we call signatures. + with pytest.raises(ValueError): + list(idx.signatures()) + + # but put it back, and all is forgiven. yay! + shutil.copyfile(orig_sig47, sig47) + x = list(idx.signatures()) + assert len(x) == 1 + + +def test_standalone_manifest_search(runtmp): + # test a straight up 'search' + query_sig = utils.get_test_data('scaled/genome-s12.fa.gz.sig') + mf = utils.get_test_data('scaled/mf.csv') + + runtmp.sourmash('search', query_sig, mf) + + out = runtmp.last_result.out + print(out) + assert '100.0% d84ef28f' in out + + +def test_standalone_manifest_prefetch_lazy(runtmp): + # check that prefetch is actually doing lazy loading on manifest index. 
+ orig_sig47 = utils.get_test_data('47.fa.sig') + sig47 = runtmp.output('47.fa.sig') + orig_sig2 = utils.get_test_data('2.fa.sig') + sig2 = runtmp.output('2.fa.sig') + orig_sig63 = utils.get_test_data('63.fa.sig') + sig63 = runtmp.output('63.fa.sig') + + shutil.copyfile(orig_sig47, sig47) + runtmp.sourmash('sig', 'manifest', sig47, '-o', 'mf1.csv') + shutil.copyfile(orig_sig2, sig2) + runtmp.sourmash('sig', 'manifest', sig2, '-o', 'mf2.csv') + shutil.copyfile(orig_sig63, sig63) + runtmp.sourmash('sig', 'manifest', sig63, '-o', 'mf3.csv') + + # combine the manifests, manually for now... + mf1 = CollectionManifest.load_from_filename(runtmp.output('mf1.csv')) + assert len(mf1) == 1 + + mf2 = CollectionManifest.load_from_filename(runtmp.output('mf2.csv')) + assert len(mf2) == 3 + + mf3 = CollectionManifest.load_from_filename(runtmp.output('mf3.csv')) + assert len(mf3) == 1 + + mf = mf1 + mf2 + mf3 + assert len(mf) == 5 + + mf.write_to_filename(runtmp.output('mf.csv')) + + # ok! now, remove the last signature, 'sig63'. + os.unlink(sig63) + + # ...but loading the manifest should still work. + idx = StandaloneManifestIndex.load(runtmp.output('mf.csv')) + + # double check - third load will fail. this relies on load order :shrug:. + sig_iter = iter(idx.signatures()) + ss = next(sig_iter) + print(ss) + assert '47.fa' in ss.filename + + for i in range(3): + ss = next(sig_iter) + print(i, ss) + assert '2.fa' in ss.filename + + with pytest.raises(ValueError) as exc: + ss = next(sig_iter) + assert 'Error while reading signatures from' in str(exc) + assert '63.fa.sig' in str(exc) + + # ok! now test prefetch... should get one match legit, to 47, + # and then no matches to 2, and then error. + + ss47 = sourmash.load_one_signature(sig47) + idx = idx.select(ksize=31) + g = idx.prefetch(ss47, threshold_bp=0) + + # first value: + sr = next(g) + assert sr.signature == ss47 + + # second value should raise error. 
+ with pytest.raises(ValueError) as exc: + sr = next(g) + + assert 'Error while reading signatures from' in str(exc) + assert '63.fa.sig' in str(exc) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 35f3fec14e..b34cbe4dc4 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -17,7 +17,7 @@ def test_generate_manifest(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -35,6 +35,30 @@ def test_generate_manifest(): assert sig in manifest +def test_manifest_operations(): + # test basic manifest operations - += + protzip = utils.get_test_data('prot/protein.zip') + + loader = sourmash.load_file_as_index(protzip) + + rows = [] + siglist = [] + for (sig, loc) in loader._signatures_with_internal(): + row = index.CollectionManifest.make_manifest_row(sig, loc) + rows.append(row) + siglist.append(sig) + + manifest = index.CollectionManifest(rows) + manifest += manifest + + assert len(manifest) == 2*len(rows) + assert len(manifest) == 4 + + md5_list = [ row['md5'] for row in manifest.rows ] + assert '16869d2c8a1d29d1c8e56f5c561e585e' in md5_list + assert '120d311cc785cc9d0df9dc0646b2b857' in md5_list + + def test_manifest_to_picklist(): # test manifest/picklist interaction basics protzip = utils.get_test_data('prot/protein.zip') @@ -43,7 +67,7 @@ def test_manifest_to_picklist(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -64,7 +88,7 @@ def test_save_load_manifest(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) 
rows.append(row) siglist.append(sig) diff --git a/tests/test_minhash.py b/tests/test_minhash.py index 69e6c99edb..2bac12751a 100644 --- a/tests/test_minhash.py +++ b/tests/test_minhash.py @@ -1713,7 +1713,7 @@ def test_inflate(): def test_inflate_error(): - # test behavior of inflate function + # test behavior of inflate function with 'self' as an abund sketch scaled = _get_scaled_for_max_hash(35) mh = MinHash(0, 4, track_abundance=True, scaled=scaled) mh2 = MinHash(0, 4, track_abundance=True, scaled=scaled) @@ -1746,6 +1746,39 @@ def test_inflate_error(): assert "inflate operates on a flat MinHash and takes a MinHash object with track_abundance=True" in str(exc.value) +def test_inflate_not_a_subset(): + # test behavior of inflate function when 'from_mh' is not a subset. + scaled = _get_scaled_for_max_hash(35) + mh = MinHash(0, 4, track_abundance=False, scaled=scaled) + mh2 = MinHash(0, 4, track_abundance=True, scaled=scaled) + assert mh._max_hash == 35 + + mh.add_hash(10) + mh.add_hash(20) + mh.add_hash(30) + + assert mh.hashes[10] == 1 + assert mh.hashes[20] == 1 + assert mh.hashes[30] == 1 + + mh2.add_hash(10) + mh2.add_hash(10) + mh2.add_hash(10) + mh2.add_hash(30) + mh2.add_hash(30) + mh2.add_hash(30) + + assert mh2.hashes[10] == 3 + assert 20 not in mh2.hashes + assert mh2.hashes[30] == 3 + + mh3 = mh.inflate(mh2) + + assert mh3.hashes[10] == 3 + assert 20 not in mh3.hashes # should intersect, in this case. 
+ assert mh3.hashes[30] == 3 + + def test_add_kmer(track_abundance): # test add_kmer method mh1 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) diff --git a/tests/test_sbt.py b/tests/test_sbt.py index 9413b84769..cb5b043c91 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -1205,3 +1205,15 @@ def test_build_sbt_json_with_dups_exists(runtmp): assert len(sbt_sigs) == 4 assert all_sigs == sbt_sigs + + +def test_load_fail_on_file_not_dir(runtmp): + # make sure the load function raises a ValueError for {filename}/sbt, + # rather than a NotADirectoryError + + filename = runtmp.output('foo') + with open(filename, 'wt') as fp: + fp.write('something') + + with pytest.raises(ValueError) as exc: + x = SBT.load(runtmp.output('foo/bar.sbt.json')) diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 4d994049fc..38ba835cb4 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -5360,6 +5360,61 @@ def test_gather_scaled_1(runtmp, linear_gather, prefetch_gather): assert "1.0 kbp 100.0% 100.0%" in runtmp.last_result.out assert "found 1 matches total;" in runtmp.last_result.out + +def test_standalone_manifest_search(runtmp): + # test loading/searching a manifest file from the command line. + sig47 = utils.get_test_data('47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + dirname = runtmp.output('somedir') + os.mkdir(dirname) + subdir = runtmp.output('somedir/subdir') + os.mkdir(subdir) + shutil.copyfile(sig47, os.path.join(dirname, '47.fa.sig')) + shutil.copyfile(sig63, os.path.join(subdir, '63.fa.sig')) + + # for now, the output manifest must be within top level dir for + # CLI stuff to work properly. + mf = os.path.join(dirname, 'mf.csv') + + # build manifest... + runtmp.sourmash('sig', 'manifest', dirname, '-o', mf) + + # ...and now use for a search! 
+ runtmp.sourmash('search', sig47, mf) + + out = runtmp.last_result.out + print(out) + print(runtmp.last_result.err) + + assert "100.0% NC_009665.1 Shewanella baltica OS185, complete genome" in out + + +def test_standalone_manifest_search_fail(runtmp): + # test loading/searching a manifest file from the command line; should + # fail if manifest is not located within tld. + sig47 = utils.get_test_data('47.fa.sig') + sig63 = utils.get_test_data('63.fa.sig') + + dirname = runtmp.output('somedir') + os.mkdir(dirname) + subdir = runtmp.output('somedir/subdir') + os.mkdir(subdir) + shutil.copyfile(sig47, os.path.join(dirname, '47.fa.sig')) + shutil.copyfile(sig63, os.path.join(subdir, '63.fa.sig')) + + # for now, the output manifest must be within top level dir for + # CLI stuff to work properly. here we intentionally break this, + # for testing purposes. + mf = runtmp.output('mf.csv') + + # build manifest... + runtmp.sourmash('sig', 'manifest', dirname, '-o', mf) + + # ...and now use for a search! 
+ with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('search', sig47, mf) + @utils.in_tempdir def test_search_ani_jaccard(c): diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py index aaff6209b5..969a6b6df8 100644 --- a/tests/test_sourmash_args.py +++ b/tests/test_sourmash_args.py @@ -447,7 +447,7 @@ class FakeIndex(LinearIndex): was_called = 0 def _signatures_with_internal(self): self.was_called = 1 - return [(ss47, "fakeloc", "fakeiloc")] + return [(ss47, "fakeiloc")] idx = FakeIndex([sig47]) @@ -471,7 +471,7 @@ class FakeIndex(LinearIndex): def _signatures_with_internal(self): self.was_called = 1 - return [(ss47, "fakeloc", "fakeiloc")] + return [(ss47, "fakeiloc")] idx = FakeIndex([sig47]) diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py index 20769fc27e..9f7912ade9 100644 --- a/tests/test_sourmash_sketch.py +++ b/tests/test_sourmash_sketch.py @@ -19,6 +19,7 @@ from sourmash.command_compute import ComputeParameters from sourmash.cli.compute import subparser from sourmash.cli import SourmashParser +from sourmash import manifest from sourmash import signature from sourmash import VERSION @@ -67,8 +68,136 @@ def test_do_sourmash_sketch_check_num_bounds_more_than_maximum(runtmp): assert "WARNING: num value should be <= 50000. Continuing anyway." 
in runtmp.last_result.err +def test_empty_factory(): + with pytest.raises(ValueError): + factory = _signatures_for_sketch_factory([], None) + + +def test_no_default_moltype_factory_nonempty(): + with pytest.raises(ValueError): + factory = _signatures_for_sketch_factory(["k=31"], None) + + +def test_factory_no_default_moltype_dna(): + factory = _signatures_for_sketch_factory(['dna'], None) + params_list = list(factory.get_compute_params()) + assert len(params_list) == 1 + + params = params_list[0] + assert params.dna + + +def test_factory_no_default_moltype_protein(): + factory = _signatures_for_sketch_factory(['protein'], None) + params_list = list(factory.get_compute_params()) + assert len(params_list) == 1 + + params = params_list[0] + assert params.protein + + +def test_factory_dna_nosplit(): + factory = _signatures_for_sketch_factory(['k=31,k=51'], 'dna') + params_list = list(factory.get_compute_params(split_ksizes=False)) + assert len(params_list) == 1 + + params = params_list[0] + assert params.ksizes == [31,51] + + +def test_factory_dna_split(): + factory = _signatures_for_sketch_factory(['k=31,k=51'], 'dna') + params_list = list(factory.get_compute_params(split_ksizes=True)) + assert len(params_list) == 2 + + params = params_list[0] + assert params.ksizes == [31] + params = params_list[1] + assert params.ksizes == [51] + + +def test_factory_protein_nosplit(): + factory = _signatures_for_sketch_factory(['k=10,k=9'], 'protein') + params_list = list(factory.get_compute_params(split_ksizes=False)) + assert len(params_list) == 1 + + params = params_list[0] + assert params.ksizes == [30, 27] + + +def test_factory_protein_split(): + factory = _signatures_for_sketch_factory(['k=10,k=9'], 'protein') + params_list = list(factory.get_compute_params(split_ksizes=True)) + assert len(params_list) == 2 + + params = params_list[0] + assert params.ksizes == [30] + params = params_list[1] + assert params.ksizes == [27] + + +def test_factory_dna_equal(): + factory1 = 
_signatures_for_sketch_factory(['dna'], None) + params_list1 = list(factory1.get_compute_params()) + assert len(params_list1) == 1 + params1 = params_list1[0] + + factory2 = _signatures_for_sketch_factory([], 'dna') + params_list2 = list(factory2.get_compute_params()) + assert len(params_list2) == 1 + params2 = params_list2[0] + + assert params1 == params2 + assert repr(params1) == repr(params2) + + +def test_factory_protein_equal(): + factory1 = _signatures_for_sketch_factory(['protein'], None) + params_list1 = list(factory1.get_compute_params()) + assert len(params_list1) == 1 + params1 = params_list1[0] + + factory2 = _signatures_for_sketch_factory([], 'protein') + params_list2 = list(factory2.get_compute_params()) + assert len(params_list2) == 1 + params2 = params_list2[0] + + assert params1 == params2 + assert repr(params1) == repr(params2) + + +def test_factory_dna_multi_ksize_eq(): + factory1 = _signatures_for_sketch_factory(['k=21,k=31,dna'], None) + params_list1 = list(factory1.get_compute_params()) + assert len(params_list1) == 1 + params1 = params_list1[0] + + factory2 = _signatures_for_sketch_factory(['k=21,k=31'], 'dna') + params_list2 = list(factory2.get_compute_params()) + assert len(params_list2) == 1 + params2 = params_list2[0] + + assert params1 == params2 + assert repr(params1) == repr(params2) + + +def test_factory_protein_multi_ksize_eq(): + factory1 = _signatures_for_sketch_factory(['k=10,k=11,protein'], None) + params_list1 = list(factory1.get_compute_params()) + assert len(params_list1) == 1 + params1 = params_list1[0] + + factory2 = _signatures_for_sketch_factory(['k=10,k=11'], 'protein') + params_list2 = list(factory2.get_compute_params()) + assert len(params_list2) == 1 + params2 = params_list2[0] + + assert params1 == params2 + assert repr(params1) == repr(params2) + + def test_dna_defaults(): - factory = _signatures_for_sketch_factory([], 'dna', False) + factory = _signatures_for_sketch_factory([], 'dna') params_list = 
list(factory.get_compute_params()) assert len(params_list) == 1 @@ -87,7 +216,7 @@ def test_dna_defaults(): def test_dna_override_1(): factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'], - 'dna', False) + 'dna') params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -106,44 +235,41 @@ def test_dna_override_1(): def test_scaled_param_requires_equal(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,scaled'], - 'dna', False) + factory = _signatures_for_sketch_factory(['k=21,scaled'], 'dna') def test_k_param_requires_equal(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k'], - 'dna', False) + factory = _signatures_for_sketch_factory(['k'], 'dna') def test_k_param_requires_equal_2(): with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['k='], - 'dna', False) + factory = _signatures_for_sketch_factory(['k='], 'dna') + def test_seed_param_requires_equal(): with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['seed='], - 'dna', False) + factory = _signatures_for_sketch_factory(['seed='], 'dna') + def test_num_param_requires_equal(): with pytest.raises(ValueError) as exc: - factory = _signatures_for_sketch_factory(['num='], - 'dna', False) + factory = _signatures_for_sketch_factory(['num='], 'dna') + def test_dna_override_bad_1(): with pytest.raises(ValueError): factory = _signatures_for_sketch_factory(['k=21,scaledFOO=2000,abund'], - 'dna', False) + 'dna') def test_dna_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,protein'], - 'dna', False) + factory = _signatures_for_sketch_factory(['k=21,protein'], 'dna') def test_protein_defaults(): - factory = _signatures_for_sketch_factory([], 'protein', True) + factory = _signatures_for_sketch_factory([], 'protein') params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -162,12 +288,11 @@ 
def test_protein_defaults(): def test_protein_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], - 'protein', False) + factory = _signatures_for_sketch_factory(['k=21,dna'], 'protein') def test_protein_override_bad_rust_foo(): # mimic 'sourmash sketch protein -p dna' - factory = _signatures_for_sketch_factory([], 'protein', False) + factory = _signatures_for_sketch_factory([], 'protein') # reach in and avoid error checking to construct a bad params_list. factory.params_list = [('dna', {})] @@ -188,7 +313,7 @@ def test_protein_override_bad_rust_foo(): def test_dayhoff_defaults(): - factory = _signatures_for_sketch_factory([], 'dayhoff', True) + factory = _signatures_for_sketch_factory([], 'dayhoff') params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -207,11 +332,10 @@ def test_dayhoff_defaults(): def test_dayhoff_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], - 'dayhoff', False) + factory = _signatures_for_sketch_factory(['k=21,dna'], 'dayhoff') def test_hp_defaults(): - factory = _signatures_for_sketch_factory([], 'hp', True) + factory = _signatures_for_sketch_factory([], 'hp') params_list = list(factory.get_compute_params()) assert len(params_list) == 1 @@ -230,8 +354,7 @@ def test_hp_defaults(): def test_hp_override_bad_2(): with pytest.raises(ValueError): - factory = _signatures_for_sketch_factory(['k=21,dna'], - 'hp', False) + factory = _signatures_for_sketch_factory(['k=21,dna'], 'hp') def test_multiple_moltypes(): @@ -239,7 +362,7 @@ def test_multiple_moltypes(): 'k=19,num=400,dayhoff,abund', 'k=30,scaled=200,hp', 'k=30,scaled=200,seed=58'] - factory = _signatures_for_sketch_factory(params_foo, 'protein', True) + factory = _signatures_for_sketch_factory(params_foo, 'protein') params_list = list(factory.get_compute_params()) assert len(params_list) == 4 @@ -289,6 +412,110 @@ def test_multiple_moltypes(): assert 
params.protein +@pytest.mark.parametrize("input_param_str, expected_output", + [('protein', 'protein,k=10,scaled=200'), + ('dna', 'dna,k=31,scaled=1000'), + ('hp', 'hp,k=42,scaled=200'), + ('dayhoff', 'dayhoff,k=16,scaled=200'), + ('dna,seed=52', 'dna,k=31,scaled=1000,seed=52'), + ('dna,num=500', 'dna,k=31,num=500'), + ('scaled=1100,dna', 'dna,k=31,scaled=1100'), + ('dna,abund', 'dna,k=31,scaled=1000,abund') + ]) +def test_compute_parameters_to_param_str(input_param_str, expected_output): + factory = _signatures_for_sketch_factory([input_param_str], None) + params_list = list(factory.get_compute_params()) + assert len(params_list) == 1 + params = params_list[0] + + actual_output_str = params.to_param_str() + + assert actual_output_str == expected_output, (actual_output_str, + expected_output) + + +def test_manifest_row_to_compute_parameters_1(): + # test ComputeParameters.from_manifest_row with moltype 'DNA' + row = dict(moltype='DNA', + ksize=21, + num=0, scaled=1000, + with_abundance=1) + p = ComputeParameters.from_manifest_row(row) + assert p.dna + assert not p.protein + assert not p.dayhoff + assert not p.hp + assert p.moltype == 'DNA' + assert p.num_hashes == 0 + assert p.scaled == 1000 + assert p.ksizes == [21] + assert p.track_abundance + assert p.seed == 42 + + +def test_manifest_row_to_compute_parameters_2(): + # test ComputeParameters.from_manifest_row with moltype 'protein' + row = dict(moltype='protein', + ksize=10, + num=0, scaled=200, + with_abundance=1) + p = ComputeParameters.from_manifest_row(row) + assert not p.dna + assert p.protein + assert p.moltype == 'protein' + assert not p.dayhoff + assert not p.hp + assert p.num_hashes == 0 + assert p.scaled == 200 + assert p.ksizes == [30] + assert p.track_abundance + assert p.seed == 42 + + +def test_manifest_row_to_compute_parameters_3(): + # test ComputeParameters.from_manifest_row with moltype 'dayhoff' + row = dict(moltype='dayhoff', + ksize=12, + num=0, scaled=200, + with_abundance=0) + p = 
ComputeParameters.from_manifest_row(row) + assert not p.dna + assert not p.protein + assert p.dayhoff + assert p.moltype == 'dayhoff' + assert not p.hp + assert p.num_hashes == 0 + assert p.scaled == 200 + assert p.ksizes == [36] + assert not p.track_abundance + assert p.seed == 42 + + +def test_manifest_row_to_compute_parameters_4(): + # test ComputeParameters.from_manifest_row with moltype 'hp' + row = dict(moltype='hp', + ksize=32, + num=0, scaled=200, + with_abundance=0) + p = ComputeParameters.from_manifest_row(row) + assert not p.dna + assert not p.protein + assert not p.dayhoff + assert p.hp + assert p.moltype == 'hp' + assert p.num_hashes == 0 + assert p.scaled == 200 + assert p.ksizes == [96] + assert not p.track_abundance + assert p.seed == 42 + + +def test_bad_compute_parameters(): + p = ComputeParameters([31], 42, 0, 0, 0, 0, 0, True, 1000) + with pytest.raises(AssertionError): + p.moltype + + ### command line tests @@ -324,6 +551,42 @@ def test_do_sourmash_sketchdna(runtmp): assert str(sig).endswith('short.fa') +def test_do_sourmash_sketchdna_check_sequence_succeed(runtmp): + testdata1 = utils.get_test_data('short.fa') + runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence') + + sigfile = runtmp.output('short.fa.sig') + assert os.path.exists(sigfile) + + sig = next(signature.load_signatures(sigfile)) + assert str(sig).endswith('short.fa') + + +def test_do_sourmash_sketchdna_check_sequence_fail(runtmp): + testdata1 = utils.get_test_data('shewanella.faa') + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence') + + err = runtmp.last_result.err + print(err) + assert "ERROR when reading from " in err + assert "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + + +def test_do_sourmash_sketchdna_check_sequence_fail_singleton(runtmp): + testdata1 = utils.get_test_data('shewanella.faa') + + with pytest.raises(SourmashCommandFailed) as exc: + 
runtmp.sourmash('sketch', 'dna', testdata1, '--check-sequence', + '--singleton') + + err = runtmp.last_result.err + print(err) + assert "ERROR when reading from " in err + assert "invalid DNA character in input k-mer: MCGIVGAVAQRDVAEILVEGLRRLEYRGYDS" in err + + def test_do_sourmash_sketchdna_from_file(runtmp): testdata1 = utils.get_test_data('short.fa') @@ -1262,3 +1525,445 @@ def test_dayhoff_with_stop_codons(runtmp): assert cli_mh2.contained_by(cli_mh1) < 1 assert py_mh2.contained_by(cli_mh1) < 1 assert h_mh2.contained_by(h_mh1) < 1 + + +### test sourmash sketch fromfile + + +def test_fromfile_dna(runtmp): + # does it run? yes, hopefully. + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + siglist = list(idx.signatures()) + + assert len(siglist) == 1 + ss = siglist[0] + assert ss.name == 'GCA_903797575 Salmonella enterica' + assert ss.minhash.moltype == 'DNA' + assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err + + +def test_fromfile_dna_empty(runtmp): + # test what happens on empty files. + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + # zero out the file + with gzip.open(runtmp.output('sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz'), 'w') as fp: + pass + + # now what happens? 
+ with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna') + + print(runtmp.last_result.out) + err = runtmp.last_result.err + print(err) + + assert "ERROR: no sequences found in " in err + + +def test_fromfile_dna_check_sequence_succeed(runtmp): + # does it run? yes, hopefully. + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna', '--check-sequence') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + siglist = list(idx.signatures()) + + assert len(siglist) == 1 + ss = siglist[0] + assert ss.name == 'GCA_903797575 Salmonella enterica' + assert ss.minhash.moltype == 'DNA' + assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err + + +def test_fromfile_dna_check_sequence_fail(runtmp): + # does it run? yes, hopefully. + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sketch', 'fromfile', + 'sketch_fromfile/salmonella-badseq.csv', + '-o', 'out.zip', '-p', 'dna', '--check-sequence') + + print(runtmp.last_result.out) + err = runtmp.last_result.err + print(err) + + assert "ERROR when reading from " in err + assert "invalid DNA character in input k-mer: MTNILKLFSRKAGEPLDSLAVKSVRQHLSGD" in err + + +def test_fromfile_dna_and_protein(runtmp): + # does it run and produce DNA _and_ protein signatures? 
+ test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + siglist = list(idx.signatures()) + + assert len(siglist) == 2 + + prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'protein' ] + assert len(prot_sig) == 1 + prot_sig = prot_sig[0] + assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + + dna_sig = [ ss for ss in siglist if ss.minhash.moltype == 'DNA' ] + assert len(dna_sig) == 1 + dna_sig = dna_sig[0] + assert dna_sig.name == 'GCA_903797575 Salmonella enterica' + + assert "** 2 total requested; output 2, skipped 0" in runtmp.last_result.err + + +def test_fromfile_dna_and_protein_and_hp_and_dayhoff(runtmp): + # does it run and produce DNA, protein, hp, _and_ dayhoff signatures? 
+ test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna', '-p', 'dna,k=25', + '-p', 'protein', + '-p', 'hp', '-p', 'dayhoff') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + siglist = list(idx.signatures()) + + assert len(siglist) == 5 + + prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'protein' ] + assert len(prot_sig) == 1 + prot_sig = prot_sig[0] + assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + + prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'hp' ] + assert len(prot_sig) == 1 + prot_sig = prot_sig[0] + assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + + prot_sig = [ ss for ss in siglist if ss.minhash.moltype == 'dayhoff' ] + assert len(prot_sig) == 1 + prot_sig = prot_sig[0] + assert prot_sig.name == 'GCA_903797575 Salmonella enterica' + + dna_sig = [ ss for ss in siglist if ss.minhash.moltype == 'DNA' ] + assert len(dna_sig) == 2 + dna_sig = dna_sig[0] + assert dna_sig.name == 'GCA_903797575 Salmonella enterica' + + assert "** 5 total requested; output 5, skipped 0" in runtmp.last_result.err + + +def test_fromfile_dna_and_protein_noname(runtmp): + # nothing in the name column + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sketch', 'fromfile', + 'sketch_fromfile/salmonella-noname.csv', + '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + + out = runtmp.last_result.out + err = runtmp.last_result.err + + print(out) + print(err) + assert "ERROR: 1 entries have blank 'name's? Exiting!" 
in err + + +def test_fromfile_dna_and_protein_dup_name(runtmp): + # duplicate names + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sketch', 'fromfile', + 'sketch_fromfile/salmonella.csv', + 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna', '-p', 'protein') + + out = runtmp.last_result.out + err = runtmp.last_result.err + + print(out) + print(err) + assert "ERROR: 1 entries have duplicate 'name' records. Exiting!" in err + + +def test_fromfile_dna_and_protein_missing(runtmp): + # test what happens when missing protein. + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed): + runtmp.sourmash('sketch', 'fromfile', + 'sketch_fromfile/salmonella-missing.csv', + '-o', 'out.zip', '-p', 'protein') + + out = runtmp.last_result.out + err = runtmp.last_result.err + + print(out) + print(err) + + assert "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" in err + assert "** ERROR: we cannot build some of the requested signatures." in err + assert "** 1 total signatures (for 1 names) cannot be built." in err + + +def test_fromfile_dna_and_protein_missing_ignore(runtmp): + # test what happens when missing protein + --ignore-missing + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', + 'sketch_fromfile/salmonella-missing.csv', + '-o', 'out.zip', '-p', 'protein', '--ignore-missing') + + out = runtmp.last_result.out + err = runtmp.last_result.err + + print(out) + print(err) + + assert "WARNING: fromfile entry 'GCA_903797575 Salmonella enterica' is missing a proteome" in err + + assert "** ERROR: we cannot build some of the requested signatures." 
in err + assert "** 1 total signatures (for 1 names) cannot be built." in err + + assert "** (continuing past this error because --ignore-missing was set)" in err + assert "** 1 new signatures to build from 0 files;" in err + + +def test_fromfile_no_overwrite(runtmp): + # test that existing output is not overwritten without --force-output-already-exists + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + + # now run again; will fail since already exists + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'protein') + + err = runtmp.last_result.err + + assert "ERROR: output location 'out.zip' already exists!" in err + assert "Use --force-output-already-exists if you want to overwrite/append." 
in err + + +def test_fromfile_force_overwrite(runtmp): + # test --force-output-already-exists + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + + # now run again, with --force + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'protein', '--force-output') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.zip')) + idx = sourmash.load_file_as_index(runtmp.output('out.zip')) + siglist = list(idx.signatures()) + + assert len(siglist) == 2 + names = list(set([ ss.name for ss in siglist ])) + assert names[0] == 'GCA_903797575 Salmonella enterica' + assert "** 1 total requested; output 1, skipped 0" in runtmp.last_result.err + + +def test_fromfile_need_params(runtmp): + # check that we need a -p + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip') + + print(str(exc)) + assert "Error creating signatures: No default moltype and none specified in param string" in str(exc) + + +def test_fromfile_seed_not_allowed(runtmp): + # check that we cannot adjust 'seed' + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna,seed=43') + print(str(exc)) + + assert "ERROR: cannot set 'seed' in 'sketch fromfile'" in str(exc) + + +def test_fromfile_license_not_allowed(runtmp): + # 
check that license is CC0 + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + with pytest.raises(SourmashCommandFailed) as exc: + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-o', 'out.zip', '-p', 'dna', + '--license', 'BSD') + + print(str(exc)) + assert 'sourmash only supports CC0-licensed signatures' in str(exc) + + +def test_fromfile_dna_and_protein_csv_output(runtmp): + # does it run and produce DNA _and_ protein signatures? + test_inp = utils.get_test_data('sketch_fromfile') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '--output-csv', 'out.csv', '-p', 'dna', '-p', 'protein') + + print(runtmp.last_result.out) + print(runtmp.last_result.err) + + assert os.path.exists(runtmp.output('out.csv')) + + with open(runtmp.output('out.csv'), newline='') as fp: + r = csv.DictReader(fp) + # filename,sketchtype,output_index,name,param_strs + + x = [] + for row in r: + x.append(row) + + x.sort(key=lambda x: x['filename']) + + assert len(x) == 2 + assert x[0]['sketchtype'] == 'dna' + assert x[0]['param_strs'] == '-p dna,k=31,scaled=1000' + assert x[0]['filename'] == 'sketch_fromfile/GCA_903797575.1_PARATYPHIC668_genomic.fna.gz' + + assert x[1]['sketchtype'] == 'protein' + assert x[1]['param_strs'] == '-p protein,k=10,scaled=200' + assert x[1]['filename'] == 'sketch_fromfile/GCA_903797575.1_PARATYPHIC668_protein.faa.gz' + + # same name... + assert x[0]['name'] == x[1]['name'] == "GCA_903797575 Salmonella enterica" + # ...different output index. + assert x[1]['output_index'] != x[0]['output_index'] + + +def test_fromfile_dna_and_protein_already_exists(runtmp): + # does it properly ignore existing (--already-done) sigs? 
+ test_inp = utils.get_test_data('sketch_fromfile') + already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-p', 'dna', '-p', 'protein', + '--already-done', already_done, + '--output-manifest', 'matching.csv') + + print(runtmp.last_result.out) + err = runtmp.last_result.err + print(err) + + assert 'Loaded 1 pre-existing names from manifest(s)' in err + assert 'Read 1 rows, requesting that 2 signatures be built.' in err + assert '** 0 new signatures to build from 0 files;' in err + assert '** Nothing to build. Exiting!' in err + + assert "output 2 already-done signatures to 'matching.csv' in manifest format." in err + mf = manifest.CollectionManifest.load_from_filename(runtmp.output('matching.csv')) + assert len(mf) == 2 + + +def test_fromfile_dna_and_protein_partly_already_exists(runtmp): + # does it properly ignore existing (--already-done) sigs? + test_inp = utils.get_test_data('sketch_fromfile') + already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella-mult.csv', + '-p', 'dna', '-p', 'protein', + '--already-done', already_done) + + print(runtmp.last_result.out) + err = runtmp.last_result.err + print(err) + + assert 'Loaded 1 pre-existing names from manifest(s)' in err + assert 'Read 2 rows, requesting that 4 signatures be built.' in err + assert '** 2 new signatures to build from 2 files;' in err + assert "** 2 already exist, so skipping those." 
in err + assert "** 4 total requested; output 2, skipped 2" in err + + +def test_fromfile_dna_and_protein_already_exists_noname(runtmp): + # check that no name in already_exists is handled + test_inp = utils.get_test_data('sketch_fromfile') + already_done = utils.get_test_data('sketch_fromfile/salmonella-dna-protein.zip') + shutil.copytree(test_inp, runtmp.output('sketch_fromfile')) + + # run rename to get rid of names + runtmp.sourmash('sig', 'rename', already_done, '', + '-o', 'already-done.zip') + + runtmp.sourmash('sketch', 'fromfile', 'sketch_fromfile/salmonella.csv', + '-p', 'dna', '-p', 'protein', + '--already-done', 'already-done.zip') + + print(runtmp.last_result.out) + err = runtmp.last_result.err + print(err) + + assert 'Loaded 0 pre-existing names from manifest(s)' in err + assert 'Read 1 rows, requesting that 2 signatures be built.' in err + assert '** 2 new signatures to build from 2 files;' in err + assert '** 2 total requested; output 2, skipped 0' in err