From 950e0ac15a167be8cca69b873d19b15329b256d8 Mon Sep 17 00:00:00 2001 From: Michal Ziemski Date: Fri, 5 Jan 2024 12:03:15 +0100 Subject: [PATCH 1/5] MAINT: remove the Rust dependency --- Makefile | 4 +- ci/recipe/meta.yaml | 14 +- fastq_writer/Cargo.lock | 327 ----------------------------- fastq_writer/Cargo.toml | 13 -- fastq_writer/data/test_input.fastq | 8 - fastq_writer/src/lib.rs | 81 ------- install-sra-tools.sh | 60 ------ q2_fondue/sequences.py | 24 ++- 8 files changed, 25 insertions(+), 506 deletions(-) delete mode 100644 fastq_writer/Cargo.lock delete mode 100644 fastq_writer/Cargo.toml delete mode 100644 fastq_writer/data/test_input.fastq delete mode 100644 fastq_writer/src/lib.rs delete mode 100755 install-sra-tools.sh diff --git a/Makefile b/Makefile index fbfe0db7..9531ea68 100644 --- a/Makefile +++ b/Makefile @@ -16,13 +16,11 @@ test-cov: all install: all bash install-sra-tools.sh - maturin build --release -m fastq_writer/Cargo.toml $(PYTHON) setup.py install - $(PYTHON) -m pip install --no-deps fastq_writer --find-links fastq_writer/target/wheels/ dev: all bash install-sra-tools.sh - pip install coverage parameterized maturin==0.10.3 + pip install coverage parameterized pip install -e . clean: distclean diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml index 3c3c259c..650314ec 100644 --- a/ci/recipe/meta.yaml +++ b/ci/recipe/meta.yaml @@ -14,29 +14,25 @@ build: make install requirements: - build: - - maturin ==0.10.3 - - rust ==1.58.1 - host: - python {{ python }} - setuptools run: - - python {{ python }} + - entrezpy >=2.1.2 - xmltodict - - pyzotero + - python {{ python }} - python-dotenv - - tqdm >=4.62.3 - - entrezpy >=2.1.2 + - pyzotero - q2-types {{ qiime2_epoch }}.* - qiime2 {{ qiime2_epoch }}.* + - sra-tools ==3.0.0 + - tqdm >=4.62.3 test: imports: - q2_fondue - qiime2.plugins.fondue - - fastq_writer requires: - parameterized - coverage diff --git a/fastq_writer/Cargo.lock b/fastq_writer/Cargo.lock deleted file mode 100644 index 01a00150..00000000 --- a/fastq_writer/Cargo.lock +++ /dev/null @@ -1,327 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "ctor" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdffe87e1d521a10f9696f833fe502293ea446d7f256c06128293a4119bdf4cb" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "fastq_writer" -version = "0.1.0" -dependencies = [ - "flate2", - "pyo3", -] - -[[package]] -name = "flate2" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "ghost" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb19fe8de3ea0920d282f7b77dd4227aea6b8b999b42cdf0ca41b2472b14443a" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "indoc" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", - "unindent", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "inventory" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb5160c60ba1e809707918ee329adb99d222888155835c6feedba19f6c3fd4" -dependencies = [ - "ctor", - "ghost", - "inventory-impl", -] - -[[package]] -name = "inventory-impl" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e41b53715c6f0c4be49510bb82dee2c1e51c8586d885abe65396e82ed518548" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "libc" -version = "0.2.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" - -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "miniz_oxide" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" -dependencies = [ - "adler", -] - -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", -] - -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro2" -version = "1.0.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4837b8e8e18a102c23f79d1e9a110b597ea3b684c95e874eb1ad88f8683109c3" -dependencies = [ - "cfg-if", - "ctor", - "indoc", - "inventory", - "libc", - "parking_lot", - "paste", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-macros" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47f2c300ceec3e58064fd5f8f5b61230f2ffd64bde4970c81fdd0563a2db1bb" -dependencies = [ - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87b097e5d84fcbe3e167f400fbedd657820a375b034c78bd852050749a575d66" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "smallvec" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" - -[[package]] -name = "syn" -version = "1.0.99" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" - -[[package]] -name = "unindent" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58ee9362deb4a96cef4d437d1ad49cffc9b9e92d202b6995674e928ce684f112" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/fastq_writer/Cargo.toml b/fastq_writer/Cargo.toml deleted file mode 100644 index cf63ae84..00000000 --- a/fastq_writer/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "fastq_writer" -version = "0.1.0" -authors = ["Michal Ziemski "] -edition = "2021" - -[dependencies] -pyo3 = { version = "0.13", features = ["extension-module"] } -flate2 = "1.0.24" - -[lib] -name = "fastq_writer" -crate-type = ["cdylib"] diff --git a/fastq_writer/data/test_input.fastq b/fastq_writer/data/test_input.fastq deleted file mode 100644 index 8f860190..00000000 --- a/fastq_writer/data/test_input.fastq +++ /dev/null @@ -1,8 +0,0 @@ -@SRR20363956.1 1 length=245 -ACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCCACAAGCCTGATCCAGCAATTCTGTGTGCACGATGAAGGTCTTCGGATTGTAAAGTGCTTTCAGTTGGGAAGAAGAAAGTGACGGTACCAACAGAAGAAGCGACGGCTAAATACGTGCCAGCAGCCGCGGTAATACGTATGTCGCAAGCGTTATCCGGATTTATTGGGCGTAAAGCGCGTCTAGGCGGAAAAATAAGTCTGAT -+SRR20363956.1 1 length=245 -FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF:FFFF,FFFFFFFFFF:FFFFF,FFFFFFFFFFFFF -@SRR20363956.2 2 length=245 -ACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCTCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGTAGGGAGGAAAGGGTGAGTCTTAATACGGCTCATCTGTGACGTTACCTACAGAAGAAGGACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTCCGAGCGTTAATCGGACTTACTGGGCGTAAAGCGTGCGC -+SRR20363956.2 2 length=245 -FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,F,FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF:FFFF::FFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFF diff --git a/fastq_writer/src/lib.rs b/fastq_writer/src/lib.rs deleted file mode 100644 index 27ee8424..00000000 --- a/fastq_writer/src/lib.rs +++ /dev/null @@ -1,81 +0,0 @@ -use flate2::write::GzEncoder; -use flate2::Compression; -use pyo3::prelude::{pymodule, PyModule, PyResult, Python}; -use std::fs::File; -use std::io::{BufRead, BufReader, Write}; - -#[pymodule] -fn fastq_writer(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - fn rewrite_fastq(fin: &str, fout: &str) { - _rewrite(fin, fout) - } - - #[pyfn(m, "rewrite_fastq")] - fn rewrite_fastq_py<'py>(_py: Python<'py>, fin: &str, fout: &str) { - rewrite_fastq(fin, fout) - } - - Ok(()) -} - -fn _rewrite(fin: &str, fout: &str) { - let buff_in = BufReader::new(File::open(fin).expect("Could not open file for reading.")); - let mut buff_out = GzEncoder::new( - File::create(fout).expect("Could not open file for writing."), - Compression::default(), - ); - - for line in buff_in.lines() { - let l = line.expect("Unable to read line."); - buff_out - .write_all(l.trim().as_bytes()) - .expect("Unable to write sequence to file."); - buff_out - .write_all("\n".as_bytes()) - .expect("Unable to write to file."); - } - buff_out.try_finish().expect("Could not close the stream."); -} - -mod tests { - use super::*; - use flate2::read::GzDecoder; - use std::env::temp_dir; - use std::io::{BufRead, BufReader}; - - #[allow(dead_code)] - fn create_tmp_file(name: &str) -> String { - let mut dir = temp_dir(); - dir.push(name); - let _fexp = File::create(&dir).expect("Could not create file"); - let a = format!("{}", &dir.as_path().display()); - a - } - - #[allow(dead_code)] - fn assert_file_content(f1: &str, f2: &str) { - let buff1 = BufReader::new(File::open(f1).expect("Could not open file for reading.")); - let buff2 = BufReader::new(GzDecoder::new( - File::open(f2).expect("Could not open gz file."), - )); - - let it = buff1.lines().zip(buff2.lines()); - for (l1, l2) in it { - assert_eq!( - l1.expect("Unable to read line from file 1."), - l2.expect("Unable to read line from file 2.") - ) - } - } - - #[test] - fn test_rewrite_ok() { - let fin = "./data/test_input.fastq"; - let _fout = create_tmp_file("test_seq.fastq.gz"); - let fout = _fout.as_str(); - - _rewrite(fin, fout); - - assert_file_content(fin, fout); - } -} diff --git a/install-sra-tools.sh b/install-sra-tools.sh deleted file mode 100755 index bab57349..00000000 --- a/install-sra-tools.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -TOOLKIT_VER="3.0.0" - -if [[ "$OSTYPE" == "linux"* ]]; then - LINUX_VER=$(awk -F= '/^NAME/{print $2}' /etc/os-release) - if [[ "$LINUX_VER" == '"Ubuntu"' ]]; then - OS_VER="ubuntu64" - elif [[ "$LINUX_VER" == '"CentOS Linux"' ]]; then - OS_VER="centos_linux64" - else - echo "Detected OS version (${LINUX_VER}) is not supported. Aborting." - exit 1 - fi -elif [[ "$OSTYPE" == "darwin"* ]]; then - OS_VER="mac64" -else - echo "Detected OS version (${OSTYPE}) is not supported. Aborting." - exit 1 -fi - -TOOLKIT_URL="http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${TOOLKIT_VER}/sratoolkit.${TOOLKIT_VER}-${OS_VER}.tar.gz" - -echo "Fetching SRA Tools from ${TOOLKIT_URL}..." -curl -L "${TOOLKIT_URL}" > sratoolkit.tar.gz - -echo "Extracting..." -tar -xzf sratoolkit.tar.gz -rm sratoolkit.tar.gz -mv "sratoolkit.${TOOLKIT_VER}-${OS_VER}/" "sratoolkit/" - -if [[ "$PREFIX" == "" ]]; then - echo "Setting PREFIX=$CONDA_PREFIX" - PREFIX="$CONDA_PREFIX" -fi - -echo "Installing SRA Tools in $PREFIX..." -if [[ ! -d "$PREFIX/bin/" ]]; then - mkdir $PREFIX/bin/ -fi -find sratoolkit/bin/ -maxdepth 1 -type f -exec mv -f {} $PREFIX/bin/ \; -find sratoolkit/bin/ -maxdepth 1 -type l -exec mv -f {} $PREFIX/bin/ \; -rm -r sratoolkit - -echo "Testing installation..." -if [[ $(which prefetch) == "$PREFIX/bin"* ]]; then - echo "Success!" -else - echo "Installation failed." - exit 1 -fi - -echo "Configuring SRA Toolkit:" -SRA_CACHE_LOC="$HOME/.prefetch_cache" -echo "Creating prefetch cache directory under $SRA_CACHE_LOC..." -mkdir "$SRA_CACHE_LOC" -echo "Running vdb-config..." -vdb-config -s "/repository/user/main/public/root=$SRA_CACHE_LOC" -vdb-config --prefetch-to-user-repo -echo "Configuration completed." diff --git a/q2_fondue/sequences.py b/q2_fondue/sequences.py index eef644ed..4e039756 100644 --- a/q2_fondue/sequences.py +++ b/q2_fondue/sequences.py @@ -20,7 +20,6 @@ import threading import time -from fastq_writer import rewrite_fastq from qiime2 import Metadata from warnings import warn @@ -238,6 +237,21 @@ def _write_empty_casava(read_type, casava_out_path): pass +def _rewrite_fastq(file_in: str, file_out: str): + with open(file_in, 'r') as fin, open(file_out, 'w') as fout: + for line in fin.readlines(): + fout.write(line.strip() + '\n') + + try: + subprocess.run(['gzip', file_out], check=True) + except subprocess.CalledProcessError: + LOGGER.error( + 'Failed to compress file %s. Please check your ' + 'installation of gzip.', file_out + ) + raise + + def _copy_to_casava( filenames: list, tmp_dir: str, casava_result_path: str ): @@ -247,14 +261,14 @@ def _copy_to_casava( copied from tmp_dir to casava_result_path. """ fwd_path_in = os.path.join(tmp_dir, filenames[0]) - fwd_path_out = os.path.join(casava_result_path, f'{filenames[0]}.gz') - rewrite_fastq(fwd_path_in, fwd_path_out) + fwd_path_out = os.path.join(casava_result_path, f'{filenames[0]}') + _rewrite_fastq(fwd_path_in, fwd_path_out) os.remove(fwd_path_in) if len(filenames) > 1: rev_path_in = os.path.join(tmp_dir, filenames[1]) - rev_path_out = os.path.join(casava_result_path, f'{filenames[1]}.gz') - rewrite_fastq(rev_path_in, rev_path_out) + rev_path_out = os.path.join(casava_result_path, f'{filenames[1]}') + _rewrite_fastq(rev_path_in, rev_path_out) os.remove(rev_path_in) From 75d671ffef43d0ff4b58150da3280fb8f78b48ac Mon Sep 17 00:00:00 2001 From: Michal Ziemski Date: Wed, 10 Jan 2024 15:04:19 +0100 Subject: [PATCH 2/5] Revert the sra-tools changes --- ci/recipe/meta.yaml | 1 - install-sra-tools.sh | 60 ++++++++++++++++++++++++++++++++++++++++++ q2_fondue/sequences.py | 3 +-- 3 files changed, 61 insertions(+), 3 deletions(-) create mode 100755 install-sra-tools.sh diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml index 650314ec..e7485154 100644 --- a/ci/recipe/meta.yaml +++ b/ci/recipe/meta.yaml @@ -26,7 +26,6 @@ requirements: - pyzotero - q2-types {{ qiime2_epoch }}.* - qiime2 {{ qiime2_epoch }}.* - - sra-tools ==3.0.0 - tqdm >=4.62.3 test: diff --git a/install-sra-tools.sh b/install-sra-tools.sh new file mode 100755 index 00000000..bab57349 --- /dev/null +++ b/install-sra-tools.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash + +TOOLKIT_VER="3.0.0" + +if [[ "$OSTYPE" == "linux"* ]]; then + LINUX_VER=$(awk -F= '/^NAME/{print $2}' /etc/os-release) + if [[ "$LINUX_VER" == '"Ubuntu"' ]]; then + OS_VER="ubuntu64" + elif [[ "$LINUX_VER" == '"CentOS Linux"' ]]; then + OS_VER="centos_linux64" + else + echo "Detected OS version (${LINUX_VER}) is not supported. Aborting." + exit 1 + fi +elif [[ "$OSTYPE" == "darwin"* ]]; then + OS_VER="mac64" +else + echo "Detected OS version (${OSTYPE}) is not supported. Aborting." + exit 1 +fi + +TOOLKIT_URL="http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${TOOLKIT_VER}/sratoolkit.${TOOLKIT_VER}-${OS_VER}.tar.gz" + +echo "Fetching SRA Tools from ${TOOLKIT_URL}..." +curl -L "${TOOLKIT_URL}" > sratoolkit.tar.gz + +echo "Extracting..." +tar -xzf sratoolkit.tar.gz +rm sratoolkit.tar.gz +mv "sratoolkit.${TOOLKIT_VER}-${OS_VER}/" "sratoolkit/" + +if [[ "$PREFIX" == "" ]]; then + echo "Setting PREFIX=$CONDA_PREFIX" + PREFIX="$CONDA_PREFIX" +fi + +echo "Installing SRA Tools in $PREFIX..." +if [[ ! -d "$PREFIX/bin/" ]]; then + mkdir $PREFIX/bin/ +fi +find sratoolkit/bin/ -maxdepth 1 -type f -exec mv -f {} $PREFIX/bin/ \; +find sratoolkit/bin/ -maxdepth 1 -type l -exec mv -f {} $PREFIX/bin/ \; +rm -r sratoolkit + +echo "Testing installation..." +if [[ $(which prefetch) == "$PREFIX/bin"* ]]; then + echo "Success!" +else + echo "Installation failed." + exit 1 +fi + +echo "Configuring SRA Toolkit:" +SRA_CACHE_LOC="$HOME/.prefetch_cache" +echo "Creating prefetch cache directory under $SRA_CACHE_LOC..." +mkdir "$SRA_CACHE_LOC" +echo "Running vdb-config..." +vdb-config -s "/repository/user/main/public/root=$SRA_CACHE_LOC" +vdb-config --prefetch-to-user-repo +echo "Configuration completed." diff --git a/q2_fondue/sequences.py b/q2_fondue/sequences.py index 4e039756..d8a5f0ee 100644 --- a/q2_fondue/sequences.py +++ b/q2_fondue/sequences.py @@ -246,8 +246,7 @@ def _rewrite_fastq(file_in: str, file_out: str): subprocess.run(['gzip', file_out], check=True) except subprocess.CalledProcessError: LOGGER.error( - 'Failed to compress file %s. Please check your ' - 'installation of gzip.', file_out + 'Failed to compress file %s.', file_out ) raise From ce29862bc438ebe2df2c34604614ad22ccd2c11c Mon Sep 17 00:00:00 2001 From: Michal Ziemski Date: Tue, 16 Jan 2024 14:38:40 +0100 Subject: [PATCH 3/5] Simplify get-all tests --- q2_fondue/tests/test_get_all.py | 227 ++++++++++---------------------- 1 file changed, 70 insertions(+), 157 deletions(-) diff --git a/q2_fondue/tests/test_get_all.py b/q2_fondue/tests/test_get_all.py index e724d21e..6edf3a0f 100644 --- a/q2_fondue/tests/test_get_all.py +++ b/q2_fondue/tests/test_get_all.py @@ -5,193 +5,106 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -import os +import unittest +from unittest.mock import (ANY, Mock) import pandas as pd -import unittest -from pandas.testing import assert_frame_equal -from q2_types.per_sample_sequences import \ - CasavaOneEightSingleLanePerSampleDirFmt from qiime2 import Artifact -from qiime2.plugins import fondue -from unittest.mock import (patch, ANY, call) -from q2_fondue.sequences import (_run_fasterq_dump_for_all, - _process_downloaded_sequences, - _write2casava_dir, _copy_to_casava) +from q2_fondue.get_all import get_all from q2_fondue.tests.test_sequences import SequenceTests +class FakeCtx(Mock): + def __init__(self, ids_path, meta_path, failed_ids=None): + super().__init__() + self.ids = Artifact.import_data('NCBIAccessionIDs', ids_path) + self.meta = Artifact.import_data('SRAMetadata', meta_path) + self.failed_empty = Artifact.import_data( + 'SRAFailedIDs', pd.DataFrame() + ) + if failed_ids: + self.failed = Artifact.import_data( + 'SRAFailedIDs', + pd.DataFrame( + data={'Error message': ['Some error message' + for _ in failed_ids]}, + index=pd.Index(failed_ids, name='ID') + ) + ) + else: + self.failed = self.failed_empty + + self.get_metadata = Mock(return_value=(self.meta, self.failed_empty)) + self.get_sequences = Mock(return_value=(Mock(), Mock(), self.failed)) + + def get_action(self, plugin, action): + if action == 'get_metadata': + return self.get_metadata + elif action == 'get_sequences': + return self.get_sequences + + class TestGetAll(SequenceTests): package = 'q2_fondue.tests' - @patch('os.remove') - @patch('q2_fondue.metadata._validate_run_ids') - @patch('q2_fondue.metadata.ef.Efetcher') - @patch('q2_fondue.metadata._efetcher_inquire') - @patch('time.sleep') - @patch('q2_fondue.sequences.Process') - @patch('q2_fondue.sequences.Pool') - @patch('q2_fondue.sequences._announce_completion') - @patch('q2_fondue.sequences.CasavaOneEightSingleLanePerSampleDirFmt') - @patch('tempfile.TemporaryDirectory') - def test_get_all_single( - self, mock_tmpdir, mock_casava, mock_announce, mock_pool, - mock_proc, mock_sleep, mock_inquire, mock_efetcher, - mock_validation, mock_remove - ): + def test_get_all_single(self): """ Test verifying that pipeline get_all calls all expected actions, individual actions are tested in details in respective test classes """ - acc_id = 'SRR123456' - test_md = Artifact.import_data( - 'NCBIAccessionIDs', self.get_data_path(f'{acc_id}_md.tsv') + mock_ctx = FakeCtx( + ids_path=self.get_data_path('SRR123456_md.tsv'), + meta_path=self.get_data_path('sra-metadata-mock.tsv') ) - - # define mocked return values for get_metadata mocks - mock_validation.return_value = {} - - path2df = self.get_data_path('sra-metadata-mock.tsv') - mock_inquire.return_value = ( - pd.read_csv(path2df, sep='\t', index_col=0), {} + obs_meta, _, _, obs_failed = get_all( + mock_ctx, mock_ctx.ids, 'fake@email.com', retries=1 ) - # define mocked return values for get_sequences mocks - mock_tmpdir.return_value = self.move_files_2_tmp_dir( - [f'{acc_id}.fastq', f'{acc_id}.sra']) - mock_announce.return_value = {}, [f'{acc_id}.fastq'], [] - casavas = [CasavaOneEightSingleLanePerSampleDirFmt(), - CasavaOneEightSingleLanePerSampleDirFmt()] - mock_casava.side_effect = casavas - _copy_to_casava( - [f'{acc_id}.fastq'], mock_tmpdir.return_value.name, - str(casavas[0].path) + mock_ctx.get_metadata.assert_called_once_with( + mock_ctx.ids, 'fake@email.com', 1, 'INFO', None ) - os.rename( - os.path.join(str(casavas[0].path), - f'{acc_id}.fastq.gz'), - os.path.join(str(casavas[0].path), - f'{acc_id}_00_L001_R1_001.fastq.gz'), + mock_ctx.get_sequences.assert_called_once_with( + ANY, 'fake@email.com', 1, 1, 'INFO' ) - # run pipeline - fondue.actions.get_all(test_md, 'fake@email.com', retries=1) - - # function call assertions for get_metadata within - mock_validation.assert_called_once_with( - 'fake@email.com', 1, [acc_id], 'INFO') - mock_efetcher.assert_called_once_with( - 'efetcher', 'fake@email.com', apikey=None, apikey_var=None, - threads=1, qid=None) - mock_inquire.assert_called_once_with(ANY, [acc_id], 'INFO') - - # function call assertions for get_sequences within - mock_proc.assert_has_calls([ - call(target=_run_fasterq_dump_for_all, args=( - [acc_id], mock_tmpdir.return_value.name, 1, '', 1, - ANY, ANY), daemon=True), - call(target=_process_downloaded_sequences, args=( - mock_tmpdir.return_value.name, ANY, ANY, 1), daemon=True), - ]) - mock_pool.assert_called_once_with( - 1, _write2casava_dir, - (mock_tmpdir.return_value.name, ANY, ANY, ANY, ANY) - ) - mock_remove.assert_has_calls([ - call(os.path.join( - mock_tmpdir.return_value.name, f"{acc_id}.fastq" - )) - ]) - - @patch('os.remove') - @patch('q2_fondue.metadata.BATCH_SIZE', 1) - @patch('q2_fondue.metadata._validate_run_ids') - @patch('q2_fondue.metadata.ef.Efetcher') - @patch('q2_fondue.metadata._efetcher_inquire') - @patch('time.sleep') - @patch('q2_fondue.sequences.Process') - @patch('q2_fondue.sequences.Pool') - @patch('q2_fondue.sequences._announce_completion') - @patch('q2_fondue.sequences.CasavaOneEightSingleLanePerSampleDirFmt') - @patch('tempfile.TemporaryDirectory') - def test_get_all_multi_with_missing_ids( - self, mock_tmpdir, mock_casava, mock_announce, mock_pool, - mock_proc, mock_sleep, mock_inquire, mock_efetcher, - mock_validation, mock_remove - ): + run_ids = mock_ctx.get_sequences.call_args_list[0][0][0] + run_ids = run_ids.view(pd.DataFrame).index.to_list() + self.assertListEqual(run_ids, ['SRR123456']) + + self.assertEqual(obs_meta, mock_ctx.meta) + self.assertEqual(obs_failed, mock_ctx.failed) + + def test_get_all_multi_with_missing_ids(self): """ Test verifying that pipeline get_all calls all expected actions, individual actions are tested in details in respective test classes """ - acc_ids = ['SRR123456', 'SRR123457'] - test_md = Artifact.import_data( - 'NCBIAccessionIDs', self.get_data_path('SRR1234567_md.tsv') + mock_ctx = FakeCtx( + ids_path=self.get_data_path('SRR1234567_md.tsv'), + meta_path=self.get_data_path('sra-metadata-mock.tsv'), + failed_ids=['SRR123457'] ) - - # define mocked return values for get_metadata mocks - mock_validation.return_value = {} - - path2df = self.get_data_path('sra-metadata-mock.tsv') - missing_ids_dic = {'SRR123457': 'Some error message'} - mock_inquire.return_value = \ - (pd.read_csv(path2df, sep='\t', index_col=0), missing_ids_dic) - - # define mocked return values for get_sequences mocks - mock_tmpdir.return_value = self.move_files_2_tmp_dir( - [f'{acc_ids[0]}.fastq', f'{acc_ids[0]}.sra'] + obs_meta, _, _, obs_failed = get_all( + mock_ctx, mock_ctx.ids, 'fake@email.com', retries=1 ) - mock_announce.return_value = {}, [f'{acc_ids[0]}.fastq'], [] - casavas = [CasavaOneEightSingleLanePerSampleDirFmt(), - CasavaOneEightSingleLanePerSampleDirFmt()] - mock_casava.side_effect = casavas - _copy_to_casava( - [f'{acc_ids[0]}.fastq'], mock_tmpdir.return_value.name, - str(casavas[0].path) + + mock_ctx.get_metadata.assert_called_once_with( + mock_ctx.ids, 'fake@email.com', 1, 'INFO', None ) - os.rename( - os.path.join(str(casavas[0].path), - f'{acc_ids[0]}.fastq.gz'), - os.path.join(str(casavas[0].path), - f'{acc_ids[0]}_00_L001_R1_001.fastq.gz'), + mock_ctx.get_sequences.assert_called_once_with( + ANY, 'fake@email.com', 1, 1, 'INFO' ) - # run pipeline - _, _, _, missing_ids_obs = fondue.actions.get_all( - test_md, 'fake@email.com', retries=0) - - # assert missing ids output - missing_ids_exp = pd.DataFrame( - data={'Error message': missing_ids_dic.values()}, - index=pd.Index(missing_ids_dic.keys(), name='ID')) - assert_frame_equal( - missing_ids_obs.view(pd.DataFrame), - missing_ids_exp) - - # function call assertions for get_metadata within - mock_validation.assert_called_once_with( - 'fake@email.com', 1, acc_ids, 'INFO') - mock_efetcher.assert_called_once_with( - 'efetcher', 'fake@email.com', apikey=None, apikey_var=None, - threads=1, qid=None) - mock_inquire.assert_called_once_with(ANY, acc_ids, 'INFO') - # function call assertions for get_sequences within - mock_proc.assert_has_calls([ - call(target=_run_fasterq_dump_for_all, args=( - [acc_ids[0]], mock_tmpdir.return_value.name, 1, '', 0, - ANY, ANY), daemon=True), - call(target=_process_downloaded_sequences, args=( - mock_tmpdir.return_value.name, ANY, ANY, 1), daemon=True), - ]) - mock_pool.assert_called_once_with( - 1, _write2casava_dir, - (mock_tmpdir.return_value.name, ANY, ANY, ANY, ANY) + run_ids = mock_ctx.get_sequences.call_args_list[0][0][0] + run_ids = run_ids.view(pd.DataFrame).index.to_list() + self.assertListEqual(run_ids, ['SRR123456']) + + self.assertEqual(obs_meta, mock_ctx.meta) + self.assertListEqual( + obs_failed.view(pd.DataFrame).index.to_list(), + ['SRR123457'] ) - mock_remove.assert_has_calls([ - call(os.path.join( - mock_tmpdir.return_value.name, f"{acc_ids[0]}.fastq" - )) - ]) if __name__ == "__main__": From 672ce54e610d6ba6b0489b9611bbc2aaf7e9084f Mon Sep 17 00:00:00 2001 From: Michal Ziemski Date: Tue, 16 Jan 2024 15:53:43 +0100 Subject: [PATCH 4/5] Re-implement and move _rewrite_fastq to utils --- q2_fondue/sequences.py | 22 +++------------------- q2_fondue/tests/test_utils.py | 17 +++++++++++++++-- q2_fondue/utils.py | 8 +++++++- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/q2_fondue/sequences.py b/q2_fondue/sequences.py index d8a5f0ee..88502f18 100644 --- a/q2_fondue/sequences.py +++ b/q2_fondue/sequences.py @@ -32,7 +32,7 @@ from q2_fondue.entrezpy_clients._utils import set_up_logger from q2_fondue.utils import ( _determine_id_type, handle_threaded_exception, DownloadError, - _has_enough_space, _find_next_id + _has_enough_space, _find_next_id, _rewrite_fastq ) threading.excepthook = handle_threaded_exception @@ -237,20 +237,6 @@ def _write_empty_casava(read_type, casava_out_path): pass -def _rewrite_fastq(file_in: str, file_out: str): - with open(file_in, 'r') as fin, open(file_out, 'w') as fout: - for line in fin.readlines(): - fout.write(line.strip() + '\n') - - try: - subprocess.run(['gzip', file_out], check=True) - except subprocess.CalledProcessError: - LOGGER.error( - 'Failed to compress file %s.', file_out - ) - raise - - def _copy_to_casava( filenames: list, tmp_dir: str, casava_result_path: str ): @@ -260,15 +246,13 @@ def _copy_to_casava( copied from tmp_dir to casava_result_path. """ fwd_path_in = os.path.join(tmp_dir, filenames[0]) - fwd_path_out = os.path.join(casava_result_path, f'{filenames[0]}') + fwd_path_out = os.path.join(casava_result_path, f'{filenames[0]}.gz') _rewrite_fastq(fwd_path_in, fwd_path_out) - os.remove(fwd_path_in) if len(filenames) > 1: rev_path_in = os.path.join(tmp_dir, filenames[1]) - rev_path_out = os.path.join(casava_result_path, f'{filenames[1]}') + rev_path_out = os.path.join(casava_result_path, f'{filenames[1]}.gz') _rewrite_fastq(rev_path_in, rev_path_out) - os.remove(rev_path_in) def _write2casava_dir( diff --git a/q2_fondue/tests/test_utils.py b/q2_fondue/tests/test_utils.py index d1890ac8..0640a110 100644 --- a/q2_fondue/tests/test_utils.py +++ b/q2_fondue/tests/test_utils.py @@ -5,7 +5,7 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- - +import gzip import os import signal import threading @@ -17,7 +17,7 @@ from tqdm import tqdm from q2_fondue.utils import (handle_threaded_exception, _has_enough_space, - _find_next_id, _chunker) + _find_next_id, _chunker, _rewrite_fastq) class TestExceptHooks(unittest.TestCase): @@ -131,6 +131,19 @@ def test_chunker_no_chunks(self): exp_out = ['A', 'B', 'C'] self.assertEqual(next(obs_out), exp_out) + def test_rewrite_fastq(self): + file_in = self.get_data_path('SRR123456.fastq') + file_out = self.get_data_path('SRR123456.fastq.gz') + + _rewrite_fastq(file_in, file_out) + + with open(file_in, 'rb') as fin, gzip.open(file_out, 'r') as fout: + for lin, lout in zip(fin.readlines(), fout.readlines()): + self.assertEqual(lin, lout) + + # clean up + os.remove(file_out) + if __name__ == "__main__": unittest.main() diff --git a/q2_fondue/utils.py b/q2_fondue/utils.py index 1b1d5c11..8262d97c 100644 --- a/q2_fondue/utils.py +++ b/q2_fondue/utils.py @@ -5,8 +5,9 @@ # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- - +import gzip import os +import shutil import signal import subprocess from typing import List @@ -142,3 +143,8 @@ def _find_next_id(acc_id: str, progress_bar: tqdm): return None else: return pbar_content[index_next_acc] + + +def _rewrite_fastq(file_in: str, file_out: str): + with open(file_in, 'rb') as f_in, gzip.open(file_out, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) From ab2716f9d7c9e1a59c919cd23886f84d42add252 Mon Sep 17 00:00:00 2001 From: Michal Ziemski Date: Fri, 19 Jan 2024 16:18:59 +0100 Subject: [PATCH 5/5] Append a newline to all test fastq files --- q2_fondue/tests/data/SRR123456.fastq | 2 +- q2_fondue/tests/data/SRR123457_1.fastq | 2 +- q2_fondue/tests/data/SRR123457_2.fastq | 2 +- q2_fondue/tests/data/testaccA.fastq | 2 +- q2_fondue/tests/data/testaccA_01_L001_R1_001.fastq | 2 +- q2_fondue/tests/data/testaccHYB.fastq | 2 +- q2_fondue/tests/data/testaccHYB_1.fastq | 2 +- q2_fondue/tests/data/testaccHYB_2.fastq | 2 +- q2_fondue/tests/data/testacc_00_L001_R1_001.fastq | 2 +- q2_fondue/tests/data/testacc_00_L001_R2_001.fastq | 2 +- q2_fondue/tests/data/testacc_1.fastq | 2 +- q2_fondue/tests/data/testacc_2.fastq | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/q2_fondue/tests/data/SRR123456.fastq b/q2_fondue/tests/data/SRR123456.fastq index a67e0bb8..da697ced 100644 --- a/q2_fondue/tests/data/SRR123456.fastq +++ b/q2_fondue/tests/data/SRR123456.fastq @@ -9,4 +9,4 @@ A='=4<;<<<;3==B:<5<<9@9A988497;867=<;<-EA/:9A<&C=1A)9EA3#EA2GC7*65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A>EA/GFGGHG///E@EEE>/?1BGB1B>F2B1G1212FGFFHH @ERR3018303.94 Bgsng7131.m10_1839802 length=250 TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGGAGCGTAGGCGGACTTTTAAGTGAGATGTGAAATACCCGGGCTCAACTTGGGTGCTGCATTTCAAACTGGAAGTCTAGAGTGCAGGAGAGGAGAATGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGATTCTCTGGACTGTAACTGACGCTGAGGCTCGAAAGCGTGGGGAGCAAAC +ERR3018303.94 Bgsng7131.m10_1839802 length=250 -CBBCCFFCFFCFGGGGGGGGGGHGGGGGHHHHHHHHGGFGHHGGGGGGGGGGHGGGGGHHHFGHHGHHGHHHHHHEGHHHHGGGFGHGHGGHGHHGEFFHGHHHHHGHGGHGHHHHHHHGHFHHHHHGGFFHGDFGHFHGHHGHHGHHHHHHHFHHGDGGGFGHGHFGD?EFCHGHHFCGHFHHGGGGGGGFFGFADAED?CFFFFFFFB;BEBFFFFFF/ADFAAFEFEFF@BADFFFFFFFAABEFF: \ No newline at end of file +CBBCCFFCFFCFGGGGGGGGGGHGGGGGHHHHHHHHGGFGHHGGGGGGGGGGHGGGGGHHHFGHHGHHGHHHHHHEGHHHHGGGFGHGHGGHGHHGEFFHGHHHHHGHGGHGHHHHHHHGHFHHHHHGGFFHGDFGHFHGHHGHHGHHHHHHHFHHGDGGGFGHGHFGD?EFCHGHHFCGHFHHGGGGGGGFFGFADAED?CFFFFFFFB;BEBFFFFFF/ADFAAFEFEFF@BADFFFFFFFAABEFF: diff --git a/q2_fondue/tests/data/testaccHYB_1.fastq b/q2_fondue/tests/data/testaccHYB_1.fastq index aa42ad1c..299e6880 100644 --- a/q2_fondue/tests/data/testaccHYB_1.fastq +++ b/q2_fondue/tests/data/testaccHYB_1.fastq @@ -13,4 +13,4 @@ AAAAAFF@@F1C1EEGGGG?AAFEGGCGHG2GGHHHEGGGHHHG//EGGGGGGGGGGGF1BFDGHF21E2FHBGGG11BB @ERR3018303.91 Bgsng7131.m10_3716454 length=250 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGAAGGCGAAGGCAGCCCCTTGGGAATGTACTGACGCTCATGTGCGAAAGCGTGGGGGGCAAAC +ERR3018303.91 Bgsng7131.m10_3716454 length=250 -BBBBBBBBBBBBGGGGEGGFFGHGGGGGHHDHGHHGGGGGHHHGGCEGGGGGGGGGGGGHHHBFHH4GGHGHHHHHHHHGG??EEEGHHHHG0GGGFFHFHDGHHHHGFGHDGEHFFEHHGGHHHFHHGGG.FFGGGFCF.FFFFFFBF/BFFFBFF;AFEFFFFEDADFEFFFFFBB9FDBFBBFF?.BDDEAFBC9.AD.EDFFFBBE?BFFF/BBFFF9?.DD/BFFFFF@@DB.9;BAF---@EFF \ No newline at end of file +BBBBBBBBBBBBGGGGEGGFFGHGGGGGHHDHGHHGGGGGHHHGGCEGGGGGGGGGGGGHHHBFHH4GGHGHHHHHHHHGG??EEEGHHHHG0GGGFFHFHDGHHHHGFGHDGEHFFEHHGGHHHFHHGGG.FFGGGFCF.FFFFFFBF/BFFFBFF;AFEFFFFEDADFEFFFFFBB9FDBFBBFF?.BDDEAFBC9.AD.EDFFFBBE?BFFF/BBFFF9?.DD/BFFFFF@@DB.9;BAF---@EFF diff --git a/q2_fondue/tests/data/testaccHYB_2.fastq b/q2_fondue/tests/data/testaccHYB_2.fastq index 1c2ce984..9c2c051d 100644 --- a/q2_fondue/tests/data/testaccHYB_2.fastq +++ b/q2_fondue/tests/data/testaccHYB_2.fastq @@ -13,4 +13,4 @@ GGGHHHHGHHHHGGGGGGGGGHHGGGGGHHHHHHHHGGGGHHHGGGGGGGGGGGGGGGGFFHHHDGHHHHHHGGHHHHHH @ERR3018303.91 Bgsng7131.m10_3716454 length=231 TACGTAGGTGGCAAGCGTTATCCGGATTTATTGGGCGTAAAGAGAGTGCAGGCGGTTTTCTAAGTCTGATGTGAAAGCCTTCGGCTTAACCGGAGAAGTGCATCGGAAACTGGATAACTTGAGTGCAGAAGAGGGTAGTGGAACTCCATGTGTAGCGGTGGAATGCGTAGATATATGGAAGAACACCAGTGGCGAAGGCGGCTACCTGGTCTGCAACTGACGCTGAGACTC +ERR3018303.91 Bgsng7131.m10_3716454 length=231 -GGGHHHHGHGGHHHGHGGGGHHHGGGGGGHEGHHHHGFGGHHHHHGHHHHHHGGGEFGGGHGHGGFHGEDG4GEGH3BECGEHG@CCBHBGFG/BF1=FDGF1=FDG65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A65:08?7C<==@;$5<<9?7<=<;A=(A91<=<=FB/=@;$=8-C==B;<===B;=8@<&:GC6)<3:B;<@;$=<5:D=;D=7:==D=;8<<<<+;:A<<9A