From 41f6108a8ed03d0445fffa15c650470bef57e8ae Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 26 Jul 2024 16:37:39 +1000 Subject: [PATCH 01/14] [WIP] add skeleton code for async reader --- noodles-util/Cargo.toml | 3 ++ noodles-util/src/alignment.rs | 2 + noodles-util/src/alignment/async.rs | 3 ++ noodles-util/src/alignment/async/io.rs | 5 +++ noodles-util/src/alignment/async/io/reader.rs | 40 +++++++++++++++++++ 5 files changed, 53 insertions(+) create mode 100644 noodles-util/src/alignment/async.rs create mode 100644 noodles-util/src/alignment/async/io.rs create mode 100644 noodles-util/src/alignment/async/io/reader.rs diff --git a/noodles-util/Cargo.toml b/noodles-util/Cargo.toml index 39f5afa55..fa37801b4 100644 --- a/noodles-util/Cargo.toml +++ b/noodles-util/Cargo.toml @@ -26,6 +26,9 @@ async = [ "noodles-bcf?/async", "noodles-bgzf?/async", "noodles-vcf?/async", + "noodles-bam?/async", + "noodles-cram?/async", + "noodles-sam?/async", ] variant = [ "dep:noodles-bcf", diff --git a/noodles-util/src/alignment.rs b/noodles-util/src/alignment.rs index 90378c02c..d34bc642c 100644 --- a/noodles-util/src/alignment.rs +++ b/noodles-util/src/alignment.rs @@ -1,4 +1,6 @@ //! Alignment format utilities. +#[cfg(feature = "async")] +pub mod r#async; pub mod io; pub mod iter; diff --git a/noodles-util/src/alignment/async.rs b/noodles-util/src/alignment/async.rs new file mode 100644 index 000000000..6c474134d --- /dev/null +++ b/noodles-util/src/alignment/async.rs @@ -0,0 +1,3 @@ +//! Async alignment format utilities. + +pub mod io; diff --git a/noodles-util/src/alignment/async/io.rs b/noodles-util/src/alignment/async/io.rs new file mode 100644 index 000000000..07c8461ce --- /dev/null +++ b/noodles-util/src/alignment/async/io.rs @@ -0,0 +1,5 @@ +//! Async alignment format I/O + +pub mod reader; + +pub use self::reader::Reader; diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs new file mode 100644 index 000000000..143e0444f --- /dev/null +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -0,0 +1,40 @@ +//! Async alignment reader. +//! todo add an example + +use futures::Stream; +use noodles_bam as bam; +use noodles_cram as cram; +use noodles_sam as sam; +use std::io; +use tokio::io::AsyncBufRead; + +/// An async alignment reader. +pub enum Reader { + /// SAM. + Sam(sam::r#async::io::Reader), + /// BAM. + Bam(bam::r#async::io::Reader), + /// CRAM. + Cram(cram::r#async::io::Reader), +} + +impl Reader +where + R: AsyncBufRead + Unpin, +{ + /// Reads the SAM header + /// + /// todo example usage + pub async fn read_header(&mut self) -> tokio::io::Result { + todo!() + } + + /// Returns an iterator over records starting from the current stream position. + /// todo add an example + pub fn records<'r, 'h: 'r>( + &'r mut self, + header: &'h sam::Header, + ) -> impl Stream>> + 'r { + todo!() + } +} From 25619e9f074009cc07fa647738199686029d53cc Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 7 Aug 2024 15:17:46 +1000 Subject: [PATCH 02/14] util/alignment/async: add Reader --- noodles-util/src/alignment/async/io/reader.rs | 40 +++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 143e0444f..64feb3e96 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -1,12 +1,12 @@ //! Async alignment reader. //! todo add an example -use futures::Stream; +use futures::{Stream, StreamExt}; use noodles_bam as bam; use noodles_cram as cram; use noodles_sam as sam; -use std::io; -use tokio::io::AsyncBufRead; +use std::pin::Pin; +use tokio::io::{self, AsyncBufRead}; /// An async alignment reader. pub enum Reader { @@ -24,9 +24,14 @@ where { /// Reads the SAM header /// - /// todo example usage - pub async fn read_header(&mut self) -> tokio::io::Result { - todo!() + /// # Examples + /// todo once Builder is implemented + pub async fn read_header(&mut self) -> io::Result { + match self { + Self::Sam(reader) => reader.read_header().await, + Self::Bam(reader) => reader.read_header().await, + Self::Cram(reader) => reader.read_header().await, + } } /// Returns an iterator over records starting from the current stream position. @@ -35,6 +40,27 @@ where &'r mut self, header: &'h sam::Header, ) -> impl Stream>> + 'r { - todo!() + #[allow(clippy::type_complexity)] + let records: Pin< + Box>>>, + > = match self { + Self::Sam(reader) => Box::pin( + reader + .records() + .map(|result| result.map(|r| Box::new(r) as Box)), + ), + Self::Bam(reader) => Box::pin( + reader + .records() + .map(|result| result.map(|r| Box::new(r) as Box)), + ), + Self::Cram(reader) => Box::pin( + reader + .records(header) + .map(|result| result.map(|r| Box::new(r) as Box)), + ), + }; + + records } } From 04e8d6fea4cffeca1d480ac26b31c70050b08a9f Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 13 Aug 2024 16:36:53 +1000 Subject: [PATCH 03/14] util/alignment/async: add Reader Builder --- noodles-util/src/alignment/async/io/reader.rs | 4 + .../src/alignment/async/io/reader/builder.rs | 174 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 noodles-util/src/alignment/async/io/reader/builder.rs diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 64feb3e96..75420b3f6 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -1,6 +1,8 @@ //! Async alignment reader. //! todo add an example +mod builder; + use futures::{Stream, StreamExt}; use noodles_bam as bam; use noodles_cram as cram; @@ -8,6 +10,8 @@ use noodles_sam as sam; use std::pin::Pin; use tokio::io::{self, AsyncBufRead}; +pub use self::builder::Builder; + /// An async alignment reader. pub enum Reader { /// SAM. diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs new file mode 100644 index 000000000..a9449c6cc --- /dev/null +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -0,0 +1,174 @@ +use super::Reader; +use crate::alignment::io::{CompressionMethod, Format}; +use noodles_bam as bam; +use noodles_bgzf as bgzf; +use noodles_cram as cram; +use noodles_fasta as fasta; +use noodles_sam as sam; +use tokio::{ + fs::File, + io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, BufReader}, +}; + +/// An async alignment reader builder. +#[derive(Default)] +pub struct Builder { + compression_method: Option>, + format: Option, + reference_sequence_repository: fasta::Repository, +} + +impl Builder { + /// Sets the compression method. + /// + /// By default, the compression method is autodetected on build. This can be used to override + /// it. + /// + /// # Examples + /// + /// ``` + /// use noodles_util::alignment::{self, io::CompressionMethod}; + /// let builder = alignment::io::reader::Builder::default() + /// .set_compression_method(Some(CompressionMethod::Bgzf)); + /// ``` + pub fn set_compression_method(mut self, compression_method: Option) -> Self { + self.compression_method = Some(compression_method); + self + } + + /// Sets the format of the input. + /// + /// By default, the format is autodetected on build. This can be used to override it. + /// + /// # Examples + /// + /// ``` + /// use noodles_util::alignment::{self, io::Format}; + /// let builder = alignment::io::reader::Builder::default() + /// .set_format(Format::Sam); + /// ``` + pub fn set_format(mut self, format: Format) -> Self { + self.format = Some(format); + self + } + + /// Sets the reference sequence repository. + /// + /// # Examples + /// + /// ``` + /// use noodles_fasta as fasta; + /// use noodles_util::alignment::{self, io::Format}; + /// + /// let repository = fasta::Repository::default(); + /// + /// let builder = alignment::io::reader::Builder::default() + /// .set_reference_sequence_repository(repository); + /// ``` + pub fn set_reference_sequence_repository( + mut self, + reference_sequence_repository: fasta::Repository, + ) -> Self { + self.reference_sequence_repository = reference_sequence_repository; + self + } + + /// Builds an async alignment reader from a path. + /// + /// By default, the format and compression method will be autodetected. This can be overridden + /// by using [`Self::set_format`] and [`Self::set_compression_method`]. + /// + /// # Examples + /// + /// ```no_run + /// # #[tokio::main] + /// # async fn main() -> tokio::io::Result<()> { + /// use noodles_util::alignment::async::io::reader::Builder; + /// use std::path::Path; + /// let path = Path::new("sample.bam"); + /// let reader = Builder::default().build_from_path(path).await?; + /// # Ok(()) + /// # } + /// ``` + pub async fn build_from_path

( + self, + src: P, + ) -> io::Result>> + where + P: AsRef, + { + let file = File::open(src).await?; + self.build_from_reader(file).await + } + + /// Builds an async alignment reader from a reader. + /// + /// By default, the format and compression method will be autodetected. This can be overridden + /// by using [`Self::set_format`] and [`Self::set_compression_method`]. + /// + /// # Examples + /// + /// ``` + /// # #[tokio::main] + /// # async fn main() -> tokio::io::Result<()> { + /// use noodles_util::alignment::async::io::reader::Builder; + /// use tokio::io; + /// let reader = Builder::default().build_from_reader(io::empty()).await?; + /// # Ok(()) + /// # } + /// ``` + pub async fn build_from_reader( + self, + reader: R, + ) -> io::Result>> + where + R: AsyncRead + Unpin + 'static, + { + use crate::alignment::io::reader::builder::{detect_compression_method, detect_format}; + let mut reader = BufReader::new(reader); + + let compression_method = match self.compression_method { + Some(compression_method) => compression_method, + None => { + let mut src = reader.fill_buf().await?; + detect_compression_method(&mut src)? + }, + }; + + let format = match self.format { + Some(format) => format, + None => { + let mut src = reader.fill_buf().await?; + detect_format(&mut src, compression_method)? + } + }; + + let reader: Box = match (format, compression_method) { + (Format::Sam, None) => Box::new(reader), + (Format::Sam, Some(CompressionMethod::Bgzf)) => Box::new(bgzf::AsyncReader::new(reader)), + (Format::Bam, None) => Box::new(reader), + (Format::Bam, Some(CompressionMethod::Bgzf)) => Box::new(bgzf::AsyncReader::new(reader)), + (Format::Cram, None) => { + let inner: Box = Box::new(reader); + let inner = cram::r#async::io::reader::Builder::default() + .set_reference_sequence_repository(self.reference_sequence_repository) + .build_from_reader(inner); + return Ok(Reader::Cram(inner)); + } + (Format::Cram, Some(CompressionMethod::Bgzf)) => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "CRAM cannot be compressed with BGZF", + )); + } + }; + + let reader: Reader> = match format { + Format::Sam => Reader::Sam(sam::r#async::io::Reader::new(reader)), + Format::Bam => Reader::Bam(bam::r#async::io::Reader::new(reader)), + Format::Cram => unreachable!(), // Handled above + }; + + Ok(reader) + } +} From 43cbdb823a5b697296ab1c503892d1c45a6a23a1 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 14 Aug 2024 09:17:06 +1000 Subject: [PATCH 04/14] util/alignment/async: fix Reader Builder --- .../src/alignment/async/io/reader/builder.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs index a9449c6cc..680c1a88a 100644 --- a/noodles-util/src/alignment/async/io/reader/builder.rs +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -132,7 +132,7 @@ impl Builder { None => { let mut src = reader.fill_buf().await?; detect_compression_method(&mut src)? - }, + } }; let format = match self.format { @@ -145,9 +145,13 @@ impl Builder { let reader: Box = match (format, compression_method) { (Format::Sam, None) => Box::new(reader), - (Format::Sam, Some(CompressionMethod::Bgzf)) => Box::new(bgzf::AsyncReader::new(reader)), + (Format::Sam, Some(CompressionMethod::Bgzf)) => { + Box::new(bgzf::AsyncReader::new(reader)) + } (Format::Bam, None) => Box::new(reader), - (Format::Bam, Some(CompressionMethod::Bgzf)) => Box::new(bgzf::AsyncReader::new(reader)), + (Format::Bam, Some(CompressionMethod::Bgzf)) => { + Box::new(bgzf::AsyncReader::new(reader)) + } (Format::Cram, None) => { let inner: Box = Box::new(reader); let inner = cram::r#async::io::reader::Builder::default() @@ -165,7 +169,7 @@ impl Builder { let reader: Reader> = match format { Format::Sam => Reader::Sam(sam::r#async::io::Reader::new(reader)), - Format::Bam => Reader::Bam(bam::r#async::io::Reader::new(reader)), + Format::Bam => Reader::Bam(bam::r#async::io::Reader::from(reader)), Format::Cram => unreachable!(), // Handled above }; From 9130173e14f526be983075e6c3e64e02c83333aa Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 14 Aug 2024 10:45:44 +1000 Subject: [PATCH 05/14] util/alignment/async: add doc examples --- noodles-util/src/alignment/async/io/reader.rs | 53 +++++++++++++++++-- .../src/alignment/async/io/reader/builder.rs | 4 +- noodles-util/src/lib.rs | 3 +- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 75420b3f6..502e5f063 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -1,5 +1,6 @@ //! Async alignment reader. -//! todo add an example +//! +//! Constructing a [`Reader`] is best done via a builder using the [`Builder`] type. mod builder; @@ -29,7 +30,23 @@ where /// Reads the SAM header /// /// # Examples - /// todo once Builder is implemented + /// + /// ``` + /// # #[tokio::main] + /// # async fn main() -> tokio::io::Result<()> { + /// use noodles_util::alignment::r#async::io::reader::Builder; + /// + /// let data = b"@HD\tVN:1.6 + /// @SQ\tSN:chr1\tLN:2489 + /// *\t4\t*\t0\t255\t*\t*\t0\t0\t*\t* + /// "; + /// + /// let mut reader = Builder::default().build_from_reader(&data[..]).await?; + /// let header = reader.read_header().await?; + /// + /// assert_eq!(header.reference_sequences().len(), 1); + /// # Ok(()) + /// # } pub async fn read_header(&mut self) -> io::Result { match self { Self::Sam(reader) => reader.read_header().await, @@ -39,7 +56,37 @@ where } /// Returns an iterator over records starting from the current stream position. - /// todo add an example + /// + /// # Examples + /// + /// ``` + /// # #[tokio::main] + /// # async fn main() -> tokio::io::Result<()> { + /// use futures::TryStreamExt; + /// use noodles_util::alignment::r#async::io::reader::Builder; + /// use noodles_sam::alignment::Record; + /// + /// let data = b"@HD\tVN:1.6 + /// @SQ\tSN:chr1\tLN:2489 + /// *\t4\t*\t0\t255\t*\t*\t0\t0\t*\t* + /// chr1\t0\tr1\t1\t60\t100M\t*\t0\t0\t*\t* + /// "; + /// + /// let mut reader = Builder::default().build_from_reader(&data[..]).await?; + /// let header = reader.read_header().await?; + /// let mut records = reader.records(&header); + /// + /// let mut num_unmapped = 0; + /// while let Some(record) = records.try_next().await? { + /// let is_unmapped = record.flags().map(|f| f.is_unmapped())?; + /// if is_unmapped { + /// num_unmapped += 1; + /// } + /// } + /// assert_eq!(num_unmapped, 1); + /// # Ok(()) + /// # } + /// ``` pub fn records<'r, 'h: 'r>( &'r mut self, header: &'h sam::Header, diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs index 680c1a88a..7cdff7cc1 100644 --- a/noodles-util/src/alignment/async/io/reader/builder.rs +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -83,7 +83,7 @@ impl Builder { /// ```no_run /// # #[tokio::main] /// # async fn main() -> tokio::io::Result<()> { - /// use noodles_util::alignment::async::io::reader::Builder; + /// use noodles_util::alignment::r#async::io::reader::Builder; /// use std::path::Path; /// let path = Path::new("sample.bam"); /// let reader = Builder::default().build_from_path(path).await?; @@ -111,7 +111,7 @@ impl Builder { /// ``` /// # #[tokio::main] /// # async fn main() -> tokio::io::Result<()> { - /// use noodles_util::alignment::async::io::reader::Builder; + /// use noodles_util::alignment::r#async::io::reader::Builder; /// use tokio::io; /// let reader = Builder::default().build_from_reader(io::empty()).await?; /// # Ok(()) diff --git a/noodles-util/src/lib.rs b/noodles-util/src/lib.rs index 2814c5adc..bcc24ae25 100644 --- a/noodles-util/src/lib.rs +++ b/noodles-util/src/lib.rs @@ -1,6 +1,7 @@ #![warn(missing_docs)] -//! **noodles-util** are utilities for working with noodles. +//! **noodles-util** are utilities for working with noodles. Currently, this consists of a unified +//! interface for reading and writing [alignment] (BAM/CRAM/SAM) and [variant] (VCF/BCF) data. #[cfg(feature = "alignment")] pub mod alignment; From c09d940faaf3df0808c423577eb4a5716b28ca5d Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 14 Aug 2024 11:09:50 +1000 Subject: [PATCH 06/14] util/alignment/async: add async view example --- .../examples/util_alignment_view_async.rs | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 noodles-util/examples/util_alignment_view_async.rs diff --git a/noodles-util/examples/util_alignment_view_async.rs b/noodles-util/examples/util_alignment_view_async.rs new file mode 100644 index 000000000..e223e8f66 --- /dev/null +++ b/noodles-util/examples/util_alignment_view_async.rs @@ -0,0 +1,53 @@ +//! Prints an alignment file in the SAM format. +//! +//! Reference sequences in the FASTA format are only required for CRAM inputs that require them. +//! +//! The result matches the output of `samtools view --no-PG --with-header [--reference ] +//! `. + +use futures::TryStreamExt; +use std::env; +use tokio::io::{self, BufWriter}; + +use noodles_fasta::{self as fasta, repository::adapters::IndexedReader}; +use noodles_sam::r#async::io::Writer; +use noodles_util::alignment::r#async::io::reader::Builder; + +#[tokio::main] +async fn main() -> io::Result<()> { + let mut args = env::args().skip(1); + + let src = args.next().expect("missing src"); + let fasta_src = args.next(); + + let mut builder = Builder::default(); + + if let Some(fasta_src) = fasta_src { + let repository = fasta::io::indexed_reader::Builder::default() + .build_from_path(fasta_src) + .map(IndexedReader::new) + .map(fasta::Repository::new)?; + + builder = builder.set_reference_sequence_repository(repository); + } + + let mut reader = if src == "-" { + let stdin = io::stdin(); + builder.build_from_reader(stdin).await? + } else { + builder.build_from_path(src).await? + }; + + let header = reader.read_header().await?; + + let stdout = io::stdout(); + let mut writer = Writer::new(BufWriter::new(stdout)); + + writer.write_header(&header).await?; + + while let Some(record) = reader.records(&header).try_next().await? { + writer.write_alignment_record(&header, &record).await?; + } + + Ok(()) +} From 39c6f6a852a2baf101ecca79c75f500c6d38a244 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:24:03 -0500 Subject: [PATCH 07/14] util/alignment: Organize imports --- noodles-util/src/alignment.rs | 1 + noodles-util/src/alignment/async/io/reader.rs | 3 ++- noodles-util/src/alignment/async/io/reader/builder.rs | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/noodles-util/src/alignment.rs b/noodles-util/src/alignment.rs index d34bc642c..299b4645d 100644 --- a/noodles-util/src/alignment.rs +++ b/noodles-util/src/alignment.rs @@ -2,5 +2,6 @@ #[cfg(feature = "async")] pub mod r#async; + pub mod io; pub mod iter; diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 502e5f063..68888b1d2 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -4,11 +4,12 @@ mod builder; +use std::pin::Pin; + use futures::{Stream, StreamExt}; use noodles_bam as bam; use noodles_cram as cram; use noodles_sam as sam; -use std::pin::Pin; use tokio::io::{self, AsyncBufRead}; pub use self::builder::Builder; diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs index 7cdff7cc1..056bb1b8b 100644 --- a/noodles-util/src/alignment/async/io/reader/builder.rs +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -1,5 +1,3 @@ -use super::Reader; -use crate::alignment::io::{CompressionMethod, Format}; use noodles_bam as bam; use noodles_bgzf as bgzf; use noodles_cram as cram; @@ -10,6 +8,9 @@ use tokio::{ io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, BufReader}, }; +use super::Reader; +use crate::alignment::io::{CompressionMethod, Format}; + /// An async alignment reader builder. #[derive(Default)] pub struct Builder { @@ -125,6 +126,7 @@ impl Builder { R: AsyncRead + Unpin + 'static, { use crate::alignment::io::reader::builder::{detect_compression_method, detect_format}; + let mut reader = BufReader::new(reader); let compression_method = match self.compression_method { From 138e6340c43fd91093fb16badc7361f6bbb999e7 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:29:11 -0500 Subject: [PATCH 08/14] util/alignment/async/io: Update descriptions --- noodles-util/src/alignment/async/io.rs | 2 +- noodles-util/src/alignment/async/io/reader.rs | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/noodles-util/src/alignment/async/io.rs b/noodles-util/src/alignment/async/io.rs index 07c8461ce..17b3f262f 100644 --- a/noodles-util/src/alignment/async/io.rs +++ b/noodles-util/src/alignment/async/io.rs @@ -1,4 +1,4 @@ -//! Async alignment format I/O +//! Async alignment format I/O. pub mod reader; diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 68888b1d2..4bc831bdf 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -1,6 +1,4 @@ //! Async alignment reader. -//! -//! Constructing a [`Reader`] is best done via a builder using the [`Builder`] type. mod builder; @@ -28,7 +26,7 @@ impl Reader where R: AsyncBufRead + Unpin, { - /// Reads the SAM header + /// Reads the SAM header. /// /// # Examples /// From 9c3326b08681523d2ed36e4567a38fdef857d08d Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:38:02 -0500 Subject: [PATCH 09/14] util/alignment/async/io/reader: Simplify examples --- noodles-util/src/alignment/async/io/reader.rs | 32 +++++-------------- .../src/alignment/async/io/reader/builder.rs | 21 ++++-------- 2 files changed, 15 insertions(+), 38 deletions(-) diff --git a/noodles-util/src/alignment/async/io/reader.rs b/noodles-util/src/alignment/async/io/reader.rs index 4bc831bdf..66f7bf0d5 100644 --- a/noodles-util/src/alignment/async/io/reader.rs +++ b/noodles-util/src/alignment/async/io/reader.rs @@ -34,18 +34,12 @@ where /// # #[tokio::main] /// # async fn main() -> tokio::io::Result<()> { /// use noodles_util::alignment::r#async::io::reader::Builder; - /// - /// let data = b"@HD\tVN:1.6 - /// @SQ\tSN:chr1\tLN:2489 - /// *\t4\t*\t0\t255\t*\t*\t0\t0\t*\t* - /// "; - /// - /// let mut reader = Builder::default().build_from_reader(&data[..]).await?; - /// let header = reader.read_header().await?; - /// - /// assert_eq!(header.reference_sequences().len(), 1); + /// use tokio::io; + /// let mut reader = Builder::default().build_from_reader(io::empty()).await?; + /// let _header = reader.read_header().await?; /// # Ok(()) /// # } + /// ``` pub async fn read_header(&mut self) -> io::Result { match self { Self::Sam(reader) => reader.read_header().await, @@ -63,26 +57,16 @@ where /// # async fn main() -> tokio::io::Result<()> { /// use futures::TryStreamExt; /// use noodles_util::alignment::r#async::io::reader::Builder; - /// use noodles_sam::alignment::Record; - /// - /// let data = b"@HD\tVN:1.6 - /// @SQ\tSN:chr1\tLN:2489 - /// *\t4\t*\t0\t255\t*\t*\t0\t0\t*\t* - /// chr1\t0\tr1\t1\t60\t100M\t*\t0\t0\t*\t* - /// "; + /// use tokio::io; /// - /// let mut reader = Builder::default().build_from_reader(&data[..]).await?; + /// let mut reader = Builder::default().build_from_reader(io::empty()).await?; /// let header = reader.read_header().await?; + /// /// let mut records = reader.records(&header); /// - /// let mut num_unmapped = 0; /// while let Some(record) = records.try_next().await? { - /// let is_unmapped = record.flags().map(|f| f.is_unmapped())?; - /// if is_unmapped { - /// num_unmapped += 1; - /// } + /// // ... /// } - /// assert_eq!(num_unmapped, 1); /// # Ok(()) /// # } /// ``` diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs index 056bb1b8b..a6f495a2f 100644 --- a/noodles-util/src/alignment/async/io/reader/builder.rs +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -28,9 +28,8 @@ impl Builder { /// # Examples /// /// ``` - /// use noodles_util::alignment::{self, io::CompressionMethod}; - /// let builder = alignment::io::reader::Builder::default() - /// .set_compression_method(Some(CompressionMethod::Bgzf)); + /// use noodles_util::alignment::{r#async::io::reader::Builder, io::CompressionMethod}; + /// let _builder = Builder::default().set_compression_method(Some(CompressionMethod::Bgzf)); /// ``` pub fn set_compression_method(mut self, compression_method: Option) -> Self { self.compression_method = Some(compression_method); @@ -44,9 +43,8 @@ impl Builder { /// # Examples /// /// ``` - /// use noodles_util::alignment::{self, io::Format}; - /// let builder = alignment::io::reader::Builder::default() - /// .set_format(Format::Sam); + /// use noodles_util::alignment::{r#async::io::reader::Builder, io::Format}; + /// let _builder = Builder::default().set_format(Format::Sam); /// ``` pub fn set_format(mut self, format: Format) -> Self { self.format = Some(format); @@ -59,12 +57,9 @@ impl Builder { /// /// ``` /// use noodles_fasta as fasta; - /// use noodles_util::alignment::{self, io::Format}; - /// + /// use noodles_util::alignment::{r#async::io::reader::Builder, io::Format}; /// let repository = fasta::Repository::default(); - /// - /// let builder = alignment::io::reader::Builder::default() - /// .set_reference_sequence_repository(repository); + /// let _builder = Builder::default().set_reference_sequence_repository(repository); /// ``` pub fn set_reference_sequence_repository( mut self, @@ -85,9 +80,7 @@ impl Builder { /// # #[tokio::main] /// # async fn main() -> tokio::io::Result<()> { /// use noodles_util::alignment::r#async::io::reader::Builder; - /// use std::path::Path; - /// let path = Path::new("sample.bam"); - /// let reader = Builder::default().build_from_path(path).await?; + /// let _reader = Builder::default().build_from_path("sample.bam").await?; /// # Ok(()) /// # } /// ``` From 19fa0dfddcaa9d2352624b8fb986ac1eccda432b Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:42:46 -0500 Subject: [PATCH 10/14] util/examples/alignment_view_async: Normalize example This is to better match `util_variant_view_async`. --- .../examples/util_alignment_view_async.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/noodles-util/examples/util_alignment_view_async.rs b/noodles-util/examples/util_alignment_view_async.rs index e223e8f66..01bb67b5d 100644 --- a/noodles-util/examples/util_alignment_view_async.rs +++ b/noodles-util/examples/util_alignment_view_async.rs @@ -5,13 +5,13 @@ //! The result matches the output of `samtools view --no-PG --with-header [--reference ] //! `. -use futures::TryStreamExt; use std::env; -use tokio::io::{self, BufWriter}; +use futures::TryStreamExt; use noodles_fasta::{self as fasta, repository::adapters::IndexedReader}; -use noodles_sam::r#async::io::Writer; -use noodles_util::alignment::r#async::io::reader::Builder; +use noodles_sam as sam; +use noodles_util::alignment; +use tokio::io::{self, AsyncWriteExt}; #[tokio::main] async fn main() -> io::Result<()> { @@ -20,7 +20,7 @@ async fn main() -> io::Result<()> { let src = args.next().expect("missing src"); let fasta_src = args.next(); - let mut builder = Builder::default(); + let mut builder = alignment::r#async::io::reader::Builder::default(); if let Some(fasta_src) = fasta_src { let repository = fasta::io::indexed_reader::Builder::default() @@ -32,22 +32,21 @@ async fn main() -> io::Result<()> { } let mut reader = if src == "-" { - let stdin = io::stdin(); - builder.build_from_reader(stdin).await? + builder.build_from_reader(io::stdin()).await? } else { builder.build_from_path(src).await? }; let header = reader.read_header().await?; - let stdout = io::stdout(); - let mut writer = Writer::new(BufWriter::new(stdout)); - + let mut writer = sam::r#async::io::Writer::new(io::stdout()); writer.write_header(&header).await?; while let Some(record) = reader.records(&header).try_next().await? { writer.write_alignment_record(&header, &record).await?; } + writer.get_mut().shutdown().await?; + Ok(()) } From b18e14d018b09d5557eede31f38502537d445355 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:43:59 -0500 Subject: [PATCH 11/14] util: Add required features for util_alignment_view_async example --- noodles-util/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/noodles-util/Cargo.toml b/noodles-util/Cargo.toml index fa37801b4..15b70c9f2 100644 --- a/noodles-util/Cargo.toml +++ b/noodles-util/Cargo.toml @@ -75,6 +75,10 @@ required-features = ["alignment"] name = "util_alignment_view" required-features = ["alignment"] +[[example]] +name = "util_alignment_view_async" +required-features = ["alignment", "async"] + [[example]] name = "util_variant_query" required-features = ["variant"] From 34d9d04d6b3a8ef1eec2df4bc13e3d0051476865 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:45:49 -0500 Subject: [PATCH 12/14] util: Sort async dependencies --- noodles-util/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noodles-util/Cargo.toml b/noodles-util/Cargo.toml index 15b70c9f2..6eb21b779 100644 --- a/noodles-util/Cargo.toml +++ b/noodles-util/Cargo.toml @@ -23,12 +23,12 @@ alignment = [ async = [ "dep:futures", "dep:tokio", + "noodles-bam?/async", "noodles-bcf?/async", "noodles-bgzf?/async", - "noodles-vcf?/async", - "noodles-bam?/async", "noodles-cram?/async", "noodles-sam?/async", + "noodles-vcf?/async", ] variant = [ "dep:noodles-bcf", From 3787ad68f6184f206aecde627eb4514e2617281a Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:46:55 -0500 Subject: [PATCH 13/14] util/alignment/async/io/reader/builder: Import Path --- noodles-util/src/alignment/async/io/reader/builder.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/noodles-util/src/alignment/async/io/reader/builder.rs b/noodles-util/src/alignment/async/io/reader/builder.rs index a6f495a2f..0e8772baa 100644 --- a/noodles-util/src/alignment/async/io/reader/builder.rs +++ b/noodles-util/src/alignment/async/io/reader/builder.rs @@ -1,3 +1,5 @@ +use std::path::Path; + use noodles_bam as bam; use noodles_bgzf as bgzf; use noodles_cram as cram; @@ -89,7 +91,7 @@ impl Builder { src: P, ) -> io::Result>> where - P: AsRef, + P: AsRef, { let file = File::open(src).await?; self.build_from_reader(file).await From 06bc126ad84c47212c6ad2637bd25262b523a3e5 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 15 Aug 2024 15:51:08 -0500 Subject: [PATCH 14/14] util/changelog: Add entry for async reader --- noodles-util/CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/noodles-util/CHANGELOG.md b/noodles-util/CHANGELOG.md index 28c575de3..d7527de15 100644 --- a/noodles-util/CHANGELOG.md +++ b/noodles-util/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## Unreleased + +### Added + + * util/alignment: Add async reader (`alignment::r#async::io::Reader`) + ([#286]). + +[#286]: https://github.com/zaeleus/noodles/issues/286 + ## 0.50.0 - 2024-08-04 ### Added