From d2b2cee1a94bdd0b3f7479102d8ee76a85e56fcf Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 12 Dec 2024 09:48:19 -0600 Subject: [PATCH] bed/examples: Add query example The BED reader currently doesn't have a convienient method to query the input, but we can use existing components to compose a queried reader. See #312. --- noodles-bed/Cargo.toml | 5 ++++ noodles-bed/examples/bed_query.rs | 50 +++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 noodles-bed/examples/bed_query.rs diff --git a/noodles-bed/Cargo.toml b/noodles-bed/Cargo.toml index f47506c2a..e7c51a786 100644 --- a/noodles-bed/Cargo.toml +++ b/noodles-bed/Cargo.toml @@ -15,3 +15,8 @@ bstr.workspace = true lexical-core.workspace = true memchr.workspace = true noodles-core = { path = "../noodles-core", version = "0.15.0" } + +[dev-dependencies] +noodles-bgzf = { path = "../noodles-bgzf", version = "0.33.0" } +noodles-csi = { path = "../noodles-csi", version = "0.40.0" } +noodles-tabix = { path = "../noodles-tabix", version = "0.46.0" } diff --git a/noodles-bed/examples/bed_query.rs b/noodles-bed/examples/bed_query.rs new file mode 100644 index 000000000..21e1d7c6f --- /dev/null +++ b/noodles-bed/examples/bed_query.rs @@ -0,0 +1,50 @@ +/// Queries a bgzipped BED file with a given region. +/// +/// The input bgzipped BED file must have an associated tabix index (TBI) in the same directory. +use std::{env, io}; + +use noodles_bed as bed; +use noodles_bgzf as bgzf; +use noodles_core::Region; +use noodles_csi::{self as csi, BinningIndex}; +use noodles_tabix as tabix; + +fn main() -> Result<(), Box> { + let mut args = env::args().skip(1); + + let src = args.next().expect("missing src"); + let region: Region = args.next().expect("missing region").parse()?; + + let index_src = format!("{src}.tbi"); + let index = tabix::read(index_src)?; + + let header = index.header().expect("missing tabix header"); + let reference_sequence_id = header + .reference_sequence_names() + .get_index_of(region.name()) + .expect("invalid reference sequence name"); + + let mut decoder = bgzf::reader::Builder.build_from_path(src)?; + let chunks = index.query(reference_sequence_id, region.interval())?; + let query = csi::io::Query::new(&mut decoder, chunks); + + let mut reader = bed::io::Reader::<3, _>::new(query); + let mut record = bed::Record::default(); + + let stdout = io::stdout().lock(); + let mut writer = bed::io::Writer::<3, _>::new(stdout); + + while reader.read_record(&mut record)? != 0 { + let start = record.feature_start()?; + let end = record.feature_end().expect("missing feature end")?; + let interval = (start..=end).into(); + + if !region.interval().intersects(interval) { + continue; + } + + writer.write_record(&record)?; + } + + Ok(()) +}