Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: finish transition to file options #166

Merged
merged 2 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ name = "biobear"
[dependencies]
arrow = { version = "52.1.0", features = ["pyarrow"] }
datafusion = "40"
exon = { version = "0.29.1", features = ["default"] }
exon = { version = "0.30.0", features = ["default"] }
pyo3 = "0.21.2"
tokio = { version = "1", features = ["rt"] }
noodles = { version = "0.78", features = ["core"] }
Expand Down
26 changes: 3 additions & 23 deletions src/datasources/bed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
use exon::datasources::bed::table_provider::ListingBEDTableOptions;
use pyo3::{pyclass, pymethods};

use crate::{error::BioBearResult, file_options::FileOptions, FileCompressionType};
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone, Default)]
Expand All @@ -31,6 +31,8 @@ pub struct BEDReadOptions {
file_extension: Option<String>,
}

impl_settable_from_file_options!(BEDReadOptions);

#[pymethods]
impl BEDReadOptions {
#[new]
Expand All @@ -48,28 +50,6 @@ impl BEDReadOptions {
}
}

impl BEDReadOptions {
    /// Copies the file extension and compression type from `file_options`
    /// into whichever of those two fields is still `None` on `self`.
    ///
    /// Fields that already hold a value are left untouched.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `FileCompressionType::try_from` when the
    /// compression type supplied by `file_options` cannot be converted.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        match file_options.file_extension() {
            Some(ext) if self.file_extension.is_none() => {
                self.file_extension = Some(ext.to_string());
            }
            _ => {}
        }

        match file_options.file_compression_type() {
            // The conversion is attempted only when it would actually be stored.
            Some(raw) if self.file_compression_type.is_none() => {
                self.file_compression_type = Some(FileCompressionType::try_from(raw)?);
            }
            _ => {}
        }

        Ok(())
    }
}

impl From<BEDReadOptions> for ListingBEDTableOptions {
fn from(options: BEDReadOptions) -> Self {
let file_compression_type = options
Expand Down
24 changes: 2 additions & 22 deletions src/datasources/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use crate::{
error::BioBearResult, file_compression_type::FileCompressionType, file_options::FileOptions,
file_compression_type::FileCompressionType, file_options::impl_settable_from_file_options,
};
use exon::datasources::fasta::{table_provider::ListingFASTATableOptions, SequenceDataType};
use pyo3::{pyclass, pymethods};
Expand Down Expand Up @@ -107,27 +107,7 @@ impl FASTAReadOptions {
}
}

impl FASTAReadOptions {
    /// Back-fills unset options on `self` from `file_options`.
    ///
    /// Only the file extension and the compression type are considered, and
    /// each is written only when the corresponding field on `self` is `None`.
    ///
    /// # Errors
    ///
    /// Propagates the failure of `FileCompressionType::try_from` for the
    /// compression type carried by `file_options`.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        let pending_extension = file_options
            .file_extension()
            .filter(|_| self.file_extension.is_none());
        if let Some(ext) = pending_extension {
            self.file_extension = Some(ext.to_string());
        }

        let pending_compression = file_options
            .file_compression_type()
            .filter(|_| self.file_compression_type.is_none());
        if let Some(raw) = pending_compression {
            // Converted only after confirming the field is unset.
            self.file_compression_type = Some(FileCompressionType::try_from(raw)?);
        }

        Ok(())
    }
}
impl_settable_from_file_options!(FASTAReadOptions);

impl From<FASTAReadOptions> for ListingFASTATableOptions {
fn from(options: FASTAReadOptions) -> Self {
Expand Down
22 changes: 3 additions & 19 deletions src/datasources/fastq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

use crate::{
error::BioBearResult, file_compression_type::FileCompressionType, file_options::FileOptions,
file_compression_type::FileCompressionType, file_options::impl_settable_from_file_options,
};
use exon::datasources::fastq::table_provider::ListingFASTQTableOptions;
use pyo3::{pyclass, pymethods};
Expand Down Expand Up @@ -51,6 +51,8 @@ pub struct FASTQReadOptions {
file_compression_type: Option<FileCompressionType>,
}

impl_settable_from_file_options!(FASTQReadOptions);

#[pymethods]
impl FASTQReadOptions {
#[new]
Expand Down Expand Up @@ -84,24 +86,6 @@ impl FASTQReadOptions {
}
}

impl FASTQReadOptions {
    /// Fills in the file extension and compression type from `file_options`
    /// for whichever of the two is still unset on `self`.
    ///
    /// Values already present on `self` are kept. This matches the BED, FASTA
    /// and GFF read options, which also only back-fill missing fields rather
    /// than overwriting an explicit choice.
    ///
    /// # Errors
    ///
    /// Returns the error from `FileCompressionType::try_from` if the
    /// compression type supplied by `file_options` cannot be converted.
    pub(crate) fn update_from_file_options(
        &mut self,
        file_options: &FileOptions,
    ) -> BioBearResult<()> {
        if let Some(file_extension) = file_options.file_extension() {
            // Do not clobber an extension that was set explicitly.
            if self.file_extension.is_none() {
                self.file_extension = Some(file_extension.to_string());
            }
        }

        if let Some(file_compression_type) = file_options.file_compression_type() {
            // Do not clobber a compression type that was set explicitly.
            if self.file_compression_type.is_none() {
                let fct = FileCompressionType::try_from(file_compression_type)?;
                self.file_compression_type = Some(fct);
            }
        }

        Ok(())
    }
}

impl From<FASTQReadOptions> for ListingFASTQTableOptions {
fn from(options: FASTQReadOptions) -> Self {
let file_compression_type = options
Expand Down
17 changes: 13 additions & 4 deletions src/datasources/genbank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,37 @@
use exon::datasources::genbank::table_provider::ListingGenbankTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone, Default)]
/// Options for reading GenBank files.
pub struct GenBankReadOptions {
/// The file compression type.
// NOTE(review): rendered diff — the next line appears to be the removed
// (non-optional) field and the line after it its `Option` replacement;
// confirm only one of them exists in the real file.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// The file extension.
file_extension: Option<String>,
}

impl_settable_from_file_options!(GenBankReadOptions);

#[pymethods]
impl GenBankReadOptions {
/// Creates GenBank read options with the file extension set to `"gb"`.
#[new]
fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
// NOTE(review): rendered diff — the next line appears to be the removed
// initializer (defaulting eagerly); the line after it stores the
// argument as passed, possibly `None`.
file_compression_type: file_compression_type.unwrap_or_default(),
file_compression_type,
file_extension: Some("gb".to_string()),
}
}
}

impl From<GenBankReadOptions> for ListingGenbankTableOptions {
/// Builds the listing options from the read options' compression type.
fn from(options: GenBankReadOptions) -> Self {
// NOTE(review): rendered diff — the next line appears to be the removed
// version of the return expression.
ListingGenbankTableOptions::new(options.file_compression_type.into())
// An unset compression type falls back to UNCOMPRESSED.
let c = options
.file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED);

// NOTE(review): `options.file_extension` is not forwarded here, unlike
// the mzML conversion — confirm that is intended.
ListingGenbankTableOptions::new(c.into())
}
}
11 changes: 8 additions & 3 deletions src/datasources/gff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ use exon::datasources::gff::table_provider::ListingGFFTableOptions;
use noodles::core::Region;
use pyo3::{pyclass, pymethods, PyResult};

use crate::{error::BioBearResult, file_options::FileOptions, FileCompressionType};
use crate::{
error::BioBearResult,
file_options::{impl_settable_from_file_options, FileOptions},
FileCompressionType,
};

use super::parse_region;

Expand All @@ -28,6 +32,8 @@ pub struct GFFReadOptions {
file_compression_type: Option<FileCompressionType>,
}

impl_settable_from_file_options!(GFFReadOptions);

#[pymethods]
impl GFFReadOptions {
#[new]
Expand Down Expand Up @@ -56,8 +62,7 @@ impl GFFReadOptions {

if let Some(file_compression_type) = options.file_compression_type() {
if self.file_compression_type.is_none() {
let fct = FileCompressionType::try_from(file_compression_type)?;
self.file_compression_type = Some(fct);
self.file_compression_type = Some(file_compression_type);
}
}

Expand Down
21 changes: 15 additions & 6 deletions src/datasources/gtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,44 @@
use exon::datasources::gtf::table_provider::ListingGTFTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone)]
/// Options for reading GTF files.
pub struct GTFReadOptions {
/// The file compression type.
// NOTE(review): rendered diff — the next line appears to be the removed
// (non-optional) field and the line after it its `Option` replacement.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// The file extension.
file_extension: Option<String>,
}

impl Default for GTFReadOptions {
/// Returns options with an uncompressed compression type and no file extension.
fn default() -> Self {
Self {
// NOTE(review): rendered diff — the next line appears to be the removed
// initializer; the line after it is its `Some(..)` replacement.
file_compression_type: FileCompressionType::UNCOMPRESSED,
// NOTE(review): the default is `Some(UNCOMPRESSED)` rather than `None`,
// so logic that only fills unset fields (presumably what
// `impl_settable_from_file_options!` generates — macro body not visible)
// would not touch it; confirm this is intended.
file_compression_type: Some(FileCompressionType::UNCOMPRESSED),
file_extension: None,
}
}
}

impl_settable_from_file_options!(GTFReadOptions);

#[pymethods]
impl GTFReadOptions {
/// Creates GTF read options with the file extension set to `"gtf"`.
#[new]
pub fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
// NOTE(review): rendered diff — the next two lines appear to be the
// removed initializer (defaulting to UNCOMPRESSED eagerly); the line
// after them stores the argument as passed, possibly `None`.
file_compression_type: file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED),
file_compression_type,
file_extension: Some("gtf".to_string()),
}
}
}

impl From<GTFReadOptions> for ListingGTFTableOptions {
/// Builds the listing options from the read options' compression type.
fn from(options: GTFReadOptions) -> Self {
// NOTE(review): rendered diff — the next line appears to be the removed
// version of the return expression.
ListingGTFTableOptions::new(options.file_compression_type.into())
// A set compression type is converted with `into()`; an unset one falls
// back to DataFusion's UNCOMPRESSED.
// NOTE(review): `options.file_extension` is not forwarded here — confirm
// that is intended.
ListingGTFTableOptions::new(
options
.file_compression_type
.map(|c| c.into())
.unwrap_or(datafusion::datasource::file_format::file_compression_type::FileCompressionType::UNCOMPRESSED),
)
}
}
30 changes: 24 additions & 6 deletions src/datasources/mzml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,23 @@
use exon::datasources::mzml::table_provider::ListingMzMLTableOptions;
use pyo3::{pyclass, pymethods};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

#[pyclass]
#[derive(Debug, Clone)]
/// Options for reading mzML files.
pub struct MzMLReadOptions {
/// The file compression type.
// NOTE(review): rendered diff — the next line appears to be the removed
// (non-optional) field and the line after it its `Option` replacement.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// The file extension; forwarded to the listing options only when set.
file_extension: Option<String>,
}

impl_settable_from_file_options!(MzMLReadOptions);

impl Default for MzMLReadOptions {
/// Returns options with an uncompressed compression type and no file extension.
fn default() -> Self {
Self {
// NOTE(review): rendered diff — the next line appears to be the removed
// initializer; the line after it is its `Some(..)` replacement.
file_compression_type: FileCompressionType::UNCOMPRESSED,
file_compression_type: Some(FileCompressionType::UNCOMPRESSED),
file_extension: None,
}
}
}
Expand All @@ -37,14 +41,28 @@ impl MzMLReadOptions {
/// Creates mzML read options, using UNCOMPRESSED when no compression type is given.
#[new]
fn new(file_compression_type: Option<FileCompressionType>) -> Self {
Self {
// NOTE(review): rendered diff — the next two lines appear to be the
// removed initializer; the three lines after them are its replacement.
file_compression_type: file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED),
// NOTE(review): the stored value is always `Some(..)`, whereas the GenBank
// and GTF constructors store the argument as passed; logic that only
// fills unset fields would presumably never apply here — confirm intent.
file_compression_type: Some(
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED),
),
// Unlike the GenBank ("gb") and GTF ("gtf") constructors, no default
// extension is set.
file_extension: None,
}
}
}

impl From<MzMLReadOptions> for ListingMzMLTableOptions {
    /// Converts the Python-facing read options into exon's mzML listing options.
    ///
    /// An unset compression type falls back to `UNCOMPRESSED`. The file
    /// extension is forwarded only when one is set; otherwise the listing
    /// options keep whatever `ListingMzMLTableOptions::new` chose.
    fn from(options: MzMLReadOptions) -> Self {
        let file_compression_type = options
            .file_compression_type
            .unwrap_or(FileCompressionType::UNCOMPRESSED);

        let listing_options = ListingMzMLTableOptions::new(file_compression_type.into());

        // No diagnostic output here: a conversion in library code should not
        // write to stderr on every call.
        match options.file_extension {
            Some(file_extension) => listing_options.with_file_extension(file_extension),
            None => listing_options,
        }
    }
}
18 changes: 12 additions & 6 deletions src/datasources/vcf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use exon::datasources::vcf::ListingVCFTableOptions;
use noodles::core::Region;
use pyo3::{pyclass, pymethods, PyResult};

use crate::FileCompressionType;
use crate::{file_options::impl_settable_from_file_options, FileCompressionType};

use super::parse_region;

Expand All @@ -28,15 +28,19 @@ pub struct VCFReadOptions {
/// The region to read.
region: Option<Region>,
/// The file compression type.
file_compression_type: FileCompressionType,
file_compression_type: Option<FileCompressionType>,
/// True if the INFO column should be parsed.
parse_info: bool,
/// True if the FORMAT column should be parsed.
parse_formats: bool,
/// The partition fields.
partition_cols: Option<Vec<String>>,
/// The file extension.
file_extension: Option<String>,
}

impl_settable_from_file_options!(VCFReadOptions);

#[pymethods]
impl VCFReadOptions {
#[new]
Expand All @@ -50,22 +54,24 @@ impl VCFReadOptions {
) -> PyResult<Self> {
let region = parse_region(region)?;

let file_compression_type =
file_compression_type.unwrap_or(FileCompressionType::UNCOMPRESSED);

Ok(Self {
region,
file_compression_type,
parse_info,
parse_formats,
partition_cols,
file_extension: Some("vcf".to_string()),
})
}
}

impl From<VCFReadOptions> for ListingVCFTableOptions {
fn from(options: VCFReadOptions) -> Self {
let mut o = ListingVCFTableOptions::new(options.file_compression_type.into(), false)
let compression = options
.file_compression_type
.unwrap_or(FileCompressionType::UNCOMPRESSED);

let mut o = ListingVCFTableOptions::new(compression.into(), false)
.with_parse_info(options.parse_info)
.with_parse_formats(options.parse_formats);

Expand Down
2 changes: 1 addition & 1 deletion src/file_compression_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use pyo3::prelude::*;
use crate::error::BioBearError;

#[pyclass]
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub enum FileCompressionType {
GZIP,
ZSTD,
Expand Down
Loading
Loading