diff --git a/python/biobear/__init__.py b/python/biobear/__init__.py index 12ed1b4..c82f179 100644 --- a/python/biobear/__init__.py +++ b/python/biobear/__init__.py @@ -14,15 +14,6 @@ """Main biobear package.""" -from biobear.fasta_reader import FastaReader -from biobear.fastq_reader import FastqReader -from biobear.vcf_reader import VCFReader, VCFIndexedReader -from biobear.bam_reader import BamReader, BamIndexedReader -from biobear.gtf_reader import GTFReader -from biobear.gff_reader import GFFReader -from biobear.mzml_reader import MzMLReader -from biobear.genbank_reader import GenbankReader -from biobear.bcf_reader import BCFReader, BCFIndexedReader from biobear import compression from biobear.compression import Compression @@ -52,18 +43,6 @@ __version__ = "0.22.7" __all__ = [ - "FastaReader", - "FastqReader", - "VCFReader", - "VCFIndexedReader", - "BamReader", - "BamIndexedReader", - "BCFReader", - "BCFIndexedReader", - "GFFReader", - "GTFReader", - "GenbankReader", - "MzMLReader", "compression", "Compression", "FileCompressionType", diff --git a/python/biobear/bam_reader.py b/python/biobear/bam_reader.py deleted file mode 100644 index fe31116..0000000 --- a/python/biobear/bam_reader.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2023 WHERE TRUE Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""BAM File Readers.""" - -import os - -import pyarrow as pa -import warnings - -from biobear.reader import Reader -from .biobear import _BamIndexedReader, _ExonReader - - -class BamReader(Reader): - """A BAM File Reader.""" - - def __init__(self, path: os.PathLike): - """Initialize the BamReader. - - Args: - path (Path): Path to the BAM file. - - """ - warnings.warn( - "The BamReader class is deprecated and will be removed in a future release. " - "Please use BioBearSessionContext.read_bam_file instead.", - DeprecationWarning, - ) - self._bam_reader = _ExonReader(str(path), "BAM", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._bam_reader - - -class BamIndexedReader(Reader): - """An Indexed BAM File Reader.""" - - def __init__(self, path: os.PathLike): - """Initialize the BamIndexedReader. - - Args: - path (Path): Path to the BAM file. - index (Path): Path to the BAM index file. - - """ - warnings.warn( - "The BamIndexedReader class is deprecated and will be removed in a future release. " - "Please use BioBearSessionContext.read_bam_file instead.", - DeprecationWarning, - ) - self._bam_reader = _BamIndexedReader(str(path)) - - @property - def inner(self): - """Return the inner reader.""" - return self._bam_reader - - def query(self, region: str) -> pa.RecordBatchReader: - """Query the BAM file and return an Arrow RecordBatchReader. - - Args: - region: A region in the format "chr:start-end". - - """ - return self._bam_reader.query(region) diff --git a/python/biobear/bcf_reader.py b/python/biobear/bcf_reader.py deleted file mode 100644 index d482706..0000000 --- a/python/biobear/bcf_reader.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2023 WHERE TRUE Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""BCF File Readers.""" - -import os - -import pyarrow as pa - -from biobear.reader import Reader - -from .biobear import _ExonReader, _BCFIndexedReader - -from warnings import warn - - -class BCFReader(Reader): - """A BCF File Reader. - - This class is used to read a BCF file and convert it to a polars DataFrame. - """ - - def __init__(self, path: os.PathLike): - """Initialize the BCFReader. - - Args: - path (Path): Path to the BCF file. - - """ - - # show a warning that this is deprecated - warn( - "BCFReader is deprecated, use ExonSessionContext instead", - DeprecationWarning, - ) - - self._bcf_reader = _ExonReader(str(path), "BCF", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._bcf_reader - - -class BCFIndexedReader(Reader): - """An Indexed BCF File Reader. - - This class is used to read or query an indexed BCF file and convert it to a - polars DataFrame. - - """ - - def __init__(self, path: os.PathLike): - """Initialize the BCFIndexedReader.""" - self._bcf_reader = _BCFIndexedReader(str(path)) - - @property - def inner(self): - """Return the inner reader.""" - return self._bcf_reader - - def query(self, region: str) -> pa.RecordBatchReader: - """Query the BCF file and return an arrow batch reader. - - Args: - region (str): The region to query. - - """ - return self._bcf_reader.query(region) diff --git a/python/biobear/fasta_reader.py b/python/biobear/fasta_reader.py deleted file mode 100644 index aa50f90..0000000 --- a/python/biobear/fasta_reader.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2023 WHERE TRUE Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""FASTA file reader.""" -import os - -import warnings - -from biobear.reader import Reader -from biobear.compression import Compression - -from .biobear import _ExonReader - - -class FastaReader(Reader): - """FASTA file reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Read a fasta file. - - Args: - path (Path): Path to the fasta file. - - Kwargs: - compression (Compression): Compression type of the file. Defaults to - Compression.INFERRED. - - """ - warnings.warn( - "The FastaReader class is deprecated and will be removed in a future release. " - "Please use BioBearSessionContext.read_fasta instead.", - DeprecationWarning, - ) - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._fasta_reader = _ExonReader(str(path), "FASTA", "GZIP") - else: - self._fasta_reader = _ExonReader(str(path), "FASTA", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._fasta_reader diff --git a/python/biobear/fastq_reader.py b/python/biobear/fastq_reader.py deleted file mode 100644 index f25b122..0000000 --- a/python/biobear/fastq_reader.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2023 WHERE TRUE Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""FASTQ reader.""" -import os -import warnings - -from biobear.reader import Reader -from biobear.compression import Compression - -from .biobear import _ExonReader - - -class FastqReader(Reader): - """FASTQ file reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Read a fastq file. - - Args: - path (Path): Path to the fastq file. - - Kwargs: - compression (Compression): Compression type of the file. Defaults to - Compression.INFERRED. - - """ - warnings.warn( - "The FastaReader class is deprecated and will be removed in a future release. " - "Please use BioBearSessionContext.read_fasta instead.", - DeprecationWarning, - ) - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._fastq_reader = _ExonReader(str(path), "FASTQ", "GZIP") - else: - self._fastq_reader = _ExonReader(str(path), "FASTQ", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._fastq_reader diff --git a/python/biobear/genbank_reader.py b/python/biobear/genbank_reader.py deleted file mode 100644 index 5ec46a2..0000000 --- a/python/biobear/genbank_reader.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Genbank file reader.""" -import os - -from biobear.reader import Reader -from biobear.compression import Compression - -from .biobear import _ExonReader - - -class GenbankReader(Reader): - """Genbank file reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Read a genbank file. - - Args: - path (Path): Path to the fasta file. - compression (Compression): Compression type of the file. - - """ - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._reader = _ExonReader(str(path), "GENBANK", "GZIP") - else: - self._reader = _ExonReader(str(path), "GENBANK", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._reader diff --git a/python/biobear/gff_reader.py b/python/biobear/gff_reader.py deleted file mode 100644 index d6ff87d..0000000 --- a/python/biobear/gff_reader.py +++ /dev/null @@ -1,55 +0,0 @@ -"""GFF File Reader.""" - -import os - -import pyarrow as pa -import pyarrow.dataset as ds - -from biobear.compression import Compression -from biobear.reader import Reader - -from .biobear import _ExonReader - - -class GFFReader(Reader): - """A GFF File Reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Initialize the GFFReader. - - Args: - path: The path to the GFF file. - """ - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._gff_reader = _ExonReader(str(path), "GFF", "GZIP") - else: - self._gff_reader = _ExonReader(str(path), "GFF", None) - - def to_polars(self): - """Read the GFF file and return a polars DataFrame.""" - try: - import polars as pl - except ImportError as import_error: - raise ImportError( - "The polars library is required to convert a GFF file " - "to a polars DataFrame." - ) from import_error - - return pl.from_arrow(self.to_arrow().read_all()) - - def to_arrow(self) -> pa.RecordBatchReader: - """Convert the GFF reader to an arrow batch reader.""" - return self._gff_reader.to_pyarrow() - - def to_arrow_scanner(self) -> ds.Scanner: - """Convert the GFF reader to an arrow scanner.""" - return ds.Scanner.from_batches(self.to_arrow()) - - @property - def inner(self): - """Return the inner reader.""" - return self._gff_reader diff --git a/python/biobear/gtf_reader.py b/python/biobear/gtf_reader.py deleted file mode 100644 index 21f84c8..0000000 --- a/python/biobear/gtf_reader.py +++ /dev/null @@ -1,57 +0,0 @@ -"""GTF File Reader.""" - -import os - -import pyarrow as pa -import pyarrow.dataset as ds - -from biobear.compression import Compression -from biobear.reader import Reader - -from .biobear import _ExonReader - - -class GTFReader(Reader): - """A GTF File Reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Initialize the GTFReader. - - Args: - path: The path to the GTF file. - """ - - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._gtf_reader = _ExonReader(str(path), "GTF", "GZIP") - else: - self._gtf_reader = _ExonReader(str(path), "GTF", None) - - def to_polars(self): - """Read the GTF file and return a polars DataFrame.""" - - try: - import polars as pl - except ImportError as import_error: - raise ImportError( - "The polars library is required to convert a GTF file to " - "a polars DataFrame." - ) from import_error - - return pl.from_arrow(self.to_arrow().read_all()) - - def to_arrow(self) -> pa.RecordBatchReader: - """Convert the GTF reader to an arrow batch reader.""" - return self._gtf_reader.to_pyarrow() - - def to_arrow_scanner(self) -> ds.Scanner: - """Convert the GTF reader to an arrow scanner.""" - return ds.Scanner.from_batches(self.to_arrow()) - - @property - def inner(self): - """Return the inner reader.""" - return self._gtf_reader diff --git a/python/biobear/mzml_reader.py b/python/biobear/mzml_reader.py deleted file mode 100644 index 4524e7a..0000000 --- a/python/biobear/mzml_reader.py +++ /dev/null @@ -1,40 +0,0 @@ -"""MzML File Reader.""" - -import os - -from biobear.compression import Compression -from biobear.reader import Reader - -from .biobear import _ExonReader - - -class MzMLReader(Reader): - """A MzML File Reader.""" - - def __init__( - self, path: os.PathLike, compression: Compression = Compression.INFERRED - ): - """Initialize the MzMLReader. - - Args: - path: The path to the MzML file. - """ - - self.compression = compression.infer_or_use(path) - - if self.compression == Compression.GZIP: - self._reader = _ExonReader(str(path), "MZML", "GZIP") - else: - self._reader = _ExonReader(str(path), "MZML", None) - - def to_polars(self): - """Read the MZML file and return a polars DataFrame.""" - raise RuntimeError( - "The polars library is not yet supported for MzML files. " - "Consider using the session to select the fields you need." - ) - - @property - def inner(self): - """Return the inner reader.""" - return self._reader diff --git a/python/biobear/vcf_reader.py b/python/biobear/vcf_reader.py deleted file mode 100644 index 4338f9d..0000000 --- a/python/biobear/vcf_reader.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 WHERE TRUE Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""VCF File Readers.""" - -import os -import warnings - -import pyarrow as pa - -from biobear.reader import Reader - -from .biobear import _ExonReader, _VCFIndexedReader - - -class VCFReader(Reader): - """A VCF File Reader. - - This class is used to read a VCF file and convert it to a polars DataFrame. - """ - - def __init__(self, path: os.PathLike): - """Initialize the VCFReader. - - Args: - path (Path): Path to the VCF file. - - """ - warnings.warn( - "The VCFReader class is deprecated and will be removed in a future release. " - "Please use BioBearSessionContext.read_vcf_file instead.", - DeprecationWarning, - ) - self._vcf_reader = _ExonReader(str(path), "VCF", None) - - @property - def inner(self): - """Return the inner reader.""" - return self._vcf_reader - - -class VCFIndexedReader(Reader): - """An Indexed VCF File Reader. - - This class is used to read or query an indexed VCF file and convert it to a - polars DataFrame. - - """ - - def __init__(self, path: os.PathLike): - """Initialize the VCFIndexedReader.""" - self._vcf_reader = _VCFIndexedReader(str(path)) - - @property - def inner(self): - """Return the inner reader.""" - return self._vcf_reader - - def query(self, region: str) -> pa.RecordBatchReader: - """Query the VCF file and return a pyarrow RecordBatchReader. - - Args: - region (str): The region to query. - - """ - return self._vcf_reader.query(region)