From a3410ff715dd8dc6fa8d23339bf7dfbde5f38142 Mon Sep 17 00:00:00 2001 From: Nick Moore Date: Sun, 24 Nov 2024 21:29:32 +1100 Subject: [PATCH] feat: add BZIP2 (.bz2) support for reading fasta and fastq --- python/biobear/biobear.pyi | 1 + python/biobear/compression.py | 3 +++ python/tests/data/test.fa.bz2 | Bin 0 -> 75 bytes python/tests/data/test.fq.bz2 | Bin 0 -> 189 bytes python/tests/test_session.py | 20 ++++++++++++++++++++ src/file_compression_type.rs | 5 +++++ 6 files changed, 29 insertions(+) create mode 100644 python/tests/data/test.fa.bz2 create mode 100644 python/tests/data/test.fq.bz2 diff --git a/python/biobear/biobear.pyi b/python/biobear/biobear.pyi index bf06cb3..b22af32 100644 --- a/python/biobear/biobear.pyi +++ b/python/biobear/biobear.pyi @@ -30,6 +30,7 @@ class FileCompressionType(enum.Enum): GZIP = 0 BGZIP = 1 NONE = 2 + BZIP2 = 3 class FastaSequenceDataType(enum.Enum): """How to treat the sequence data in a FASTA file.""" diff --git a/python/biobear/compression.py b/python/biobear/compression.py index d70b438..e2ed4b4 100644 --- a/python/biobear/compression.py +++ b/python/biobear/compression.py @@ -25,12 +25,15 @@ class Compression(Enum): INFERRED = "INFERRED" NONE = "NONE" GZIP = "GZIP" + BZIP2 = "BZIP2" @classmethod def from_file(cls, path: os.PathLike) -> "Compression": """Infer the compression type from the file extension.""" if Path(path).suffix == ".gz": return Compression.GZIP + if Path(path).suffix == ".bz2": + return Compression.BZIP2 return Compression.NONE def infer_or_use(self, path: os.PathLike) -> "Compression": diff --git a/python/tests/data/test.fa.bz2 b/python/tests/data/test.fa.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..73b5d3a9a18806429a8d6654738d5472f7b781f7 GIT binary patch literal 75 zcmV-R0JQ%?T4*^jL0KkKS;pg_+W-Iq-+%xRKmZT{D1Zb2J|WxyAOIk$ri}yC1IiS_ hX`nR3(U*J1Igo@QE*&XJN%BF&Z!xRPiL#^#*`!kjdo$$k^#=K~Wz$3m0#?qEaDNKbtxMUq?^)i>xpxLEjmUd0gc6cl(4?r5Dp5#!F-n$kXtM-57pT~h 
r5Y?kam#;!nh6yEhYb1pkYS)Z>;m@7Fd2)Y35s write!(f, "GZIP"), Self::ZSTD => write!(f, "ZSTD"), Self::UNCOMPRESSED => write!(f, "UNCOMPRESSED"), + Self::BZIP2 => write!(f, "BZIP2"), } } } @@ -66,6 +68,7 @@ impl From<FileCompressionType> for DFFileCompressionType { FileCompressionType::GZIP => DFFileCompressionType::GZIP, FileCompressionType::ZSTD => DFFileCompressionType::ZSTD, FileCompressionType::UNCOMPRESSED => DFFileCompressionType::UNCOMPRESSED, + FileCompressionType::BZIP2 => DFFileCompressionType::BZIP2, } } } @@ -78,6 +81,7 @@ impl TryFrom<CompressionTypeVariant> for FileCompressionType { CompressionTypeVariant::GZIP => Ok(Self::GZIP), CompressionTypeVariant::ZSTD => Ok(Self::ZSTD), CompressionTypeVariant::UNCOMPRESSED => Ok(Self::UNCOMPRESSED), + CompressionTypeVariant::BZIP2 => Ok(Self::BZIP2), _ => Err(BioBearError::InvalidCompressionType(value.to_string())), } } @@ -91,6 +95,7 @@ impl TryFrom<DFFileCompressionType> for FileCompressionType { DFFileCompressionType::GZIP => Ok(Self::GZIP), DFFileCompressionType::ZSTD => Ok(Self::ZSTD), DFFileCompressionType::UNCOMPRESSED => Ok(Self::UNCOMPRESSED), + DFFileCompressionType::BZIP2 => Ok(Self::BZIP2), _ => Err(BioBearError::InvalidCompressionType( "Invalid compression type".to_string(), )),