Skip to content

Commit

Permalink
revert: revert genbank update
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck committed Feb 5, 2024
1 parent cae32fd commit ddef2fe
Show file tree
Hide file tree
Showing 5 changed files with 3 additions and 31 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ df = session.sql("""

#### Known Issues

There are a few file types where the naive `SELECT *` will cause an error, because Polars doesn't support all Arrow types -- `Map` being the most common. In these cases, you can use the `SELECT * EXCEPT attributes` to exclude the offending column, or select the fields from the map individually. Alternatively, you can first convert the table to a Pandas DataFrame.
There are a few file types where the naive `SELECT *` will cause an error, because Polars doesn't support all Arrow types -- `Map` being the most common. In these cases, select the fields from the map individually rather than using `SELECT *`. Alternatively, you can first convert the table to a Pandas DataFrame.

The file types that can cause this issue are GenBanks and MZMLs.

Expand Down
23 changes: 0 additions & 23 deletions python/biobear/genbank_reader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
"""Genbank file reader."""
import os

import pyarrow as pa
import pyarrow.dataset as ds

from biobear.reader import Reader
from biobear.compression import Compression

Expand All @@ -30,26 +27,6 @@ def __init__(
else:
self._reader = _ExonReader(str(path), "GENBANK", None)

def to_polars(self):
"""Read the GFF file and return a polars DataFrame."""
try:
import polars as pl
except ImportError as import_error:
raise ImportError(
"The polars library is required to convert a GFF file "
"to a polars DataFrame."
) from import_error

return pl.from_arrow(self.to_arrow().read_all())

def to_arrow(self) -> pa.RecordBatchReader:
"""Convert the GFF reader to an arrow batch reader."""
return self._reader.to_pyarrow()

def to_arrow_scanner(self) -> ds.Scanner:
"""Convert the GFF reader to an arrow scanner."""
return ds.Scanner.from_batches(self.to_arrow())

@property
def inner(self):
"""Return the inner reader."""
Expand Down
6 changes: 2 additions & 4 deletions python/biobear/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,8 @@ def to_polars(self):
"The 'polars' package is required to use the to_polars method."
) from import_error

arrow_record = self.to_arrow()
table = pa.Table.from_batches(arrow_record, schema=arrow_record.schema)

return pl.from_arrow(table)
pydict = self.to_arrow_scanner().to_table().to_pydict()
return pl.from_dict(pydict)

def to_arrow_scanner(self) -> ds.Scanner:
"""Convert the inner data to an Arrow scanner.
Expand Down
1 change: 0 additions & 1 deletion python/tests/test_genbank_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DATA = Path(__file__).parent / "data"


@pytest.mark.skip
@pytest.mark.skipif(
not importlib.util.find_spec("polars"), reason="polars not installed"
)
Expand Down
2 changes: 0 additions & 2 deletions python/tests/test_mzml_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DATA = Path(__file__).parent / "data"


@pytest.mark.skip
@pytest.mark.skipif(
not importlib.util.find_spec("polars"), reason="polars not installed"
)
Expand Down Expand Up @@ -38,7 +37,6 @@ def test_mzml_reader_to_scanner():
assert scanner.count_rows() == 2


@pytest.mark.skip
@pytest.mark.skipif(
not importlib.util.find_spec("polars"), reason="polars not installed"
)
Expand Down

0 comments on commit ddef2fe

Please sign in to comment.