Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Allow safe access to .book in ExcelWriter #45687

Merged
merged 8 commits into from
Feb 1, 2022
7 changes: 6 additions & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,6 +1048,12 @@ def engine(self) -> str:
"""Name of engine."""
pass

@property
@abc.abstractmethod
def sheets(self) -> dict[str, Any]:
    """
    Sheets of the workbook, keyed by sheet name.

    Abstract: each engine-specific writer supplies its own implementation.
    """

@abc.abstractmethod
def write_cells(
self,
Expand Down Expand Up @@ -1112,7 +1118,6 @@ def __init__(
self.handles = get_handle(
path, mode, storage_options=storage_options, is_text=False
)
self.sheets: dict[str, Any] = {}
self.cur_sheet = None

if date_format is None:
Expand Down
12 changes: 11 additions & 1 deletion pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ def __init__(
self.book = OpenDocumentSpreadsheet(**engine_kwargs)
self._style_dict: dict[str, str] = {}

@property
def sheets(self) -> dict[str, Any]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way to type these more specifically?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not easily, as far as I can tell. I plan to look into this more in the future.

Regarding whatsnew - this is not user-facing. Currently, no attribute of ExcelWriter is public in the docs. I'd like to refactor ExcelWriter (namely, making protected attributes start with _) before updating the docs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, totally fine (open an issue if you cannot get to it soon); otherwise, a PR is totally fine.

is there a way to type these more specifically?

Can we use a generic base class for typing? (Again, not a big deal.)

from odf.table import Table

result = {
sheet.getAttribute("name"): sheet
for sheet in self.book.getElementsByType(Table)
}
return result

def save(self) -> None:
"""
Save workbook to disk.
Expand Down Expand Up @@ -91,7 +101,7 @@ def write_cells(
wks = self.sheets[sheet_name]
else:
wks = Table(name=sheet_name)
self.sheets[sheet_name] = wks
self.book.spreadsheet.addElement(wks)

if validate_freeze_panes(freeze_panes):
freeze_panes = cast(Tuple[int, int], freeze_panes)
Expand Down
9 changes: 5 additions & 4 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,18 @@ def __init__(

self.book = load_workbook(self.handles.handle, **engine_kwargs)
self.handles.handle.seek(0)
self.sheets = {name: self.book[name] for name in self.book.sheetnames}

else:
# Create workbook object with default optimized_write=True.
self.book = Workbook(**engine_kwargs)

if self.book.worksheets:
self.book.remove(self.book.worksheets[0])

@property
def sheets(self) -> dict[str, Any]:
    """
    Mapping of sheet name to sheet object.

    Rebuilt from ``self.book`` on every access, so sheets created directly
    on the workbook are included.
    """
    workbook = self.book
    return {title: workbook[title] for title in workbook.sheetnames}

def save(self) -> None:
"""
Save workbook to disk.
Expand Down Expand Up @@ -440,7 +443,6 @@ def write_cells(
target_index = self.book.index(old_wks)
del self.book[sheet_name]
wks = self.book.create_sheet(sheet_name, target_index)
self.sheets[sheet_name] = wks
elif self.if_sheet_exists == "error":
raise ValueError(
f"Sheet '{sheet_name}' already exists and "
Expand All @@ -458,7 +460,6 @@ def write_cells(
else:
wks = self.book.create_sheet()
wks.title = sheet_name
self.sheets[sheet_name] = wks

if validate_freeze_panes(freeze_panes):
freeze_panes = cast(Tuple[int, int], freeze_panes)
Expand Down
11 changes: 7 additions & 4 deletions pandas/io/excel/_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ def __init__(

self.book = Workbook(self.handles.handle, **engine_kwargs)

@property
def sheets(self) -> dict[str, Any]:
    """
    Mapping of sheet name to sheet object.

    Built from ``self.book.sheetnames`` on every access, so sheets added
    directly through ``self.book`` are included.

    Returns
    -------
    dict[str, Any]
        A new dict on each access; mutating it does not affect the workbook.
    """
    # Shallow-copy rather than hand out the workbook's own name registry,
    # matching the other engines, which also build a fresh dict per access.
    return dict(self.book.sheetnames)

def save(self) -> None:
"""
Save workbook to disk.
Expand All @@ -222,11 +227,9 @@ def write_cells(
# Write the frame cells using xlsxwriter.
sheet_name = self._get_sheet_name(sheet_name)

if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
else:
wks = self.book.get_worksheet_by_name(sheet_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change .book to be a private attribute ._book and then expose a .book property (as follow-ups, for sure).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would the property be something besides just return self._book? In other words, why a property?

if wks is None:
wks = self.book.add_worksheet(sheet_name)
self.sheets[sheet_name] = wks

style_dict = {"null": None}

Expand Down
5 changes: 5 additions & 0 deletions pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ def __init__(
self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format)
self.fm_date = xlwt.easyxf(num_format_str=self.date_format)

@property
def sheets(self) -> dict[str, Any]:
    """
    Mapping of sheet name to sheet object.

    Rebuilt on each access from the worksheets held by ``self.book``.
    """
    # The worksheet list is read from a name-mangled private attribute
    # of the workbook object.
    worksheets = self.book._Workbook__worksheets
    return {wks.name: wks for wks in worksheets}

def save(self) -> None:
"""
Save workbook to disk.
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/io/excel/test_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,13 @@ def test_engine_kwargs(ext, engine_kwargs):
else:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass


def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects tables added directly to ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(path) as writer:
        assert writer.sheets == {}
        new_table = odf.table.Table(name="test_name")
        writer.book.spreadsheet.addElement(new_table)
        assert writer.sheets == {"test_name": new_table}
9 changes: 9 additions & 0 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,12 @@ def test_read_empty_with_blank_row(datapath, ext, read_only):
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame()
tm.assert_frame_equal(result, expected)


def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects sheets created directly on ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(path, engine="openpyxl") as writer:
        assert writer.sheets == {}
        new_sheet = writer.book.create_sheet("test_name", 0)
        assert writer.sheets == {"test_name": new_sheet}
8 changes: 8 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,10 +1271,12 @@ def test_register_writer(self):
# some awkward mocking to test out dispatch and such actually works
called_save = []
called_write_cells = []
called_sheets = []

class DummyClass(ExcelWriter):
called_save = False
called_write_cells = False
called_sheets = False
supported_extensions = ["xlsx", "xls"]
engine = "dummy"

Expand All @@ -1284,12 +1286,18 @@ def save(self):
def write_cells(self, *args, **kwargs):
called_write_cells.append(True)

@property
def sheets(self):
called_sheets.append(True)

def check_called(func):
    """Run ``func``, verify the dummy writer's calls, then reset the trackers."""
    func()
    # save and write_cells must each have fired; sheets must not be touched.
    assert called_save
    assert called_write_cells
    assert not called_sheets
    for tracker in (called_save, called_write_cells, called_sheets):
        tracker.clear()

with option_context("io.excel.xlsx.writer", "dummy"):
path = "something.xlsx"
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/io/excel/test_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,12 @@ def test_engine_kwargs(ext, nan_inf_to_errors):
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="xlsxwriter", engine_kwargs=engine_kwargs) as writer:
assert writer.book.nan_inf_to_errors == nan_inf_to_errors


def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects worksheets added directly to ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(path, engine="xlsxwriter") as writer:
        assert writer.sheets == {}
        new_sheet = writer.book.add_worksheet("test_name")
        assert writer.sheets == {"test_name": new_sheet}
9 changes: 9 additions & 0 deletions pandas/tests/io/excel/test_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,12 @@ def test_engine_kwargs(ext, style_compression):
assert writer.book._Workbook__styles.style_compression == style_compression
# xlwt won't allow us to close without writing something
DataFrame().to_excel(writer)


def test_book_and_sheets_consistent(ext):
    """``writer.sheets`` reflects sheets added directly to ``writer.book``."""
    # GH#45687 - Ensure sheets is updated if user modifies book
    with tm.ensure_clean(ext) as path, ExcelWriter(path) as writer:
        assert writer.sheets == {}
        new_sheet = writer.book.add_sheet("test_name")
        assert writer.sheets == {"test_name": new_sheet}