Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COMPAT: Allow multi-indexes to be written to excel #10570

Merged
merged 1 commit into from
Aug 20, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ Other API Changes
- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
- Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
- Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).
- Allow ``DataFrame`` with ``MultiIndex`` columns to be written to Excel (:issue:`10564`). This was changed in 0.16.2 as the read-back method could not always guarantee perfect fidelity (:issue:`9794`).
- ``Categorical.unique`` now returns a new ``Categorical`` whose ``categories`` and ``codes`` are unique, rather than returning an ``np.array`` (:issue:`10508`)

- unordered category: values and categories are sorted by appearance order.
Expand Down
18 changes: 17 additions & 1 deletion pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# pylint: disable=W0141

import sys
import warnings

from pandas.core.base import PandasObject
from pandas.core.common import adjoin, notnull
Expand Down Expand Up @@ -1640,11 +1641,14 @@ class ExcelFormatter(object):
inf_rep : string, default `'inf'`
representation for np.inf values (which aren't representable in Excel)
A `'-'` sign will be added in front of -inf.
verbose : boolean, default True
If True, warn user that the resulting output file may not be
re-read or parsed directly by pandas.
"""

def __init__(self, df, na_rep='', float_format=None, cols=None,
header=True, index=True, index_label=None, merge_cells=False,
inf_rep='inf'):
inf_rep='inf', verbose=True):
self.df = df
self.rowcounter = 0
self.na_rep = na_rep
Expand All @@ -1657,6 +1661,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
self.header = header
self.merge_cells = merge_cells
self.inf_rep = inf_rep
self.verbose = verbose

def _format_value(self, val):
if lib.checknull(val):
Expand All @@ -1671,6 +1676,17 @@ def _format_value(self, val):
return val

def _format_header_mi(self):

if self.columns.nlevels > 1:
if not self.index:
raise NotImplementedError("Writing to Excel with MultiIndex"
" columns and no index ('index'=False) "
"is not yet implemented.")
elif self.index and self.verbose:
warnings.warn("Writing to Excel with MultiIndex columns is a"
" one way serializable operation. You will not"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here is where I mean

" be able to re-read or parse the output file.")

has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
if not(has_aliases or self.header):
return
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,7 +1247,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, columns=None, header=True, index=True,
index_label=None, startrow=0, startcol=0, engine=None,
merge_cells=True, encoding=None, inf_rep='inf'):
merge_cells=True, encoding=None, inf_rep='inf',
verbose=True):
"""
Write DataFrame to an Excel sheet

Expand Down Expand Up @@ -1288,6 +1289,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
inf_rep : string, default 'inf'
Representation for infinity (there is no native representation for
infinity in Excel)
verbose : boolean, default True
If True, warn user that the resulting output file may not be
re-read or parsed directly by pandas.

Notes
-----
Expand All @@ -1304,12 +1308,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
strings before writing.
"""
from pandas.io.excel import ExcelWriter
if self.columns.nlevels > 1:
raise NotImplementedError("Writing as Excel with a MultiIndex is "
"not yet implemented.")

need_save = False
if encoding == None:
if encoding is None:
encoding = 'ascii'

if isinstance(excel_writer, compat.string_types):
Expand All @@ -1324,7 +1324,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
index=index,
index_label=index_label,
merge_cells=merge_cells,
inf_rep=inf_rep)
inf_rep=inf_rep, verbose=verbose)
formatted_cells = formatter.get_formatted_cells()
excel_writer.write_cells(formatted_cells, sheet_name,
startrow=startrow, startcol=startcol)
Expand Down
68 changes: 51 additions & 17 deletions pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,6 @@ def test_read_from_file_url(self):

tm.assert_frame_equal(url_table, local_table)


def test_xlsx_table(self):
_skip_if_no_xlrd()
_skip_if_no_openpyxl()
Expand Down Expand Up @@ -1145,10 +1144,10 @@ def test_excel_010_hemstring(self):
# ensure limited functionality in 0.10
# override of #2370 until sorted out in 0.11

def roundtrip(df, header=True, parser_hdr=0):
def roundtrip(df, header=True, parser_hdr=0, index=True):

with ensure_clean(self.ext) as path:
df.to_excel(path, header=header, merge_cells=self.merge_cells)
df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index)
xf = pd.ExcelFile(path)
res = xf.parse(xf.sheet_names[0], header=parser_hdr)
return res
Expand All @@ -1164,7 +1163,7 @@ def roundtrip(df, header=True, parser_hdr=0):
#is implemented for now fixing #9794
if j>1:
with tm.assertRaises(NotImplementedError):
res = roundtrip(df, use_headers)
res = roundtrip(df, use_headers, index=False)
else:
res = roundtrip(df, use_headers)

Expand All @@ -1187,6 +1186,33 @@ def roundtrip(df, header=True, parser_hdr=0):
self.assertEqual(res.shape, (1, 2))
self.assertTrue(res.ix[0, 0] is not np.nan)

def test_excel_010_hemstring_raises_NotImplementedError(self):
# This test was failing only for j>1 and header=False,
# So I reproduced a simple test.
_skip_if_no_xlrd()

if self.merge_cells:
raise nose.SkipTest('Skip tests for merged MI format.')

from pandas.util.testing import makeCustomDataframe as mkdf
# ensure limited functionality in 0.10
# override of #2370 until sorted out in 0.11

def roundtrip2(df, header=True, parser_hdr=0, index=True):

with ensure_clean(self.ext) as path:
df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index)
xf = pd.ExcelFile(path)
res = xf.parse(xf.sheet_names[0], header=parser_hdr)
return res

nrows = 5; ncols = 3
j = 2; i = 1
df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
with tm.assertRaises(NotImplementedError):
res = roundtrip2(df, header=False, index=False)


def test_duplicated_columns(self):
# Test for issue #5235.
_skip_if_no_xlrd()
Expand Down Expand Up @@ -1439,29 +1465,37 @@ class XlwtTests(ExcelWriterBase, tm.TestCase):
engine_name = 'xlwt'
check_skip = staticmethod(_skip_if_no_xlwt)

def test_excel_raise_not_implemented_error_on_multiindex_columns(self):
def test_excel_raise_error_on_multiindex_columns_and_no_index(self):
_skip_if_no_xlwt()
#MultiIndex as columns is not yet implemented 9794
cols = pd.MultiIndex.from_tuples([('site',''),
('2014','height'),
('2014','weight')])
df = pd.DataFrame(np.random.randn(10,3), columns=cols)
# MultiIndex as columns is not yet implemented 9794
cols = pd.MultiIndex.from_tuples([('site', ''),
('2014', 'height'),
('2014', 'weight')])
df = pd.DataFrame(np.random.randn(10, 3), columns=cols)
with tm.assertRaises(NotImplementedError):
with ensure_clean(self.ext) as path:
df.to_excel(path, index=False)

def test_excel_warns_verbosely_on_multiindex_columns_and_index_true(self):
_skip_if_no_xlwt()
cols = pd.MultiIndex.from_tuples([('site', ''),
('2014', 'height'),
('2014', 'weight')])
df = pd.DataFrame(np.random.randn(10, 3), columns=cols)
with tm.assert_produces_warning(UserWarning):
with ensure_clean(self.ext) as path:
df.to_excel(path, index=True)

def test_excel_multiindex_index(self):
_skip_if_no_xlwt()
#MultiIndex as index works so assert no error #9794
cols = pd.MultiIndex.from_tuples([('site',''),
('2014','height'),
('2014','weight')])
df = pd.DataFrame(np.random.randn(3,10), index=cols)
# MultiIndex as index works so assert no error #9794
cols = pd.MultiIndex.from_tuples([('site', ''),
('2014', 'height'),
('2014', 'weight')])
df = pd.DataFrame(np.random.randn(3, 10), index=cols)
with ensure_clean(self.ext) as path:
df.to_excel(path, index=False)



def test_to_excel_styleconverter(self):
_skip_if_no_xlwt()

Expand Down