Skip to content

Commit

Permalink
DEPR: Deprecate parse_cols in read_excel
Browse files Browse the repository at this point in the history
Will now use "usecols" just like in read_csv.

xref pandas-dev/pandas#4988 (gh-4988).
  • Loading branch information
gfyoung committed Oct 4, 2017
1 parent 81694dc commit 517b726
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 25 deletions.
10 changes: 5 additions & 5 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2800,21 +2800,21 @@ Parsing Specific Columns

It is often the case that users will insert columns to do temporary computations
in Excel and you may not want to read in those columns. `read_excel` takes
a `parse_cols` keyword to allow you to specify a subset of columns to parse.
a `usecols` keyword to allow you to specify a subset of columns to parse.

If `parse_cols` is an integer, then it is assumed to indicate the last column
If `usecols` is an integer, then it is assumed to indicate the last column
to be parsed.

.. code-block:: python
read_excel('path_to_file.xls', 'Sheet1', parse_cols=2)
read_excel('path_to_file.xls', 'Sheet1', usecols=2)
If `parse_cols` is a list of integers, then it is assumed to be the file column
If `usecols` is a list of integers, then it is assumed to be the file column
indices to be parsed.

.. code-block:: python
read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3])
read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3])
Parsing Dates
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,7 @@ Deprecations
~~~~~~~~~~~~

- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`).
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).
Expand Down
11 changes: 8 additions & 3 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import pandas.compat.openpyxl_compat as openpyxl_compat
from warnings import warn
from distutils.version import LooseVersion
from pandas.util._decorators import Appender
from pandas.util._decorators import Appender, deprecate_kwarg
from textwrap import fill

__all__ = ["read_excel", "ExcelWriter", "ExcelFile"]
Expand Down Expand Up @@ -115,6 +115,10 @@
.. versionadded:: 0.19.0
parse_cols : int or list, default None
.. deprecated:: 0.21.0
Pass in `usecols` instead.
usecols : int or list, default None
* If None then parse all columns,
* If int then indicates last column to be parsed
* If list of ints then indicates list of column numbers to be parsed
Expand Down Expand Up @@ -205,8 +209,9 @@ def get_writer(engine_name):


@Appender(_read_excel_doc)
@deprecate_kwarg("parse_cols", "usecols")
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
index_col=None, names=None, parse_cols=None, parse_dates=False,
index_col=None, names=None, usecols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
convert_float=True, converters=None, dtype=None,
true_values=None, false_values=None, engine=None,
Expand All @@ -226,7 +231,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,

return io._parse_excel(
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
index_col=index_col, parse_cols=usecols, parse_dates=parse_dates,
date_parser=date_parser, na_values=na_values, thousands=thousands,
convert_float=convert_float, skip_footer=skip_footer,
converters=converters, dtype=dtype, true_values=true_values,
Expand Down
52 changes: 35 additions & 17 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,56 +158,74 @@ def setup_method(self, method):
self.check_skip()
super(ReadingTestsBase, self).setup_method(method)

def test_parse_cols_int(self):
def test_usecols_int(self):

dfref = self.get_csv_refdf('test1')
dfref = dfref.reindex(columns=['A', 'B', 'C'])
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3)
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3)
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_cols=3)
usecols=3)

with tm.assert_produces_warning(FutureWarning):
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
index_col=0, parse_cols=3)

# TODO add index to xls file
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)
tm.assert_frame_equal(df3, dfref, check_names=False)

def test_parse_cols_list(self):
def test_usecols_list(self):

dfref = self.get_csv_refdf('test1')
dfref = dfref.reindex(columns=['B', 'C'])
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols=[0, 2, 3])
usecols=[0, 2, 3])
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_cols=[0, 2, 3])
usecols=[0, 2, 3])

with tm.assert_produces_warning(FutureWarning):
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
index_col=0, parse_cols=[0, 2, 3])

# TODO add index to xls file
tm.assert_frame_equal(df1, dfref, check_names=False)
tm.assert_frame_equal(df2, dfref, check_names=False)
tm.assert_frame_equal(df3, dfref, check_names=False)

def test_parse_cols_str(self):
def test_usecols_str(self):

dfref = self.get_csv_refdf('test1')

df1 = dfref.reindex(columns=['A', 'B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A:D')
usecols='A:D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_cols='A:D')
usecols='A:D')

with tm.assert_produces_warning(FutureWarning):
df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
index_col=0, parse_cols='A:D')

# TODO add index to xls, read xls ignores index name ?
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)
tm.assert_frame_equal(df4, df1, check_names=False)

df1 = dfref.reindex(columns=['B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A,C,D')
usecols='A,C,D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_cols='A,C,D')
usecols='A,C,D')
# TODO add index to xls file
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)

df1 = dfref.reindex(columns=['B', 'C'])
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
parse_cols='A,C:D')
usecols='A,C:D')
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
parse_cols='A,C:D')
usecols='A,C:D')
tm.assert_frame_equal(df2, df1, check_names=False)
tm.assert_frame_equal(df3, df1, check_names=False)

Expand Down Expand Up @@ -457,14 +475,14 @@ def test_read_one_empty_col_no_header(self):
actual_header_none = read_excel(
path,
'no_header',
parse_cols=[0],
usecols=[0],
header=None
)

actual_header_zero = read_excel(
path,
'no_header',
parse_cols=[0],
usecols=[0],
header=0
)
expected = DataFrame()
Expand All @@ -486,14 +504,14 @@ def test_read_one_empty_col_with_header(self):
actual_header_none = read_excel(
path,
'with_header',
parse_cols=[0],
usecols=[0],
header=None
)

actual_header_zero = read_excel(
path,
'with_header',
parse_cols=[0],
usecols=[0],
header=0
)
expected_header_none = DataFrame(pd.Series([0], dtype='int64'))
Expand Down

0 comments on commit 517b726

Please sign in to comment.