From a72ecbe84c1a60c0f6c95fa5d4915ce7d996e179 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 4 Jun 2016 03:31:13 +0100 Subject: [PATCH] DEPR, DOC: Deprecate buffer_lines in read_csv The 'buffer_lines' parameter is not respected by the implementation, because the C parser determines its value internally. [ci skip] --- doc/source/io.rst | 6 ++++++ doc/source/whatsnew/v0.18.2.txt | 1 + pandas/io/parsers.py | 11 +++++++++-- pandas/io/tests/parser/test_parsers.py | 2 -- pandas/io/tests/parser/test_unsupported.py | 5 +++++ 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 4eb42e1fb918d..cfc88d335f862 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -176,6 +176,12 @@ low_memory : boolean, default ``True`` Note that the entire file is read into a single DataFrame regardless, use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) +buffer_lines : int, default None + DEPRECATED: this argument will be removed in a future version because its + value is not respected by the parser + + If ``low_memory`` is ``True``, specify the number of rows to be read for + each chunk. (Only valid with C parser) compact_ints : boolean, default False DEPRECATED: this argument will be removed in a future version diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 7493150370e9f..191f5c1ccbf40 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -294,6 +294,7 @@ Deprecations ^^^^^^^^^^^^ - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13320`) +- ``buffer_lines`` has been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13360`) .. 
_whatsnew_0182.performance: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2c8726f588522..5936d256c6d2a 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -227,6 +227,12 @@ Note that the entire file is read into a single DataFrame regardless, use the `chunksize` or `iterator` parameter to return the data in chunks. (Only valid with C parser) +buffer_lines : int, default None + DEPRECATED: this argument will be removed in a future version because its + value is not respected by the parser + + If low_memory is True, specify the number of rows to be read for each + chunk. (Only valid with C parser) compact_ints : boolean, default False DEPRECATED: this argument will be removed in a future version @@ -234,7 +240,6 @@ the parser will attempt to cast it as the smallest integer dtype possible, either signed or unsigned depending on the specification from the `use_unsigned` parameter. - use_unsigned : boolean, default False DEPRECATED: this argument will be removed in a future version @@ -448,6 +453,7 @@ def _read(filepath_or_buffer, kwds): 'float_precision', ]) _deprecated_args = set([ + 'buffer_lines', 'compact_ints', 'use_unsigned', ]) @@ -806,7 +812,8 @@ def _clean_options(self, options, engine): _validate_header_arg(options['header']) for arg in _deprecated_args: - if result[arg] != _c_parser_defaults[arg]: + parser_default = _c_parser_defaults[arg] + if result.get(arg, parser_default) != parser_default: warnings.warn("The '{arg}' argument has been deprecated " "and will be removed in a future version" .format(arg=arg), FutureWarning, stacklevel=2) diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py index ea8ce9b616f36..fda7b28769647 100644 --- a/pandas/io/tests/parser/test_parsers.py +++ b/pandas/io/tests/parser/test_parsers.py @@ -72,14 +72,12 @@ def read_csv(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine kwds['low_memory'] = self.low_memory - kwds['buffer_lines'] = 2 return 
read_csv(*args, **kwds) def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine kwds['low_memory'] = True - kwds['buffer_lines'] = 2 return read_table(*args, **kwds) diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py index e820924d2be8b..97862ffa90cef 100644 --- a/pandas/io/tests/parser/test_unsupported.py +++ b/pandas/io/tests/parser/test_unsupported.py @@ -124,6 +124,7 @@ def test_deprecated_args(self): # deprecated arguments with non-default values deprecated = { + 'buffer_lines': True, 'compact_ints': True, 'use_unsigned': True, } @@ -132,6 +133,10 @@ def test_deprecated_args(self): for engine in engines: for arg, non_default_val in deprecated.items(): + if engine == 'python' and arg == 'buffer_lines': + # unsupported --> exception is raised first + continue + with tm.assert_produces_warning( FutureWarning, check_stacklevel=False): kwargs = {arg: non_default_val}