From 5d6cc60cdee95bb1b52078a88d466ac210473f13 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Thu, 30 Nov 2017 09:26:21 -0500 Subject: [PATCH] DOC: header='infer' is not working when there is no header, closes #17473 (#18042) (cherry picked from commit 67c4d0f4f9f45b981d3e6cb07521f9c0bbb459d7) --- doc/source/io.rst | 31 ++++++++++++++++++++++--------- pandas/io/parsers.py | 22 +++++++++++++--------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 7418617ae9004..8269c1a69c95b 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -103,15 +103,20 @@ Column and Index Locations and Names ++++++++++++++++++++++++++++++++++++ header : int or list of ints, default ``'infer'`` - Row number(s) to use as the column names, and the start of the data. Default - behavior is as if ``header=0`` if no ``names`` passed, otherwise as if - ``header=None``. Explicitly pass ``header=0`` to be able to replace existing - names. The header can be a list of ints that specify row locations for a - multi-index on the columns e.g. ``[0,1,3]``. Intervening rows that are not - specified will be skipped (e.g. 2 in this example is skipped). Note that - this parameter ignores commented lines and empty lines if - ``skip_blank_lines=True``, so header=0 denotes the first line of data - rather than the first line of the file. + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first line of the file, if column names are + passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to replace + existing names. + + The header can be a list of ints that specify row locations + for a multi-index on the columns e.g. ``[0,1,3]``. Intervening rows + that are not specified will be skipped (e.g. 2 in this example is + skipped). Note that this parameter ignores commented lines and empty + lines if ``skip_blank_lines=True``, so header=0 denotes the first + line of data rather than the first line of the file. names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause @@ -553,6 +558,14 @@ If the header is in a row other than the first, pass the row number to data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9' pd.read_csv(StringIO(data), header=1) +.. note:: + + Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first nonblank line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. + .. _io.dupe_names: Duplicate names parsing diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index cf181f1de938b..e4b221b1768ec 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -74,15 +74,19 @@ .. versionadded:: 0.18.1 support for the Python parser. header : int or list of ints, default 'infer' - Row number(s) to use as the column names, and the start of the data. - Default behavior is as if set to 0 if no ``names`` passed, otherwise - ``None``. Explicitly pass ``header=0`` to be able to replace existing - names. The header can be a list of integers that specify row locations for - a multi-index on the columns e.g. [0,1,3]. Intervening rows that are not - specified will be skipped (e.g. 2 in this example is skipped). Note that - this parameter ignores commented lines and empty lines if - ``skip_blank_lines=True``, so header=0 denotes the first line of data - rather than the first line of the file. + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so header=0 denotes the first line of + data rather than the first line of the file. names : array-like, default None List of column names to use. If file contains no header row, then you should explicitly pass header=None. Duplicates in this list will cause