
CI: Fail doc build on warning #22743

Closed
wants to merge 32 commits

Changes from 24 commits
1168273
FutureWarning from groupby.
TomAugspurger Sep 18, 2018
41c8297
Purge read_table
TomAugspurger Sep 18, 2018
2e76e84
Removed nested list example
TomAugspurger Sep 18, 2018
a70f86d
Fixed resample __iter__
TomAugspurger Sep 18, 2018
e4a8b06
Old whatsnew
TomAugspurger Sep 18, 2018
693eead
Ecosystem
TomAugspurger Sep 18, 2018
e6b2c09
to_csv
TomAugspurger Sep 18, 2018
a7f0b38
to_json
TomAugspurger Sep 18, 2018
ff3d2dd
Handle subpackages better
TomAugspurger Sep 18, 2018
e544249
Fixed unexpected indent
TomAugspurger Sep 18, 2018
8bdb920
Fixed "inline interpreted text..."
TomAugspurger Sep 18, 2018
ae0f8ff
Fixed "malformed hyperlink target"
TomAugspurger Sep 18, 2018
a275dfb
Add warnings to CLI
TomAugspurger Sep 18, 2018
b190866
Fixed unexpected indentation
TomAugspurger Sep 18, 2018
a46e4c7
newline after directive
TomAugspurger Sep 18, 2018
74af53d
Maybe fix na_value not included in toctree
TomAugspurger Sep 18, 2018
1668c65
Fixed no link to na_value
TomAugspurger Sep 18, 2018
dda2bfc
Fixed II ref
TomAugspurger Sep 18, 2018
a2b31ab
Fixed options ref
TomAugspurger Sep 18, 2018
9f0a948
Fixed link to Resmpaler
TomAugspurger Sep 18, 2018
158c46d
Change warning, error, linting
TomAugspurger Sep 18, 2018
c1a5ab8
Sample warning
TomAugspurger Sep 19, 2018
bbcd7bd
update contributing
TomAugspurger Sep 19, 2018
c8f206c
lint
TomAugspurger Sep 19, 2018
c917101
Merge remote-tracking branch 'upstream/master' into doc-warnings
TomAugspurger Sep 20, 2018
30c9174
Fix call
TomAugspurger Sep 20, 2018
5e0b275
write a parser
TomAugspurger Sep 20, 2018
384ace8
try to exit
TomAugspurger Sep 20, 2018
6a2a060
lint
TomAugspurger Sep 20, 2018
378bd6d
Rework doc build
TomAugspurger Sep 20, 2018
8859f97
executable scripts
TomAugspurger Sep 20, 2018
b9331f1
activate
TomAugspurger Sep 20, 2018
17 changes: 15 additions & 2 deletions ci/build_docs.sh
@@ -22,8 +22,21 @@ if [ "$DOC" ]; then
echo # Log file for the doc build #
echo ###############################

echo ./make.py
./make.py
echo './make.py 2>&1 | tee doc-build.log'
./make.py 2>&1 | tee doc-build.log

echo ##################
echo # Lint build log #
echo ##################

echo './make.py lint_log --log-file=doc-build.log'
./make.py lint_log --log-file=doc-build.log

if [ $? -eq 1 ]
then
echo "Errors in documentation build."
exit 1
fi

echo ########################
echo # Create and send docs #
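The exit-status check added to `ci/build_docs.sh` above can be sketched in isolation. This is an illustration, not the PR's exact script; `lint_and_check` is a hypothetical helper, and `"$@"` stands in for the lint command:

```shell
# Sketch of the pattern in ci/build_docs.sh: run a lint step and fail
# with a message if it exits non-zero. `$?` holds the exit status of
# the most recently executed command, so it must be captured right
# away -- running any other command in between would clobber it.
lint_and_check() {
    "$@"            # the lint command, e.g. ./make.py lint_log ...
    status=$?       # read $? immediately after the command it refers to
    if [ "$status" -ne 0 ]; then
        echo "Errors in documentation build."
        return 1
    fi
    return 0
}
```

On a failing lint step the helper prints the error message and returns 1, which the CI job can turn into `exit 1`.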
1 change: 1 addition & 0 deletions doc/.gitignore
@@ -0,0 +1 @@
doc-build.log
61 changes: 58 additions & 3 deletions doc/make.py
@@ -14,6 +14,7 @@
import importlib
import sys
import os
import textwrap
import shutil
# import subprocess
import argparse
@@ -78,7 +79,7 @@ class DocBuilder:
script.
"""
def __init__(self, num_jobs=1, include_api=True, single_doc=None,
verbosity=0):
verbosity=0, warnings_are_errors=False, log_file=None):
self.num_jobs = num_jobs
self.include_api = include_api
self.verbosity = verbosity
@@ -87,6 +88,8 @@ def __init__(self, num_jobs=1, include_api=True, single_doc=None,
if single_doc is not None:
self._process_single_doc(single_doc)
self.exclude_patterns = self._exclude_patterns
self.warnings_are_errors = warnings_are_errors
self.log_file = log_file

self._generate_index()
if self.single_doc_type == 'docstring':
@@ -135,6 +138,12 @@ def _process_single_doc(self, single_doc):
try:
obj = pandas # noqa: F821
for name in single_doc.split('.'):
try:
Review comment (Contributor Author):
cc @datapythonista single-page pandas.io.formats.style.Styler wasn't working earlier, since it's not available from the top-level namespace. Do we have tests for this?

# for names not in the top-level namespace by default,
# e.g. pandas.io.formats.style.Styler
importlib.import_module('.'.join([obj.__name__, name]))
except ModuleNotFoundError:
pass
obj = getattr(obj, name)
except AttributeError:
raise ValueError('Single document not understood, it should '
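The import fallback in the hunk above can be sketched as a standalone resolver. This is an illustration of the technique, not the PR's exact code; `resolve` is a hypothetical helper name:

```python
import importlib

def resolve(dotted):
    # Walk a dotted path such as "pandas.io.formats.style.Styler".
    # Subpackages are not imported automatically by importing the
    # top-level package, so when a name is missing we first try to
    # import it as a submodule (which binds it as an attribute of
    # the parent package), then fall back to plain getattr.
    parts = dotted.split('.')
    obj = importlib.import_module(parts[0])
    for name in parts[1:]:
        try:
            importlib.import_module('.'.join([obj.__name__, name]))
        except ImportError:
            pass  # a plain attribute (class, function), not a module
        obj = getattr(obj, name)
    return obj
```

For example, `resolve('xml.dom.minidom')` succeeds even though `import xml` alone does not expose `xml.dom`.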
@@ -227,10 +236,10 @@ def _sphinx_build(self, kind):
if kind not in ('html', 'latex', 'spelling'):
raise ValueError('kind must be html, latex or '
'spelling, not {}'.format(kind))

self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-W' if self.warnings_are_errors else '',
'-{}'.format(
'v' * self.verbosity) if self.verbosity else '',
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
@@ -317,6 +326,44 @@ def spellcheck(self):
' Check pandas/doc/build/spelling/output.txt'
' for more details.')

def lint_log(self):
with open(self.log_file) as f:
log = f.readlines()

failures = self._check_log(log)
if failures:
self._report_failures(failures)
sys.exit(1)

@staticmethod
def _check_log(log):
# type: (List[str]) -> List[Tuple[int, str]]
failures = []
for i, line in enumerate(log):
if "WARNING:" in line:
failures.append((i, line))

return failures

@staticmethod
def _report_failures(failures):
tpl = textwrap.dedent("""\
{n} failure{s}

{individual}
""")
joined = []
for i, (lineno, f) in enumerate(failures):
line = "Failure [{}]: {} (log line {})".format(i,
f.strip(),
lineno)
joined.append(line)
joined = '\n'.join(joined)

print(tpl.format(n=len(failures),
s="s" if len(failures) != 1 else "",
individual=joined))
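The log-linting step added above can be exercised on its own. A minimal standalone sketch mirroring `_check_log`, assuming the `WARNING:` marker Sphinx writes into the build log:

```python
# Standalone sketch of DocBuilder._check_log: collect (line number,
# text) pairs for every log line containing Sphinx's "WARNING:" marker.
def check_log(log_lines):
    return [(i, line) for i, line in enumerate(log_lines)
            if "WARNING:" in line]

# A fabricated three-line build log for illustration.
log = [
    "building [html]: targets for 3 source files\n",
    "io.rst:12: WARNING: undefined label: cookbook.csv\n",
    "build succeeded, 1 warning.\n",
]
failures = check_log(log)
```

A non-empty `failures` list is what makes `lint_log` call `sys.exit(1)` and fail the CI job.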


def main():
cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]
@@ -349,6 +396,13 @@
argparser.add_argument('-v', action='count', dest='verbosity', default=0,
help=('increase verbosity (can be repeated), '
'passed to the sphinx build command'))
argparser.add_argument("--warnings-are-errors",
default=False,
action="store_true",
help="Whether to fail the build on warnings.")
argparser.add_argument("--log-file",
default="doc-build.log",
help="Log file of the build to lint for warnings.")
args = argparser.parse_args()

if args.command not in cmds:
@@ -368,7 +422,8 @@ def main():
os.environ['MPLBACKEND'] = 'module://matplotlib.backends.backend_agg'

builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
args.verbosity)
args.verbosity, args.warnings_are_errors,
args.log_file)
getattr(builder, args.command)()


9 changes: 9 additions & 0 deletions doc/source/api.rst
@@ -2603,3 +2603,12 @@ objects.
generated/pandas.Series.ix
generated/pandas.Series.imag
generated/pandas.Series.real


.. Can't convince sphinx to generate toctree for this class attribute.
.. So we do it manually to avoid a warning

.. toctree::
:hidden:

generated/pandas.api.extensions.ExtensionDtype.na_value
Review comment (Contributor Author):
No idea on this one. Something with numpydoc / sphinx autodoc. I think autodoc detects that na_value has a "docstring" (comments), but numpydoc doesn't. Not super important.

2 changes: 1 addition & 1 deletion doc/source/basics.rst
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
* :ref:`Categorical <categorical>`
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
* :ref:`Period <timeseries.periods>`
* :ref:`Interval <advanced.indexing.intervallindex>`
* :ref:`Interval <indexing.intervallindex>`

Pandas uses the ``object`` dtype for storing strings.

5 changes: 5 additions & 0 deletions doc/source/contributing.rst
@@ -345,6 +345,11 @@ Some other important things to know about the docs:
Every method should be included in a ``toctree`` in ``api.rst``, else Sphinx
will emit a warning.

* The pandas CI system does not allow warnings in the documentation build.
If you cannot discover the cause of the warning from the build output, you can
try elevating warnings to errors with ``python make.py --warnings-are-errors``,
which will immediately halt the build when a warning is encountered.

.. note::

The ``.rst`` files are used to automatically generate Markdown and HTML versions
6 changes: 2 additions & 4 deletions doc/source/cookbook.rst
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
.. ipython:: python

df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})

gb = df.groupby('A')

def replace(g):
mask = g < 0
g.loc[mask] = g[~mask].mean()
return g
mask = g < 0
Review comment (Contributor Author):
Apparently the argument passed to func in transform can be either a Series or DataFrame? This implementation should be robust to either.

return g.where(~mask, g[~mask].mean())

gb.transform(replace)
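The rewritten cookbook pattern can be run end to end. A sketch using the same frame, assuming `.where(~mask, ...)` so non-negative values are kept and negatives are filled with the group's non-negative mean (matching the intent of the original `.loc` assignment):

```python
import pandas as pd

# Replace negative values with the mean of each group's non-negative
# values. Using .where() instead of in-place .loc assignment keeps the
# callable working whether transform hands it a Series or a DataFrame.
df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, -1, 1, 2]})

def replace(g):
    mask = g < 0
    # keep values where ~mask holds (non-negative); fill the rest
    # with the mean of the non-negative values in the group
    return g.where(~mask, g[~mask].mean())

out = df.groupby('A').transform(replace)
```

For group ``A == 1`` the ``-1`` becomes ``1`` (the mean of the group's single non-negative value); group ``A == 2`` is unchanged.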

8 changes: 4 additions & 4 deletions doc/source/ecosystem.rst
@@ -73,8 +73,8 @@ large data to thin clients.
`seaborn <https://seaborn.pydata.org>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Seaborn is a Python visualization library based on `matplotlib
<http://matplotlib.org>`__. It provides a high-level, dataset-oriented
Seaborn is a Python visualization library based on
`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
interface for creating attractive statistical graphics. The plotting functions
in seaborn understand pandas objects and leverage pandas grouping operations
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
(Note: HTML tables may or may not be
compatible with non-HTML Jupyter output formats.)

See :ref:`Options and Settings <options>` and :ref:`<options.available>`
See :ref:`Options and Settings <options>` and :ref:`options.available`
for pandas ``display.`` settings.

`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
Most pandas classes, methods and data attributes can be autocompleted in
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
and render Numpydoc documentation on pandas objects in rich text with Sphinx
both automatically and on-demand.

29 changes: 13 additions & 16 deletions doc/source/io.rst
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
CSV & Text files
----------------

The two workhorse functions for reading text files (a.k.a. flat files) are
:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
intelligently convert tabular data into a ``DataFrame`` object. See the
:ref:`cookbook<cookbook.csv>` for some advanced strategies.
The workhorse function for reading text files (a.k.a. flat files) is
Review comment (Contributor Author):
I basically removed all references to read_table now that it's deprecated.

:func:`read_csv`. See the :ref:`cookbook<cookbook.csv>` for some advanced strategies.

Parsing options
'''''''''''''''

The functions :func:`read_csv` and :func:`read_table` accept the following
common arguments:
:func:`read_csv` accepts the following common arguments:

Basic
+++++
@@ -780,8 +777,8 @@ Date Handling
Specifying Date Columns
+++++++++++++++++++++++

To better facilitate working with datetime data, :func:`read_csv` and
:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
To better facilitate working with datetime data, :func:`read_csv`
uses the keyword arguments ``parse_dates`` and ``date_parser``
to allow users to specify a variety of columns and date/time formats to turn the
input text data into ``datetime`` objects.

@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:

print(open('data/mindex_ex.csv').read())

The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
The ``index_col`` argument to ``read_csv`` can take a list of
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
returned object:

@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.

.. ipython:: python

print(open('tmp2.sv').read())
pd.read_csv('tmp2.sv', sep=None, engine='python')
print(open('tmp2.sv').read())
pd.read_csv('tmp2.sv', sep=None, engine='python')

.. _io.multiple_files:

@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
.. ipython:: python

print(open('tmp.sv').read())
table = pd.read_table('tmp.sv', sep='|')
table = pd.read_csv('tmp.sv', sep='|')
table


By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
By specifying a ``chunksize`` to ``read_csv``, the return
value will be an iterable object of type ``TextFileReader``:

.. ipython:: python

reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
reader

for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:

.. ipython:: python

reader = pd.read_table('tmp.sv', sep='|', iterator=True)
reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
reader.get_chunk(5)
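The chunked-reading behavior shown in these hunks can be sketched without a file on disk, using `io.StringIO` in place of the docs' `tmp.sv` (the data values here are made up for illustration):

```python
import io
import pandas as pd

# read_csv with a chunksize returns an iterator of DataFrame chunks
# rather than reading the whole file into memory at once.
data = io.StringIO("a|b\n1|2\n3|4\n5|6\n7|8\n")
reader = pd.read_csv(data, sep='|', chunksize=2)
chunks = list(reader)  # four data rows, two rows per chunk
```

The same object supports `get_chunk(n)` when created with `iterator=True` instead of `chunksize`.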

.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard

A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
which takes the contents of the clipboard buffer and passes them to the
``read_table`` method. For instance, you can copy the following text to the
``read_csv`` method. For instance, you can copy the following text to the
clipboard (CTRL-C on many operating systems):

.. code-block:: python
5 changes: 3 additions & 2 deletions doc/source/text.rst
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi

s
u
s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
s.str.cat([u.values,
u.index.astype(str).values], na_rep='-')
Review comment (Contributor Author):
@h-vetinari the list inside a list was causing issues. I've removed it from the examples. It'd be good to write new ones demonstrating that though.

Reply (Contributor):
I guess this was an oversight in #22264 where we deprecated lists within lists.

> It'd be good to write new ones demonstrating that though.

New examples?


All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:

.. ipython:: python

v
s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
s.str.cat([u, v], join='outer', na_rep='-')
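The flattened form that replaces the deprecated nested-list call can be sketched with small illustrative values (not the docs' own `s`, `u`, `v`):

```python
import pandas as pd

# Several one-dimensional others passed in one flat list. The
# nested-list form was deprecated, so the index labels are
# stringified explicitly (as a plain array) rather than wrapped
# in an inner list.
s = pd.Series(['a', 'b', 'c', 'd'])
u = pd.Series(['x', 'y', 'z', 'w'])
out = s.str.cat([u, u.index.astype(str).values], sep='-')
```

Each element of `s` is concatenated position-by-position with the matching element of every item in the list.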

If using ``join='right'`` on a list of ``others`` that contains different indexes,
the union of these indexes will be used as the basis for the final concatenation: