From 985013b6efbf1b877d337731aef467a3b6fe4854 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 29 May 2018 07:02:31 -0400
Subject: [PATCH 1/2] BUG: dropna incorrect with categoricals in pivot_table

closes #21133
---
 doc/source/whatsnew/v0.23.1.txt    |  1 +
 pandas/core/reshape/pivot.py       | 20 ++++++++++++++++++--
 pandas/tests/reshape/test_pivot.py | 26 +++++++++++++++++++++++++-
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt
index 974527624a312..e679c0d305827 100644
--- a/doc/source/whatsnew/v0.23.1.txt
+++ b/doc/source/whatsnew/v0.23.1.txt
@@ -111,6 +111,7 @@ Reshaping
 ^^^^^^^^^
 
 - Bug in :func:`concat` where error was raised in concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`)
+- Bug in :func:`pivot_table` with ``dropna=True`` where an ordered ``Categorical`` with missing values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`)
 -
 
 Other
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index e02420323704e..9a2ad5d13d77a 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -1,8 +1,10 @@
 # pylint: disable=E1103
 
 
-from pandas.core.dtypes.common import is_list_like, is_scalar
+from pandas.core.dtypes.common import (
+    is_list_like, is_scalar, is_integer_dtype)
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 
 from pandas.core.reshape.concat import concat
 from pandas.core.series import Series
@@ -79,8 +81,22 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
                 pass
         values = list(values)
 
-    grouped = data.groupby(keys, observed=dropna)
+    # group by the cartesian product of the grouper
+    # if we have a categorical
+    grouped = data.groupby(keys, observed=False)
     agged = grouped.agg(aggfunc)
+    if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
+        agged = agged.dropna(how='all')
+
+        # gh-21133
+        # grouping over every category combination can add
+        # all-NaN rows, which coerce integer values to float;
+        # now that those rows are dropped, downcast back to
+        # the original integer dtype
+        for v in [v for v in values if v in data and v in agged]:
+            if (is_integer_dtype(data[v]) and
+                    not is_integer_dtype(agged[v])):
+                agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
 
     table = agged
     if table.index.nlevels > 1:
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index d2cf3fc11e165..3ec60d50f2792 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 
 from datetime import datetime, date, timedelta
 
@@ -16,6 +17,11 @@
 from pandas.api.types import CategoricalDtype as CDT
 
 
+@pytest.fixture(params=[True, False])
+def dropna(request):
+    return request.param
+
+
 class TestPivotTable(object):
 
     def setup_method(self, method):
@@ -109,7 +115,6 @@ def test_pivot_table_categorical(self):
             index=exp_index)
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize('dropna', [True, False])
     def test_pivot_table_dropna_categoricals(self, dropna):
         # GH 15193
         categories = ['a', 'b', 'c', 'd']
@@ -137,6 +142,25 @@ def test_pivot_table_dropna_categoricals(self, dropna):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_with_non_observable_dropna(self, dropna):
+        # gh-21133
+        df = pd.DataFrame(
+            {'A': pd.Categorical([np.nan, 'low', 'high', 'low', 'high'],
+                                 categories=['low', 'high'],
+                                 ordered=True),
+             'B': range(5)})
+
+        result = df.pivot_table(index='A', values='B', dropna=dropna)
+        expected = pd.DataFrame(
+            {'B': [2, 3]},
+            index=pd.Index(
+                pd.Categorical.from_codes([0, 1],
+                                          categories=['low', 'high'],
+                                          ordered=True),
+                name='A'))
+
+        tm.assert_frame_equal(result, expected)
+
     def test_pass_array(self):
         result = self.data.pivot_table(
             'D', index=self.data.A, columns=self.data.C)

From 683fd9e578bdfcd03d222551429cbf0fdaa8d76f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 8 Jun 2018 00:04:10 +0200
Subject: [PATCH 2/2] correct whatsnew message

---
 doc/source/whatsnew/v0.23.1.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt
index 9deb7603d5964..97a5975dad9a6 100644
--- a/doc/source/whatsnew/v0.23.1.txt
+++ b/doc/source/whatsnew/v0.23.1.txt
@@ -29,7 +29,8 @@ Fixed Regressions
 - Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`)
 - Bug preventing pandas from being importable with -OO optimization (:issue:`21071`)
 - Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when `value` the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`)
-- Regression in :func:`pivot_table` where an ordered ``Categorical`` for the ``index`` and missing values in the ``values`` would give a mis-ordered result (:issue:`21133`)
+- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing
+  values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`)
 
 
 .. _whatsnew_0231.performance: