Skip to content

Commit

Permalink
BUG: support "fill_value" for ".unstack()" called with list of levels (
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz authored and dmanikowski-reef committed Oct 16, 2017
1 parent ff32d61 commit 1e0a3e5
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,7 @@ Reshaping
- Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`)
- Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`)
- Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`)
- Bug in :func:`unstack` which, when called on a list of levels, would discard the ``fill_value`` argument (:issue:`13971`)

Numeric
^^^^^^^
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def get_new_index(self):
names=self.new_index_names, verify_integrity=False)


def _unstack_multiple(data, clocs):
def _unstack_multiple(data, clocs, fill_value=None):
if len(clocs) == 0:
return data

Expand Down Expand Up @@ -330,7 +330,7 @@ def _unstack_multiple(data, clocs):
if isinstance(data, Series):
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
unstacked = dummy.unstack('__placeholder__', fill_value=fill_value)
new_levels = clevels
new_names = cnames
new_labels = recons_labels
Expand All @@ -347,7 +347,7 @@ def _unstack_multiple(data, clocs):
dummy = data.copy()
dummy.index = dummy_index

unstacked = dummy.unstack('__placeholder__')
unstacked = dummy.unstack('__placeholder__', fill_value=fill_value)
if isinstance(unstacked, Series):
unstcols = unstacked.index
else:
Expand Down Expand Up @@ -460,7 +460,7 @@ def unstack(obj, level, fill_value=None):
if len(level) != 1:
# _unstack_multiple only handles MultiIndexes,
# and isn't needed for a single level
return _unstack_multiple(obj, level)
return _unstack_multiple(obj, level, fill_value=fill_value)
else:
level = level[0]

Expand Down
38 changes: 31 additions & 7 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,22 @@ def test_pivot_index_none(self):
tm.assert_frame_equal(result, expected)

def test_stack_unstack(self):
f = self.frame.copy()
f[:] = np.arange(np.prod(f.shape)).reshape(f.shape)
df = self.frame.copy()
df[:] = np.arange(np.prod(df.shape)).reshape(df.shape)

stacked = f.stack()
stacked = df.stack()
stacked_df = DataFrame({'foo': stacked, 'bar': stacked})

unstacked = stacked.unstack()
unstacked_df = stacked_df.unstack()

assert_frame_equal(unstacked, f)
assert_frame_equal(unstacked_df['bar'], f)
assert_frame_equal(unstacked, df)
assert_frame_equal(unstacked_df['bar'], df)

unstacked_cols = stacked.unstack(0)
unstacked_cols_df = stacked_df.unstack(0)
assert_frame_equal(unstacked_cols.T, f)
assert_frame_equal(unstacked_cols_df['bar'].T, f)
assert_frame_equal(unstacked_cols.T, df)
assert_frame_equal(unstacked_cols_df['bar'].T, df)

def test_unstack_fill(self):

Expand All @@ -154,6 +154,30 @@ def test_unstack_fill(self):
index=['x', 'y', 'z'], dtype=np.float)
assert_frame_equal(result, expected)

# GH #13971: fill_value when unstacking multiple levels:
df = DataFrame({'x': ['a', 'a', 'b'],
'y': ['j', 'k', 'j'],
'z': [0, 1, 2],
'w': [0, 1, 2]}).set_index(['x', 'y', 'z'])
unstacked = df.unstack(['x', 'y'], fill_value=0)
key = ('w', 'b', 'j')
expected = unstacked[key]
result = pd.Series([0, 0, 2], index=unstacked.index, name=key)
assert_series_equal(result, expected)

stacked = unstacked.stack(['x', 'y'])
stacked.index = stacked.index.reorder_levels(df.index.names)
# Workaround for GH #17886 (unnecessarily casts to float):
stacked = stacked.astype(np.int64)
result = stacked.loc[df.index]
assert_frame_equal(result, df)

# From a series
s = df['w']
result = s.unstack(['x', 'y'], fill_value=0)
expected = unstacked['w']
assert_frame_equal(result, expected)

def test_unstack_fill_frame(self):

# From a dataframe
Expand Down

0 comments on commit 1e0a3e5

Please sign in to comment.