Skip to content

Commit

Permalink
BUG: fix MultiIndex.remove_unused_levels() when index contains NaNs
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz committed Nov 22, 2017
1 parent 103ea6f commit 6d1fcc1
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 13 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Indexing

- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
-
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
-

I/O
Expand Down
30 changes: 18 additions & 12 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,25 +1365,31 @@ def remove_unused_levels(self):
new_labels = []

changed = False
for lev, lab in zip(self.levels, self.labels):
for idx, (lev, lab) in enumerate(zip(self.levels, self.labels)):
null_idces = np.where(lab == -1)[0]

if len(null_idces):
lab = np.delete(lab, null_idces)

uniques = algos.unique(lab)

# nothing unused
if len(uniques) == len(lev):
new_levels.append(lev)
new_labels.append(lab)
continue
if len(uniques) != len(lev):
changed = True

# labels get mapped from uniques to 0:len(uniques)
label_mapping = np.zeros(len(lev))
label_mapping[uniques] = np.arange(len(uniques))

changed = True
lab = label_mapping[lab]

# labels get mapped from uniques to 0:len(uniques)
label_mapping = np.zeros(len(lev))
label_mapping[uniques] = np.arange(len(uniques))
lab = label_mapping[lab]
# new levels are simple
lev = lev.take(uniques)

# new levels are simple
lev = lev.take(uniques)
if len(null_idces):
lab = np.insert(lab, null_idces, -1)
else:
lab = self.labels[idx]

new_levels.append(lev)
new_labels.append(lab)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2629,6 +2629,20 @@ def test_reconstruct_remove_unused(self):
tm.assert_index_equal(result2, expected)
assert result2.is_(result)

@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
                                    ['a', 'd', 'b', 'unused']])
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
                                    ['w', 'x', 'y', 'z', 'unused']])
def test_remove_unused_nan(self, level0, level1):
    """Check ``remove_unused_levels`` on an index holding a ``-1`` label.

    The index is built from the parametrized level values, with or
    without a trailing ``'unused'`` entry that no label points at. The
    first level's labels contain a ``-1``. The result must compare equal
    to the input index, and ``'unused'`` must not be present in either
    of the resulting levels.
    """
    # GH 18417
    labels_with_missing = [[0, 2, -1, 1, 1], [0, 1, 2, 3, 2]]
    original = pd.MultiIndex(levels=[level0, level1],
                             labels=labels_with_missing)

    pruned = original.remove_unused_levels()

    # Pruning must not alter the values the index represents.
    tm.assert_index_equal(pruned, original)

    # Neither of the two levels may keep the unreferenced entry.
    for position in range(2):
        assert 'unused' not in pruned.levels[position]

@pytest.mark.parametrize('first_type,second_type', [
('int64', 'int64'),
('datetime64[D]', 'str')])
Expand Down

0 comments on commit 6d1fcc1

Please sign in to comment.