Skip to content

Commit

Permalink
support for removing unused levels (internally)
Browse files Browse the repository at this point in the history
xref #2770
  • Loading branch information
jreback committed Mar 16, 2017
1 parent 54c6e93 commit ae6b9ec
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 5 deletions.
48 changes: 44 additions & 4 deletions pandas/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
labels = cartesian_product(labels)
return MultiIndex(levels, labels, sortorder=sortorder, names=names)

def _reconstruct(self, sort=False):
def _reconstruct(self, sort=False, remove_unused=False):
"""
reconstruct the MultiIndex
Expand All @@ -1188,20 +1188,32 @@ def _reconstruct(self, sort=False):
----------
sort: boolean, default False
monotonically sort the levels
remove_unused: boolean, default False
remove unused levels
Returns
-------
MultiIndex
"""

if sort and remove_unused:
raise ValueError("only support one of sort / remove_unused")

if not (sort or remove_unused):
raise ValueError("must supply one of sort / remove_unsued")

levels = self.levels
labels = self.labels
new_levels = []
new_labels = []

if sort:

if self.is_monotonic:
return self

new_levels = []
new_labels = []
for lev, lab in zip(self.levels, self.labels):
for lev, lab in zip(levels, labels):

if lev.is_monotonic:
new_levels.append(lev)
Expand All @@ -1219,6 +1231,34 @@ def _reconstruct(self, sort=False):
new_levels.append(lev)
new_labels.append(lab)

elif remove_unused:

for lev, lab in zip(levels, labels):

uniques = np.sort(algos.unique(lab))

# nothing unused
if len(uniques) == len(lev):
new_levels.append(lev)
new_labels.append(lab)
continue

unused = list(reversed(sorted(set(
np.arange(len(lev))) - set(uniques))))

# new levels are simple
lev = lev.take(uniques)

# new labels: drop each unused value
# by decrementing every label greater than that value
# (there is probably a better way)
for u in unused:

lab = np.where(lab > u, lab - 1, lab)

new_levels.append(lev)
new_labels.append(lab)

return MultiIndex(new_levels, new_labels,
names=self.names, sortorder=self.sortorder,
verify_integrity=False)
Expand Down
38 changes: 37 additions & 1 deletion pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2411,7 +2411,19 @@ def test_is_monotonic(self):

self.assertFalse(i.is_monotonic)

def test_reconstruct(self):
def test_reconstruct_api(self):
# _reconstruct requires exactly one of ``sort`` / ``remove_unused``;
# both invalid combinations below are expected to raise ValueError.

mi = MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])

# neither option supplied
with pytest.raises(ValueError):
mi._reconstruct()

# both options supplied at once
with pytest.raises(ValueError):
mi._reconstruct(sort=True, remove_unused=True)

def test_reconstruct_sort(self):

# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([
Expand Down Expand Up @@ -2456,6 +2468,30 @@ def test_reconstruct(self):
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))

def test_reconstruct_remove_unused(self):
# xref to GH 2770
# Filtering rows out of a frame leaves the dropped values in the
# MultiIndex levels; _reconstruct(remove_unused=True) is expected to
# return an index whose levels hold only the values still referenced.
df = DataFrame([['deleteMe', 1, 9],
['keepMe', 2, 9],
['keepMeToo', 3, 9]],
columns=['first', 'second', 'third'])
df2 = df.set_index(['first', 'second'], drop=False)
df2 = df2[df2['first'] != 'deleteMe']

# before reconstruction: the filtered-out values ('deleteMe', 1)
# are still present in the levels
expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
[1, 2, 3]],
labels=[[1, 2], [1, 2]],
names=['first', 'second'])
result = df2.index
tm.assert_index_equal(result, expected)

# after reconstruction: the unused values are gone from the levels
# and the labels are renumbered to start at 0
expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
[2, 3]],
labels=[[0, 1], [0, 1]],
names=['first', 'second'])
result = df2.index._reconstruct(remove_unused=True)
tm.assert_index_equal(result, expected)

def test_isin(self):
values = [('foo', 2), ('bar', 3), ('quux', 4)]

Expand Down

0 comments on commit ae6b9ec

Please sign in to comment.