diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 722e19d2703b5..6cfc90c14ec70 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -432,7 +432,7 @@ Other API Changes - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - +- The :class:`MultiIndex` constructor now raises a ``ValueError`` if the values of any level are not unique when ``verify_integrity=True`` (:issue:`17464`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8b2cf0e7c0b40..5e463d276d655 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -147,6 +147,11 @@ def _verify_integrity(self, labels=None, levels=None): " level (%d). 
NOTE: this index is in an" " inconsistent state" % (i, label.max(), len(level))) + for i, level in enumerate(levels): + if not level.is_unique: + raise ValueError( + "Level values must be unique: {0!r}" + " on level {1}".format(list(level), i)) def _get_levels(self): return self._levels diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 86308192c9166..959d72e49b676 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -588,6 +588,14 @@ def test_constructor_mismatched_label_levels(self): with tm.assert_raises_regex(ValueError, label_error): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] + def test_constructor_non_unique_level_values(self): + # GH #17464 + with tm.assert_raises_regex(ValueError, '^Level values'): + MultiIndex(levels=[[0, 1], [0, 0, 1, 1]], + labels=[[0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1]], + names=[u'idx0', u'idx1']) + def assert_multiindex_copied(self, copy, original): # Levels should be (at least, shallow copied) tm.assert_copy(copy.levels, original.levels)