Skip to content

Commit

Permalink
BUG: Try to sort result of Index.union rather than guessing sortability
Browse files Browse the repository at this point in the history
closes #17376
  • Loading branch information
toobaz committed Aug 30, 2017
1 parent 0d676a3 commit 0a725fc
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 77 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ Indexing
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)
- Bug in ``Index.union()`` in which the result was not sorted when the indexes contain tuples (:issue:`17376`)

I/O
^^^
Expand Down
28 changes: 7 additions & 21 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2195,35 +2195,21 @@ def union(self, other):
value_set = set(self._values)
result.extend([x for x in other._values if x not in value_set])
else:
indexer = self.get_indexer(other)
indexer, = (indexer == -1).nonzero()

indexer = np.where(self.get_indexer(other) == -1)[0]
if len(indexer) > 0:
other_diff = algos.take_nd(other._values, indexer,
allow_fill=False)
result = _concat._concat_compat((self._values, other_diff))

try:
self._values[0] < other_diff[0]
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)
else:
types = frozenset((self.inferred_type,
other.inferred_type))
if not types & _unsortable_types:
result.sort()

else:
result = self._values

try:
result = np.sort(result)
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)
try:
result = sorting.safe_sort(result)
except TypeError as e:
warnings.warn("%s, sort order is undefined for "
"incomparable objects" % e, RuntimeWarning,
stacklevel=3)

# for subclasses
return self._wrap_union_result(other, result)
Expand Down
56 changes: 21 additions & 35 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,7 @@ def test_union(self):
expected = Index(list('ab'), name='A')
tm.assert_index_equal(union, expected)

with tm.assert_produces_warning(RuntimeWarning):
firstCat = self.strIndex.union(self.dateIndex)
firstCat = self.strIndex.union(self.dateIndex)
secondCat = self.strIndex.union(self.strIndex)

if self.dateIndex.dtype == np.object_:
Expand Down Expand Up @@ -1309,29 +1308,26 @@ def test_drop(self):
expected = Index([1, 2])
tm.assert_index_equal(dropped, expected)

def test_tuple_union_bug(self):
import pandas
import numpy as np

def test_tuples_intersection_union(self):
aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
dtype=[('num', int), ('let', 'a1')])
aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'),
(2, 'B'), (1, 'C'), (2, 'C')],
dtype=[('num', int), ('let', 'a1')])

idx1 = pandas.Index(aidx1)
idx2 = pandas.Index(aidx2)
idx1 = Index(aidx1)
idx2 = Index(aidx2)

# intersection broken?
# intersection
int_idx = idx1.intersection(idx2)
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
# needs to be 1d like idx1 and idx2
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
assert int_idx.ndim == 1
tm.assert_index_equal(int_idx, expected)

# union broken
# GH 17376 (union)
union_idx = idx1.union(idx2)
expected = idx2
expected = idx2.sort_values()
assert union_idx.ndim == 1
tm.assert_index_equal(union_idx, expected)

Expand Down Expand Up @@ -1507,13 +1503,19 @@ def test_outer_join_sort(self):
left_idx = Index(np.random.permutation(15))
right_idx = tm.makeDateIndex(10)

with tm.assert_produces_warning(RuntimeWarning):
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
joined = left_idx.join(right_idx, how='outer')
else:
joined = left_idx.join(right_idx, how='outer')

# right_idx in this case because DatetimeIndex has join precedence over
# Int64Index
with tm.assert_produces_warning(RuntimeWarning):
expected = right_idx.astype(object).union(left_idx.astype(object))
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
expected = right_idx.astype(object).union(left_idx)
else:
expected = right_idx.astype(object).union(left_idx)
tm.assert_index_equal(joined, expected)

def test_nan_first_take_datetime(self):
Expand Down Expand Up @@ -1897,10 +1899,7 @@ def test_copy_name(self):
s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])

warning_type = RuntimeWarning if PY3 else None
with tm.assert_produces_warning(warning_type):
# Python 3: Unorderable types
s3 = s1 * s2
s3 = s1 * s2

assert s3.index.name == 'mario'

Expand Down Expand Up @@ -1933,27 +1932,14 @@ def test_union_base(self):
first = idx[3:]
second = idx[:5]

if PY3:
with tm.assert_produces_warning(RuntimeWarning):
# unorderable types
result = first.union(second)
expected = Index(['b', 2, 'c', 0, 'a', 1])
tm.assert_index_equal(result, expected)
else:
result = first.union(second)
expected = Index(['b', 2, 'c', 0, 'a', 1])
tm.assert_index_equal(result, expected)
expected = Index([0, 1, 2, 'a', 'b', 'c'])
result = first.union(second)
tm.assert_index_equal(result, expected)

# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if PY3:
with tm.assert_produces_warning(RuntimeWarning):
# unorderable types
result = first.union(case)
assert tm.equalContents(result, idx)
else:
result = first.union(case)
assert tm.equalContents(result, idx)

Expand Down
26 changes: 5 additions & 21 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,11 +1198,7 @@ def test_comparison_label_based(self):
assert_series_equal(result, a[a])

for e in [Series(['z'])]:
if compat.PY3:
with tm.assert_produces_warning(RuntimeWarning):
result = a[a | e]
else:
result = a[a | e]
result = a[a | e]
assert_series_equal(result, a[a])

# vs scalars
Expand Down Expand Up @@ -1394,24 +1390,12 @@ def test_operators_bitwise(self):
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])

# s_0123 will be all false now because of reindexing like s_tft
if compat.PY3:
# unable to sort incompatible object via .union.
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
with tm.assert_produces_warning(RuntimeWarning):
assert_series_equal(s_tft & s_0123, exp)
else:
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_tft & s_0123, exp)
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_tft & s_0123, exp)

# s_tft will be all false now because of reindexing like s_0123
if compat.PY3:
# unable to sort incompatible object via .union.
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
with tm.assert_produces_warning(RuntimeWarning):
assert_series_equal(s_0123 & s_tft, exp)
else:
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_0123 & s_tft, exp)
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_0123 & s_tft, exp)

assert_series_equal(s_0123 & False, Series([False] * 4))
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
Expand Down

0 comments on commit 0a725fc

Please sign in to comment.