From 835c78eb627dc30e800f25271be5fb9a8c0d4070 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Nov 2021 01:12:24 +0100
Subject: [PATCH] [ArrayManager] TST: Convert skip into xfail and clean-up
 tests that now work (#44571)

---
 pandas/tests/frame/indexing/test_setitem.py   |  2 --
 pandas/tests/frame/methods/test_fillna.py     |  3 ---
 .../tests/frame/methods/test_interpolate.py   |  7 ++++--
 pandas/tests/frame/methods/test_rename.py     |  1 -
 pandas/tests/frame/test_api.py                |  1 -
 pandas/tests/frame/test_arithmetic.py         | 18 ++++++++++----
 pandas/tests/frame/test_constructors.py       | 24 +++++++++++++++----
 pandas/tests/frame/test_stack_unstack.py      |  3 ---
 pandas/tests/groupby/aggregate/test_other.py  |  3 ---
 pandas/tests/groupby/test_categorical.py      |  6 +----
 pandas/tests/indexing/test_iloc.py            |  2 --
 pandas/tests/indexing/test_loc.py             |  3 ---
 pandas/tests/indexing/test_partial.py         |  6 -----
 pandas/tests/io/json/test_normalize.py        |  6 -----
 pandas/tests/io/test_common.py                |  1 -
 pandas/tests/io/test_pickle.py                |  9 ++++---
 pandas/tests/series/indexing/test_where.py    | 10 ++++----
 pandas/tests/test_downstream.py               |  3 ---
 pandas/util/_test_decorators.py               |  7 +++++-
 19 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index 15e62e27c08d5..597216f55e444 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -728,8 +728,6 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):
 
 
 class TestDataFrameSetItemWithExpansion:
-    # TODO(ArrayManager) update parent (_maybe_update_cacher)
-    @td.skip_array_manager_not_yet_implemented
     def test_setitem_listlike_views(self):
         # GH#38148
         df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 7e486f9fac083..45a3bf9b145b9 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -232,7 +232,6 @@ def test_fillna_categorical_nan(self):
         df = DataFrame({"a": Categorical(idx)})
         tm.assert_frame_equal(df.fillna(value=NaT), df)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) implement downcast
     def test_fillna_downcast(self):
         # GH#15277
         # infer int64 from float64
@@ -258,7 +257,6 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         expected["A"] = 0.0
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) object upcasting
     def test_fillna_dtype_conversion(self):
         # make sure that fillna on an empty frame works
         df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
@@ -276,7 +274,6 @@ def test_fillna_dtype_conversion(self):
         expected = DataFrame("nan", index=range(3), columns=["A", "B"])
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) object upcasting
     @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
     def test_fillna_dtype_conversion_equiv_replace(self, val):
         df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]})
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 7a749f3705e35..93225ff1050a3 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -328,10 +328,13 @@ def test_interp_string_axis(self, axis_name, axis_number):
         expected = df.interpolate(method="linear", axis=axis_number)
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) support axis=1
     @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"])
-    def test_interp_fillna_methods(self, axis, method):
+    def test_interp_fillna_methods(self, request, axis, method, using_array_manager):
         # GH 12918
+        if using_array_manager and (axis == 1 or axis == "columns"):
+            # TODO(ArrayManager) support axis=1
+            td.mark_array_manager_not_yet_implemented(request)
+
         df = DataFrame(
             {
                 "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0],
diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py
index 0bd46cbb22f2a..1581bc8a0c70b 100644
--- a/pandas/tests/frame/methods/test_rename.py
+++ b/pandas/tests/frame/methods/test_rename.py
@@ -365,7 +365,6 @@ def test_rename_mapper_and_positional_arguments_raises(self):
         with pytest.raises(TypeError, match=msg):
             df.rename({}, columns={}, index={})
 
-    @td.skip_array_manager_not_yet_implemented
     def test_rename_with_duplicate_columns(self):
         # GH#4403
         df4 = DataFrame(
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 49649c1487f13..2e276f4f27a67 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -296,7 +296,6 @@ def test_attrs(self):
         result = df.rename(columns=str)
         assert result.attrs == {"version": 1}
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) setitem (no copy)
     @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
     def test_set_flags(self, allows_duplicate_labels, frame_or_series):
         obj = DataFrame({"A": [1, 2]})
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 6927a5927ef48..0e6b36a484c47 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -722,11 +722,16 @@ def test_df_add_2d_array_collike_broadcasts(self):
         result = collike + df
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) decide on dtypes
-    def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
+    def test_df_arith_2d_array_rowlike_broadcasts(
+        self, request, all_arithmetic_operators, using_array_manager
+    ):
         # GH#23000
         opname = all_arithmetic_operators
 
+        if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
+            # TODO(ArrayManager) decide on dtypes
+            td.mark_array_manager_not_yet_implemented(request)
+
         arr = np.arange(6).reshape(3, 2)
         df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
 
@@ -744,11 +749,16 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
         result = getattr(df, opname)(rowlike)
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) decide on dtypes
-    def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators):
+    def test_df_arith_2d_array_collike_broadcasts(
+        self, request, all_arithmetic_operators, using_array_manager
+    ):
         # GH#23000
         opname = all_arithmetic_operators
 
+        if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
+            # TODO(ArrayManager) decide on dtypes
+            td.mark_array_manager_not_yet_implemented(request)
+
         arr = np.arange(6).reshape(3, 2)
         df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 6ec2b243d540a..fc5bffab118af 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2287,16 +2287,18 @@ def test_check_dtype_empty_numeric_column(self, dtype):
 
         assert data.b.dtype == dtype
 
-    # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype
-    @td.skip_array_manager_not_yet_implemented
     @pytest.mark.parametrize(
         "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES
     )
-    def test_check_dtype_empty_string_column(self, dtype):
+    def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager):
         # GH24386: Ensure dtypes are set correctly for an empty DataFrame.
         # Empty DataFrame is generated via dictionary data with non-overlapping columns.
         data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype)
 
+        if using_array_manager and dtype in tm.BYTES_DTYPES:
+            # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype
+            td.mark_array_manager_not_yet_implemented(request)
+
         assert data.b.dtype.name == "object"
 
     def test_to_frame_with_falsey_names(self):
@@ -2466,8 +2468,20 @@ def test_constructor_list_str_na(self, string_dtype):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("copy", [False, True])
-    @td.skip_array_manager_not_yet_implemented
-    def test_dict_nocopy(self, copy, any_numeric_ea_dtype, any_numpy_dtype):
+    def test_dict_nocopy(
+        self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager
+    ):
+        if using_array_manager and not (
+            (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES))
+            or (
+                any_numpy_dtype
+                in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES)
+                and copy
+            )
+        ):
+            # TODO(ArrayManager) properly honor copy keyword for dict input
+            td.mark_array_manager_not_yet_implemented(request)
+
         a = np.array([1, 2], dtype=any_numpy_dtype)
         b = np.array([3, 4], dtype=any_numpy_dtype)
         if b.dtype.kind in ["S", "U"]:
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 62512249dabfc..689c54b03b507 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -949,7 +947,6 @@ def test_unstack_nan_index4(self):
         left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
         tm.assert_frame_equal(left.unstack(), right)
 
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) MultiIndex bug
     def test_unstack_nan_index5(self):
         # GH9497 - multiple unstack with nulls
         df = DataFrame(
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 66b968e01eef1..06044ddd3f4b8 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -424,7 +422,6 @@ def __call__(self, x):
         tm.assert_frame_equal(result, expected)
 
 
-@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) columns with ndarrays
 def test_agg_over_numpy_arrays():
     # GH 3788
     df = DataFrame(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 339bb2c30736d..28128dee9da0f 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 import pandas as pd
 from pandas import (
     Categorical,
@@ -301,9 +299,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-# TODO(ArrayManager) incorrect dtype for mean()
-@td.skip_array_manager_not_yet_implemented
-def test_observed(observed, using_array_manager):
+def test_observed(observed):
     # multiple groupers, don't re-expand the output space
     # of the grouper
     # gh-14942 (implement)
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 7d2f68b00d95f..ccaaafa75f3af 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -106,8 +106,6 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         expected = DataFrame({0: cat, 1: range(3)})
         tm.assert_frame_equal(df, expected)
 
-    # TODO(ArrayManager) does not yet update parent
-    @td.skip_array_manager_not_yet_implemented
     @pytest.mark.parametrize("box", [array, Series])
     def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
         # GH#38952 Case with not setting a full column
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 6d162bbc27cd8..2a9ee81b7a23a 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1143,9 +1143,6 @@ def test_loc_setitem_empty_append_single_value(self):
         df.loc[0, "x"] = expected.loc[0, "x"]
         tm.assert_frame_equal(df, expected)
 
-    # TODO(ArrayManager) "split" path doesn't handle this case and gives wrong
-    # error message
-    @td.skip_array_manager_not_yet_implemented
     def test_loc_setitem_empty_append_raises(self):
         # GH6173, various appends to an empty dataframe
 
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 82d55a7bf7189..95a9fd227c685 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -355,10 +353,6 @@ def test_partial_setting2(self):
         df.at[dates[-1] + dates.freq, 0] = 7
         tm.assert_frame_equal(df, expected)
 
-    # TODO(ArrayManager)
-    # df.loc[0] = Series(1, index=range(4)) case creates float columns
-    # instead of object dtype
-    @td.skip_array_manager_not_yet_implemented
     def test_partial_setting_mixed_dtype(self):
 
         # in a mixed dtype environment, try to preserve dtypes
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 272a4aa6723dd..231228ef6c0af 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 from pandas import (
     DataFrame,
     Index,
@@ -153,8 +151,6 @@ def test_simple_records(self):
 
         tm.assert_frame_equal(result, expected)
 
-    # TODO(ArrayManager) sanitize S/U numpy dtypes to object
-    @td.skip_array_manager_not_yet_implemented
     def test_simple_normalize(self, state_data):
         result = json_normalize(state_data[0], "counties")
         expected = DataFrame(state_data[0]["counties"])
@@ -381,8 +377,6 @@ def test_meta_parameter_not_modified(self):
         for val in ["metafoo", "metabar", "foo", "bar"]:
             assert val in result
 
-    # TODO(ArrayManager) sanitize S/U numpy dtypes to object
-    @td.skip_array_manager_not_yet_implemented
     def test_record_prefix(self, state_data):
         result = json_normalize(state_data[0], "counties")
         expected = DataFrame(state_data[0]["counties"])
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 699459ab3666d..5f1256c4e5ba3 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -384,7 +384,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
     @pytest.mark.filterwarnings(  # pytables np.object usage
         "ignore:`np.object` is a deprecated alias:DeprecationWarning"
     )
-    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) IO HDF5
     def test_write_fspath_hdf5(self):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 9253e5ae700c7..d656c56b0ee10 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -54,11 +54,9 @@
 lzma = import_lzma()
 
 
-# TODO(ArrayManager) pickling
-pytestmark = [
-    td.skip_array_manager_not_yet_implemented,
-    pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning"),
-]
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:Timestamp.freq is deprecated:FutureWarning"
+)
 
 
 @pytest.fixture(scope="module")
@@ -612,6 +610,7 @@ def test_pickle_strings(string_series):
     tm.assert_series_equal(unp_series, string_series)
 
 
+@td.skip_array_manager_invalid_test
 def test_pickle_preserves_block_ndim():
     # GH#37631
     ser = Series(list("abc")).astype("category").iloc[[0]]
diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py
index 88b75164d2f3e..bf4bee203a3a1 100644
--- a/pandas/tests/series/indexing/test_where.py
+++ b/pandas/tests/series/indexing/test_where.py
@@ -440,13 +440,15 @@ def test_where_categorical(frame_or_series):
     tm.assert_equal(exp, res)
 
 
-# TODO(ArrayManager) DataFrame.values not yet correctly returning datetime array
-# for categorical with datetime categories
-@td.skip_array_manager_not_yet_implemented
-def test_where_datetimelike_categorical(tz_naive_fixture):
+def test_where_datetimelike_categorical(request, tz_naive_fixture, using_array_manager):
     # GH#37682
     tz = tz_naive_fixture
 
+    if using_array_manager and tz is None:
+        # TODO(ArrayManager) DataFrame.values not yet correctly returning datetime array
+        # for categorical with datetime categories
+        td.mark_array_manager_not_yet_implemented(request)
+
     dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
     lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
     rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index a15658ad43498..1afd431a5da2d 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -29,9 +29,6 @@ def df():
     return DataFrame({"A": [1, 2, 3]})
 
 
-# TODO(ArrayManager) dask is still accessing the blocks
-# https://github.com/dask/dask/pull/7318
-@td.skip_array_manager_not_yet_implemented
 @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
 def test_dask(df):
 
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index d5ffca36d325f..411f8537e76bd 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -285,7 +285,12 @@ def async_mark():
     return async_mark
 
 
-skip_array_manager_not_yet_implemented = pytest.mark.skipif(
+def mark_array_manager_not_yet_implemented(request):  # request: pytest fixture
+    mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
+    request.node.add_marker(mark)  # xfail only the test that passed `request`
+
+
+skip_array_manager_not_yet_implemented = pytest.mark.xfail(  # now xfail, not skip
     get_option("mode.data_manager") == "array",
     reason="Not yet implemented for ArrayManager",
 )