From 835c78eb627dc30e800f25271be5fb9a8c0d4070 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Nov 2021 01:12:24 +0100 Subject: [PATCH] [ArrayManager] TST: Convert skip into xfail and clean-up tests that now work (#44571) --- pandas/tests/frame/indexing/test_setitem.py | 2 -- pandas/tests/frame/methods/test_fillna.py | 3 --- .../tests/frame/methods/test_interpolate.py | 7 ++++-- pandas/tests/frame/methods/test_rename.py | 1 - pandas/tests/frame/test_api.py | 1 - pandas/tests/frame/test_arithmetic.py | 18 ++++++++++---- pandas/tests/frame/test_constructors.py | 24 +++++++++++++++---- pandas/tests/frame/test_stack_unstack.py | 3 --- pandas/tests/groupby/aggregate/test_other.py | 3 --- pandas/tests/groupby/test_categorical.py | 6 +---- pandas/tests/indexing/test_iloc.py | 2 -- pandas/tests/indexing/test_loc.py | 3 --- pandas/tests/indexing/test_partial.py | 6 ----- pandas/tests/io/json/test_normalize.py | 6 ----- pandas/tests/io/test_common.py | 1 - pandas/tests/io/test_pickle.py | 9 ++++--- pandas/tests/series/indexing/test_where.py | 10 ++++---- pandas/tests/test_downstream.py | 3 --- pandas/util/_test_decorators.py | 7 +++++- 19 files changed, 55 insertions(+), 60 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 15e62e27c08d5..597216f55e444 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -728,8 +728,6 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): class TestDataFrameSetItemWithExpansion: - # TODO(ArrayManager) update parent (_maybe_update_cacher) - @td.skip_array_manager_not_yet_implemented def test_setitem_listlike_views(self): # GH#38148 df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 7e486f9fac083..45a3bf9b145b9 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -232,7 +232,6 @@ def test_fillna_categorical_nan(self): df = DataFrame({"a": Categorical(idx)}) tm.assert_frame_equal(df.fillna(value=NaT), df) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) implement downcast def test_fillna_downcast(self): # GH#15277 # infer int64 from float64 @@ -258,7 +257,6 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns): expected["A"] = 0.0 tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) object upcasting def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) @@ -276,7 +274,6 @@ def test_fillna_dtype_conversion(self): expected = DataFrame("nan", index=range(3), columns=["A", "B"]) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) object upcasting @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) def test_fillna_dtype_conversion_equiv_replace(self, val): df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 7a749f3705e35..93225ff1050a3 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -328,10 +328,13 @@ def test_interp_string_axis(self, axis_name, axis_number): expected = df.interpolate(method="linear", axis=axis_number) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) support axis=1 @pytest.mark.parametrize("method", ["ffill", "bfill", "pad"]) - def test_interp_fillna_methods(self, axis, method): + def test_interp_fillna_methods(self, request, axis, method, using_array_manager): # GH 12918 + if using_array_manager and (axis == 1 or axis == "columns"): + # TODO(ArrayManager) support axis=1 + td.mark_array_manager_not_yet_implemented(request) + df = DataFrame( { "A": [1.0, 2.0, 3.0, 4.0, np.nan, 5.0], diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 0bd46cbb22f2a..1581bc8a0c70b 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -365,7 +365,6 @@ def test_rename_mapper_and_positional_arguments_raises(self): with pytest.raises(TypeError, match=msg): df.rename({}, columns={}, index={}) - @td.skip_array_manager_not_yet_implemented def test_rename_with_duplicate_columns(self): # GH#4403 df4 = DataFrame( diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 49649c1487f13..2e276f4f27a67 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -296,7 +296,6 @@ def test_attrs(self): result = df.rename(columns=str) assert result.attrs == {"version": 1} - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem (no copy) @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) def test_set_flags(self, allows_duplicate_labels, frame_or_series): obj = DataFrame({"A": [1, 2]}) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 6927a5927ef48..0e6b36a484c47 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -722,11 +722,16 @@ def test_df_add_2d_array_collike_broadcasts(self): result = collike + df tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes - def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): + def test_df_arith_2d_array_rowlike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): # GH#23000 opname = all_arithmetic_operators + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes + td.mark_array_manager_not_yet_implemented(request) + arr = np.arange(6).reshape(3, 2) df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) @@ -744,11 +749,16 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes - def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): + def test_df_arith_2d_array_collike_broadcasts( + self, request, all_arithmetic_operators, using_array_manager + ): # GH#23000 opname = all_arithmetic_operators + if using_array_manager and opname in ("__rmod__", "__rfloordiv__"): + # TODO(ArrayManager) decide on dtypes + td.mark_array_manager_not_yet_implemented(request) + arr = np.arange(6).reshape(3, 2) df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6ec2b243d540a..fc5bffab118af 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2287,16 +2287,18 @@ def test_check_dtype_empty_numeric_column(self, dtype): assert data.b.dtype == dtype - # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize( "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES ) - def test_check_dtype_empty_string_column(self, dtype): + def test_check_dtype_empty_string_column(self, request, dtype, using_array_manager): # GH24386: Ensure dtypes are set correctly for an empty DataFrame. # Empty DataFrame is generated via dictionary data with non-overlapping columns. data = DataFrame({"a": [1, 2]}, columns=["b"], dtype=dtype) + if using_array_manager and dtype in tm.BYTES_DTYPES: + # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype + td.mark_array_manager_not_yet_implemented(request) + assert data.b.dtype.name == "object" def test_to_frame_with_falsey_names(self): @@ -2466,8 +2468,20 @@ def test_constructor_list_str_na(self, string_dtype): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("copy", [False, True]) - @td.skip_array_manager_not_yet_implemented - def test_dict_nocopy(self, copy, any_numeric_ea_dtype, any_numpy_dtype): + def test_dict_nocopy( + self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager + ): + if using_array_manager and not ( + (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES)) + or ( + any_numpy_dtype + in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES) + and copy + ) + ): + # TODO(ArrayManager) properly honor copy keyword for dict input + td.mark_array_manager_not_yet_implemented(request) + a = np.array([1, 2], dtype=any_numpy_dtype) b = np.array([3, 4], dtype=any_numpy_dtype) if b.dtype.kind in ["S", "U"]: diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 62512249dabfc..689c54b03b507 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -5,8 +5,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -949,7 +947,6 @@ def test_unstack_nan_index4(self): left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) tm.assert_frame_equal(left.unstack(), right) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug def test_unstack_nan_index5(self): # GH9497 - multiple unstack with nulls df = DataFrame( diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 66b968e01eef1..06044ddd3f4b8 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,8 +8,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -424,7 +422,6 @@ def __call__(self, x): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) columns with ndarrays def test_agg_over_numpy_arrays(): # GH 3788 df = DataFrame( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 339bb2c30736d..28128dee9da0f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -301,9 +299,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) -# TODO(ArrayManager) incorrect dtype for mean() -@td.skip_array_manager_not_yet_implemented -def test_observed(observed, using_array_manager): +def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper # gh-14942 (implement) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 7d2f68b00d95f..ccaaafa75f3af 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -106,8 +106,6 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage expected = DataFrame({0: cat, 1: range(3)}) tm.assert_frame_equal(df, expected) - # TODO(ArrayManager) does not yet update parent - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("box", [array, Series]) def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager): # GH#38952 Case with not setting a full column diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6d162bbc27cd8..2a9ee81b7a23a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1143,9 +1143,6 @@ def test_loc_setitem_empty_append_single_value(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) - # TODO(ArrayManager) "split" path doesn't handle this case and gives wrong - # error message - @td.skip_array_manager_not_yet_implemented def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 82d55a7bf7189..95a9fd227c685 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -7,8 +7,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -355,10 +353,6 @@ def test_partial_setting2(self): df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) - # TODO(ArrayManager) - # df.loc[0] = Series(1, index=range(4)) case creates float columns - # instead of object dtype - @td.skip_array_manager_not_yet_implemented def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 272a4aa6723dd..231228ef6c0af 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import ( DataFrame, Index, @@ -153,8 +151,6 @@ def test_simple_records(self): tm.assert_frame_equal(result, expected) - # TODO(ArrayManager) sanitize S/U numpy dtypes to object - @td.skip_array_manager_not_yet_implemented def test_simple_normalize(self, state_data): result = json_normalize(state_data[0], "counties") expected = DataFrame(state_data[0]["counties"]) @@ -381,8 +377,6 @@ def test_meta_parameter_not_modified(self): for val in ["metafoo", "metabar", "foo", "bar"]: assert val in result - # TODO(ArrayManager) sanitize S/U numpy dtypes to object - @td.skip_array_manager_not_yet_implemented def test_record_prefix(self, state_data): result = json_normalize(state_data[0], "counties") expected = DataFrame(state_data[0]["counties"]) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 699459ab3666d..5f1256c4e5ba3 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -384,7 +384,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): @pytest.mark.filterwarnings( # pytables np.object usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) IO HDF5 def test_write_fspath_hdf5(self): # Same test as write_fspath_all, except HDF5 files aren't # necessarily byte-for-byte identical for a given dataframe, so we'll diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 9253e5ae700c7..d656c56b0ee10 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -54,11 +54,9 @@ lzma = import_lzma() -# TODO(ArrayManager) pickling -pytestmark = [ - td.skip_array_manager_not_yet_implemented, - pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning"), -] +pytestmark = pytest.mark.filterwarnings( + "ignore:Timestamp.freq is deprecated:FutureWarning" +) @pytest.fixture(scope="module") @@ -612,6 +610,7 @@ def test_pickle_strings(string_series): tm.assert_series_equal(unp_series, string_series) +@td.skip_array_manager_invalid_test def test_pickle_preserves_block_ndim(): # GH#37631 ser = Series(list("abc")).astype("category").iloc[[0]] diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 88b75164d2f3e..bf4bee203a3a1 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -440,13 +440,15 @@ def test_where_categorical(frame_or_series): tm.assert_equal(exp, res) -# TODO(ArrayManager) DataFrame.values not yet correctly returning datetime array -# for categorical with datetime categories -@td.skip_array_manager_not_yet_implemented -def test_where_datetimelike_categorical(tz_naive_fixture): +def test_where_datetimelike_categorical(request, tz_naive_fixture, using_array_manager): # GH#37682 tz = tz_naive_fixture + if using_array_manager and tz is None: + # TODO(ArrayManager) DataFrame.values not yet correctly returning datetime array + # for categorical with datetime categories + td.mark_array_manager_not_yet_implemented(request) + dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None) lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT]) rvals = pd.Categorical([dr[0], pd.NaT, dr[2]]) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index a15658ad43498..1afd431a5da2d 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -29,9 +29,6 @@ def df(): return DataFrame({"A": [1, 2, 3]}) -# TODO(ArrayManager) dask is still accessing the blocks -# https://github.com/dask/dask/pull/7318 -@td.skip_array_manager_not_yet_implemented @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") def test_dask(df): diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index d5ffca36d325f..411f8537e76bd 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -285,7 +285,12 @@ def async_mark(): return async_mark -skip_array_manager_not_yet_implemented = pytest.mark.skipif( +def mark_array_manager_not_yet_implemented(request): + mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager") + request.node.add_marker(mark) + + +skip_array_manager_not_yet_implemented = pytest.mark.xfail( get_option("mode.data_manager") == "array", reason="Not yet implemented for ArrayManager", )