Unpin pandas version (#1708)

alteryx · Mar 16, 2021 · 9576d5d · 9576d5d
1 parent 888dce8
commit 9576d5d
Show file tree

Hide file tree

Showing 15 changed files with 24 additions and 27 deletions.
diff --git a/core-requirements.txt b/core-requirements.txt
@@ -1,5 +1,5 @@
 numpy>=1.19.1
-pandas>=1.1.0,<1.2.0
+pandas>=1.1.0
 scipy>=1.2.1
 scikit-learn>=0.23.1
 scikit-optimize>=0.8.1

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -36,6 +36,7 @@ Release Notes
         * Aggregating the shap values for predictions that we know the provenance of, e.g. OHE, text, and date-time. :pr:`1901`
         * Improved error message when custom objective is passed as a string in ``pipeline.score`` :pr:`1941`
         * Added ``score_pipelines`` and ``train_pipelines`` methods to ``AutoMLSearch`` :pr:`1913`
+        * Added support for ``pandas`` version 1.2.0 :pr:`1708`
         * Added ``score_batch`` and ``train_batch`` abstract methods to ``EngineBase`` and implementations in ``SequentialEngine`` :pr:`1913`
     * Fixes
         * Removed CI check for ``check_dependencies_updated_linux`` :pr:`1950`

diff --git a/evalml/objectives/objective_base.py b/evalml/objectives/objective_base.py
@@ -91,7 +91,7 @@ def _standardize_input_type(input_data):
             pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
         """
         if isinstance(input_data, (pd.Series, pd.DataFrame)):
-            return input_data
+            return _convert_woodwork_types_wrapper(input_data)
         if isinstance(input_data, ww.DataTable):
             return _convert_woodwork_types_wrapper(input_data.to_dataframe())
         if isinstance(input_data, ww.DataColumn):

diff --git a/evalml/pipelines/components/transformers/encoders/onehot_encoder.py b/evalml/pipelines/components/transformers/encoders/onehot_encoder.py
@@ -152,7 +152,6 @@ def transform(self, X, y=None):
             X_cat = pd.DataFrame(self._encoder.transform(X_copy[self.features_to_encode]).toarray(), index=X_copy.index)
             X_cat.columns = self.get_feature_names()
             X_t = pd.concat([X_t, X_cat], axis=1)
-
         return _retain_custom_types_and_initalize_woodwork(X_ww, X_t)
 
     def _handle_parameter_handle_missing(self, X):

diff --git a/evalml/pipelines/components/transformers/preprocessing/delayed_feature_transformer.py b/evalml/pipelines/components/transformers/preprocessing/delayed_feature_transformer.py
@@ -94,15 +94,13 @@ def transform(self, X, y=None):
         X_ww = infer_feature_types(X)
         categorical_columns = self._get_categorical_columns(X_ww)
         X = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
-
         if self.delay_features and len(X) > 0:
             X_categorical = self._encode_X_while_preserving_index(X[categorical_columns])
             for col_name in X:
                 col = X[col_name]
                 if col_name in categorical_columns:
                     col = X_categorical[col_name]
                 X = X.assign(**{f"{col_name}_delay_{t}": col.shift(t) for t in range(1, self.max_delay + 1)})
-
         # Handle cases where the target was passed in
         if self.delay_target and y is not None:
             y = infer_feature_types(y)

diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
@@ -929,9 +929,9 @@ def test_describe_pipeline(mock_fit, mock_score, return_dict, caplog, X_y_binary
     assert "* strategy : mode" in out
     assert "Total training time (including CV): " in out
     assert "Log Loss Binary # Training # Validation" in out
-    assert "0                      1.000     66.000       34.000" in out
-    assert "1                      1.000     67.000       33.000" in out
-    assert "2                      1.000     67.000       33.000" in out
+    assert "0                      1.000       66.0         34.0" in out
+    assert "1                      1.000       67.0         33.0" in out
+    assert "2                      1.000       67.0         33.0" in out
     assert "mean                   1.000          -            -" in out
     assert "std                    0.000          -            -" in out
     assert "coef of var            0.000          -            -" in out

diff --git a/evalml/tests/data_checks_tests/test_class_imbalance_data_check.py b/evalml/tests/data_checks_tests/test_class_imbalance_data_check.py
@@ -145,10 +145,10 @@ def test_class_imbalance_data_check_multiclass(input_type):
     class_imbalance_check = ClassImbalanceDataCheck(num_cv_folds=2)
     assert class_imbalance_check.validate(X, y_imbalanced_cv) == {
         "warnings": [],
-        "errors": [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 4 instances: [2, 0]",
+        "errors": [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 4 instances: [0, 2]",
                                   data_check_name=class_imbalance_data_check_name,
                                   message_code=DataCheckMessageCode.CLASS_IMBALANCE_BELOW_FOLDS,
-                                  details={"target_values": [2, 0]}).to_dict()],
+                                  details={"target_values": [0, 2]}).to_dict()],
         "actions": []
     }
 

diff --git a/evalml/tests/data_checks_tests/test_data_checks.py b/evalml/tests/data_checks_tests/test_data_checks.py
@@ -115,10 +115,10 @@ def test_default_data_checks_classification(input_type):
 
     data_checks = DefaultDataChecks("binary", get_default_primary_search_objective("binary"))
 
-    imbalance = [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 6 instances: [1.0, 0.0]",
+    imbalance = [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 6 instances: [0.0, 1.0]",
                                 data_check_name="ClassImbalanceDataCheck",
                                 message_code=DataCheckMessageCode.CLASS_IMBALANCE_BELOW_FOLDS,
-                                details={"target_values": [1.0, 0.0]}).to_dict()]
+                                details={"target_values": [0.0, 1.0]}).to_dict()]
 
     assert data_checks.validate(X, y) == {"warnings": messages[:3], "errors": messages[3:] + imbalance, "actions": []}
 

diff --git a/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py b/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py
@@ -72,7 +72,7 @@ def test_invalid_target_data_check_multiclass_two_examples_per_class():
         "errors": [DataCheckError(message=expected_message,
                                   data_check_name=invalid_targets_data_check_name,
                                   message_code=DataCheckMessageCode.TARGET_BINARY_NOT_TWO_EXAMPLES_PER_CLASS,
-                                  details={"least_populated_class_labels": [1, 0]}).to_dict()],
+                                  details={"least_populated_class_labels": [0, 1]}).to_dict()],
         "actions": []
     }
 

diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -12,7 +12,7 @@ matplotlib==3.3.4
 networkx==2.5
 nlp-primitives==1.1.0
 numpy==1.20.1
-pandas==1.1.5
+pandas==1.2.3
 plotly==4.14.3
 psutil==5.8.0
 pyzmq==21.0.2

diff --git a/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py b/evalml/tests/model_understanding_tests/prediction_explanations_tests/test_explainers.py
@@ -632,8 +632,8 @@ def transform_y_for_problem_type(problem_type, y):
                                   'datetime_month_0', 'datetime_month_1', 'datetime_month_2', 'datetime_month_4',
                                   'datetime_month_5', 'datetime_month_6', 'datetime_month_7'}
 
-EXPECTED_CURRENCY_FEATURES = {'currency_XDR', 'currency_MUR', 'currency_NIS', 'currency_CNY', 'currency_TZS',
-                              'currency_LAK', 'currency_MOP', 'currency_IMP', 'currency_QAR', 'currency_EGP'}
+EXPECTED_CURRENCY_FEATURES = {'currency_XDR', 'currency_HTG', 'currency_PAB', 'currency_CNY', 'currency_TZS',
+                              'currency_LAK', 'currency_NAD', 'currency_IMP', 'currency_QAR', 'currency_EGP'}
 
 EXPECTED_PROVIDER_FEATURES_OHE = {'provider_JCB 16 digit', 'provider_Discover', 'provider_American Express',
                                   'provider_JCB 15 digit', 'provider_Maestro', 'provider_VISA 19 digit',

diff --git a/evalml/tests/model_understanding_tests/test_partial_dependence.py b/evalml/tests/model_understanding_tests/test_partial_dependence.py
@@ -272,9 +272,9 @@ def round_dict_keys(dictionary, places=6):
     num_cat_features = len(set(X["currency"].to_series()))
     assert num_cat_features == 164
 
-    part_dep_ans = {0.1424060057413758: 154, 0.006837318701999957: 1, 0.24445532203317386: 1, 0.15637574440029903: 1,
-                    0.11676042311300606: 1, 0.13434069071819482: 1, 0.1502609021969637: 1, 0.14486201259150977: 1,
-                    0.16687406140200164: 1, 0.06815227785761911: 1, 0.0791821060634158: 1}
+    part_dep_ans = {0.1432616813857269: 154, 0.1502346349971562: 1, 0.14487916687594762: 1,
+                    0.1573183451314127: 1, 0.11695462432136654: 1, 0.07950579532536253: 1, 0.006794444792966759: 1,
+                    0.17745270478939879: 1, 0.1666874487986626: 1, 0.13357573073236878: 1, 0.06778096366056789: 1}
     part_dep_ans_rounded = round_dict_keys(part_dep_ans)
 
     # Check the case where grid_resolution < number of categorical features

diff --git a/evalml/tests/pipeline_tests/test_component_graph.py b/evalml/tests/pipeline_tests/test_component_graph.py
@@ -696,7 +696,7 @@ def test_custom_input_feature_types(example_graph):
     assert input_feature_names['Imputer'] == ['column_1', 'column_2']
     assert input_feature_names['OneHot_RandomForest'] == ['column_1', 'column_2']
     assert input_feature_names['OneHot_ElasticNet'] == ['column_1', 'column_2']
-    assert input_feature_names['Random Forest'] == ['column_1_a', 'column_1_b', 'column_2_3', 'column_2_4']
+    assert input_feature_names['Random Forest'] == ['column_1_a', 'column_1_b', 'column_2_4', 'column_2_5']
     assert input_feature_names['Elastic Net'] == ['column_1_a', 'column_1_b', 'column_1_c', 'column_2_3', 'column_2_4', 'column_2_5']
     assert input_feature_names['Logistic Regression'] == ['Random Forest', 'Elastic Net']
 

diff --git a/evalml/tests/pipeline_tests/test_time_series_pipeline.py b/evalml/tests/pipeline_tests/test_time_series_pipeline.py
@@ -234,7 +234,6 @@ class MyTsPipeline(pipeline_class):
                            "target_delay_1": y_series.shift(1)}).dropna(axis=0, how='any')
 
     df_passed_to_estimator, target_passed_to_estimator = mock_fit.call_args[0]
-
     # Check the features have target values encoded as ints.
     assert_frame_equal(df_passed_to_estimator, answer)
 

diff --git a/evalml/tests/utils_tests/test_gen_utils.py b/evalml/tests/utils_tests/test_gen_utils.py
@@ -216,16 +216,16 @@ def _check_equality(data, expected, check_index_type=True):
 
 
 @pytest.mark.parametrize("data,num_to_pad,expected",
-                         [(pd.Series([1, 2, 3]), 1, pd.Series([np.nan, 1, 2, 3])),
+                         [(pd.Series([1, 2, 3]), 1, pd.Series([np.nan, 1, 2, 3], dtype="Float64")),
                           (pd.Series([1, 2, 3]), 0, pd.Series([1, 2, 3])),
                           (pd.Series([1, 2, 3, 4], index=pd.date_range("2020-10-01", "2020-10-04")),
-                           2, pd.Series([np.nan, np.nan, 1, 2, 3, 4])),
+                           2, pd.Series([np.nan, np.nan, 1, 2, 3, 4], dtype="Float64")),
                           (pd.DataFrame({"a": [1., 2., 3.], "b": [4., 5., 6.]}), 0,
-                           pd.DataFrame({"a": [1., 2., 3.], "b": [4., 5., 6.]})),
+                           pd.DataFrame({"a": pd.Series([1., 2., 3.], dtype="Float64"), "b": pd.Series([4., 5., 6.], dtype="Float64")})),
                           (pd.DataFrame({"a": [4, 5, 6], "b": ["a", "b", "c"]}), 1,
-                           pd.DataFrame({"a": [np.nan, 4, 5, 6], "b": [np.nan, "a", "b", "c"]})),
+                           pd.DataFrame({"a": pd.Series([np.nan, 4, 5, 6], dtype="Float64"), "b": [np.nan, "a", "b", "c"]})),
                           (pd.DataFrame({"a": [1, 0, 1]}), 2,
-                           pd.DataFrame({"a": [np.nan, np.nan, 1, 0, 1]}))])
+                           pd.DataFrame({"a": pd.Series([np.nan, np.nan, 1, 0, 1], dtype="Float64")}))])
 def test_pad_with_nans(data, num_to_pad, expected):
     padded = pad_with_nans(data, num_to_pad)
     _check_equality(padded, expected)
@@ -235,7 +235,7 @@ def test_pad_with_nans_with_series_name():
     name = "data to pad"
     data = pd.Series([1, 2, 3], name=name)
     padded = pad_with_nans(data, 1)
-    _check_equality(padded, pd.Series([np.nan, 1, 2, 3], name=name))
+    _check_equality(padded, pd.Series([np.nan, 1, 2, 3], name=name, dtype="Float64"))
 
 
 @pytest.mark.parametrize("data, expected",