Skip to content

Commit

Permalink
Unpin pandas version (#1708)
Browse files Browse the repository at this point in the history
  • Loading branch information
angela97lin authored Mar 16, 2021
1 parent 888dce8 commit 9576d5d
Show file tree
Hide file tree
Showing 15 changed files with 24 additions and 27 deletions.
2 changes: 1 addition & 1 deletion core-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy>=1.19.1
pandas>=1.1.0,<1.2.0
pandas>=1.1.0
scipy>=1.2.1
scikit-learn>=0.23.1
scikit-optimize>=0.8.1
Expand Down
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Release Notes
* Aggregating the shap values for predictions that we know the provenance of, e.g. OHE, text, and date-time. :pr:`1901`
* Improved error message when custom objective is passed as a string in ``pipeline.score`` :pr:`1941`
* Added ``score_pipelines`` and ``train_pipelines`` methods to ``AutoMLSearch`` :pr:`1913`
* Added support for ``pandas`` version 1.2.0 :pr:`1708`
* Added ``score_batch`` and ``train_batch`` abstract methods to ``EngineBase`` and implementations in ``SequentialEngine`` :pr:`1913`
* Fixes
* Removed CI check for ``check_dependencies_updated_linux`` :pr:`1950`
Expand Down
2 changes: 1 addition & 1 deletion evalml/objectives/objective_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _standardize_input_type(input_data):
pd.DataFrame or pd.Series: a pd.Series, or pd.DataFrame object if predicted probabilities were provided.
"""
if isinstance(input_data, (pd.Series, pd.DataFrame)):
return input_data
return _convert_woodwork_types_wrapper(input_data)
if isinstance(input_data, ww.DataTable):
return _convert_woodwork_types_wrapper(input_data.to_dataframe())
if isinstance(input_data, ww.DataColumn):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ def transform(self, X, y=None):
X_cat = pd.DataFrame(self._encoder.transform(X_copy[self.features_to_encode]).toarray(), index=X_copy.index)
X_cat.columns = self.get_feature_names()
X_t = pd.concat([X_t, X_cat], axis=1)

return _retain_custom_types_and_initalize_woodwork(X_ww, X_t)

def _handle_parameter_handle_missing(self, X):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,13 @@ def transform(self, X, y=None):
X_ww = infer_feature_types(X)
categorical_columns = self._get_categorical_columns(X_ww)
X = _convert_woodwork_types_wrapper(X_ww.to_dataframe())

if self.delay_features and len(X) > 0:
X_categorical = self._encode_X_while_preserving_index(X[categorical_columns])
for col_name in X:
col = X[col_name]
if col_name in categorical_columns:
col = X_categorical[col_name]
X = X.assign(**{f"{col_name}_delay_{t}": col.shift(t) for t in range(1, self.max_delay + 1)})

# Handle cases where the target was passed in
if self.delay_target and y is not None:
y = infer_feature_types(y)
Expand Down
6 changes: 3 additions & 3 deletions evalml/tests/automl_tests/test_automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,9 +929,9 @@ def test_describe_pipeline(mock_fit, mock_score, return_dict, caplog, X_y_binary
assert "* strategy : mode" in out
assert "Total training time (including CV): " in out
assert "Log Loss Binary # Training # Validation" in out
assert "0 1.000 66.000 34.000" in out
assert "1 1.000 67.000 33.000" in out
assert "2 1.000 67.000 33.000" in out
assert "0 1.000 66.0 34.0" in out
assert "1 1.000 67.0 33.0" in out
assert "2 1.000 67.0 33.0" in out
assert "mean 1.000 - -" in out
assert "std 0.000 - -" in out
assert "coef of var 0.000 - -" in out
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ def test_class_imbalance_data_check_multiclass(input_type):
class_imbalance_check = ClassImbalanceDataCheck(num_cv_folds=2)
assert class_imbalance_check.validate(X, y_imbalanced_cv) == {
"warnings": [],
"errors": [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 4 instances: [2, 0]",
"errors": [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 4 instances: [0, 2]",
data_check_name=class_imbalance_data_check_name,
message_code=DataCheckMessageCode.CLASS_IMBALANCE_BELOW_FOLDS,
details={"target_values": [2, 0]}).to_dict()],
details={"target_values": [0, 2]}).to_dict()],
"actions": []
}

Expand Down
4 changes: 2 additions & 2 deletions evalml/tests/data_checks_tests/test_data_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ def test_default_data_checks_classification(input_type):

data_checks = DefaultDataChecks("binary", get_default_primary_search_objective("binary"))

imbalance = [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 6 instances: [1.0, 0.0]",
imbalance = [DataCheckError(message="The number of instances of these targets is less than 2 * the number of cross folds = 6 instances: [0.0, 1.0]",
data_check_name="ClassImbalanceDataCheck",
message_code=DataCheckMessageCode.CLASS_IMBALANCE_BELOW_FOLDS,
details={"target_values": [1.0, 0.0]}).to_dict()]
details={"target_values": [0.0, 1.0]}).to_dict()]

assert data_checks.validate(X, y) == {"warnings": messages[:3], "errors": messages[3:] + imbalance, "actions": []}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_invalid_target_data_check_multiclass_two_examples_per_class():
"errors": [DataCheckError(message=expected_message,
data_check_name=invalid_targets_data_check_name,
message_code=DataCheckMessageCode.TARGET_BINARY_NOT_TWO_EXAMPLES_PER_CLASS,
details={"least_populated_class_labels": [1, 0]}).to_dict()],
details={"least_populated_class_labels": [0, 1]}).to_dict()],
"actions": []
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ matplotlib==3.3.4
networkx==2.5
nlp-primitives==1.1.0
numpy==1.20.1
pandas==1.1.5
pandas==1.2.3
plotly==4.14.3
psutil==5.8.0
pyzmq==21.0.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,8 @@ def transform_y_for_problem_type(problem_type, y):
'datetime_month_0', 'datetime_month_1', 'datetime_month_2', 'datetime_month_4',
'datetime_month_5', 'datetime_month_6', 'datetime_month_7'}

EXPECTED_CURRENCY_FEATURES = {'currency_XDR', 'currency_MUR', 'currency_NIS', 'currency_CNY', 'currency_TZS',
'currency_LAK', 'currency_MOP', 'currency_IMP', 'currency_QAR', 'currency_EGP'}
EXPECTED_CURRENCY_FEATURES = {'currency_XDR', 'currency_HTG', 'currency_PAB', 'currency_CNY', 'currency_TZS',
'currency_LAK', 'currency_NAD', 'currency_IMP', 'currency_QAR', 'currency_EGP'}

EXPECTED_PROVIDER_FEATURES_OHE = {'provider_JCB 16 digit', 'provider_Discover', 'provider_American Express',
'provider_JCB 15 digit', 'provider_Maestro', 'provider_VISA 19 digit',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,9 @@ def round_dict_keys(dictionary, places=6):
num_cat_features = len(set(X["currency"].to_series()))
assert num_cat_features == 164

part_dep_ans = {0.1424060057413758: 154, 0.006837318701999957: 1, 0.24445532203317386: 1, 0.15637574440029903: 1,
0.11676042311300606: 1, 0.13434069071819482: 1, 0.1502609021969637: 1, 0.14486201259150977: 1,
0.16687406140200164: 1, 0.06815227785761911: 1, 0.0791821060634158: 1}
part_dep_ans = {0.1432616813857269: 154, 0.1502346349971562: 1, 0.14487916687594762: 1,
0.1573183451314127: 1, 0.11695462432136654: 1, 0.07950579532536253: 1, 0.006794444792966759: 1,
0.17745270478939879: 1, 0.1666874487986626: 1, 0.13357573073236878: 1, 0.06778096366056789: 1}
part_dep_ans_rounded = round_dict_keys(part_dep_ans)

# Check the case where grid_resolution < number of categorical features
Expand Down
2 changes: 1 addition & 1 deletion evalml/tests/pipeline_tests/test_component_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ def test_custom_input_feature_types(example_graph):
assert input_feature_names['Imputer'] == ['column_1', 'column_2']
assert input_feature_names['OneHot_RandomForest'] == ['column_1', 'column_2']
assert input_feature_names['OneHot_ElasticNet'] == ['column_1', 'column_2']
assert input_feature_names['Random Forest'] == ['column_1_a', 'column_1_b', 'column_2_3', 'column_2_4']
assert input_feature_names['Random Forest'] == ['column_1_a', 'column_1_b', 'column_2_4', 'column_2_5']
assert input_feature_names['Elastic Net'] == ['column_1_a', 'column_1_b', 'column_1_c', 'column_2_3', 'column_2_4', 'column_2_5']
assert input_feature_names['Logistic Regression'] == ['Random Forest', 'Elastic Net']

Expand Down
1 change: 0 additions & 1 deletion evalml/tests/pipeline_tests/test_time_series_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ class MyTsPipeline(pipeline_class):
"target_delay_1": y_series.shift(1)}).dropna(axis=0, how='any')

df_passed_to_estimator, target_passed_to_estimator = mock_fit.call_args[0]

# Check the features have target values encoded as ints.
assert_frame_equal(df_passed_to_estimator, answer)

Expand Down
12 changes: 6 additions & 6 deletions evalml/tests/utils_tests/test_gen_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,16 +216,16 @@ def _check_equality(data, expected, check_index_type=True):


@pytest.mark.parametrize("data,num_to_pad,expected",
[(pd.Series([1, 2, 3]), 1, pd.Series([np.nan, 1, 2, 3])),
[(pd.Series([1, 2, 3]), 1, pd.Series([np.nan, 1, 2, 3], dtype="Float64")),
(pd.Series([1, 2, 3]), 0, pd.Series([1, 2, 3])),
(pd.Series([1, 2, 3, 4], index=pd.date_range("2020-10-01", "2020-10-04")),
2, pd.Series([np.nan, np.nan, 1, 2, 3, 4])),
2, pd.Series([np.nan, np.nan, 1, 2, 3, 4], dtype="Float64")),
(pd.DataFrame({"a": [1., 2., 3.], "b": [4., 5., 6.]}), 0,
pd.DataFrame({"a": [1., 2., 3.], "b": [4., 5., 6.]})),
pd.DataFrame({"a": pd.Series([1., 2., 3.], dtype="Float64"), "b": pd.Series([4., 5., 6.], dtype="Float64")})),
(pd.DataFrame({"a": [4, 5, 6], "b": ["a", "b", "c"]}), 1,
pd.DataFrame({"a": [np.nan, 4, 5, 6], "b": [np.nan, "a", "b", "c"]})),
pd.DataFrame({"a": pd.Series([np.nan, 4, 5, 6], dtype="Float64"), "b": [np.nan, "a", "b", "c"]})),
(pd.DataFrame({"a": [1, 0, 1]}), 2,
pd.DataFrame({"a": [np.nan, np.nan, 1, 0, 1]}))])
pd.DataFrame({"a": pd.Series([np.nan, np.nan, 1, 0, 1], dtype="Float64")}))])
def test_pad_with_nans(data, num_to_pad, expected):
padded = pad_with_nans(data, num_to_pad)
_check_equality(padded, expected)
Expand All @@ -235,7 +235,7 @@ def test_pad_with_nans_with_series_name():
name = "data to pad"
data = pd.Series([1, 2, 3], name=name)
padded = pad_with_nans(data, 1)
_check_equality(padded, pd.Series([np.nan, 1, 2, 3], name=name))
_check_equality(padded, pd.Series([np.nan, 1, 2, 3], name=name, dtype="Float64"))


@pytest.mark.parametrize("data, expected",
Expand Down

0 comments on commit 9576d5d

Please sign in to comment.