diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index a60229684..8c6d2a217 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -17,6 +17,10 @@ The CHANGELOG for the current development version is available at
##### New Features
+
+- The `fit` method of `SequentialFeatureSelector` now optionally accepts `**fit_params` for the estimator that is used for feature selection. ([#350](https://github.com/rasbt/mlxtend/pull/350) by Zach Griffith)
+
+
- -
##### Changes
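
For readers skimming the changelog, here is a minimal usage sketch of the new pass-through. It mirrors the unit test added at the bottom of this patch; `k_features=3` and the final `print` are illustrative additions, not part of the change itself:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

iris = load_iris()
X, y = iris.data, iris.target

# Any keyword accepted by the estimator's fit() can now be forwarded,
# e.g. per-sample weights for a RandomForestClassifier.
sample_weight = np.ones(X.shape[0])

sfs = SFS(RandomForestClassifier(n_estimators=100, random_state=123),
          k_features=3)
sfs = sfs.fit(X, y, sample_weight=sample_weight)
print(sfs.k_feature_idx_)
```
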
diff --git a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
index 2185a7715..107f4fc04 100644
--- a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
+++ b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
@@ -1435,7 +1435,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -1555,7 +1555,7 @@
"\n",
"
\n",
"\n",
- "*fit(X, y)*\n",
+ "*fit(X, y, **fit_params)*\n",
"\n",
"Perform feature selection and learn model from training data.\n",
"\n",
@@ -1570,6 +1570,10 @@
"\n",
" Target values.\n",
"\n",
+ "- `fit_params` : dict of string -> object, optional\n",
+ "\n",
+    "    Parameters to pass to the fit method of the classifier.\n",
+ "\n",
"**Returns**\n",
"\n",
"- `self` : object\n",
@@ -1577,7 +1581,7 @@
"\n",
"
\n",
"\n",
- "*fit_transform(X, y)*\n",
+ "*fit_transform(X, y, **fit_params)*\n",
"\n",
"Fit to training data then reduce X to its most important features.\n",
"\n",
@@ -1588,6 +1592,14 @@
" Training vectors, where n_samples is the number of samples and\n",
" n_features is the number of features.\n",
"\n",
+    "- `y` : array-like, shape = [n_samples]\n",
+    "\n",
+    "    Target values.\n",
+    "\n",
+    "- `fit_params` : dict of string -> object, optional\n",
+    "\n",
+    "    Parameters to pass to the fit method of the classifier.\n",
+ "\n",
"**Returns**\n",
"\n",
"Reduced feature subset of X, shape={n_samples, k_features}\n",
diff --git a/mlxtend/feature_selection/sequential_feature_selector.py b/mlxtend/feature_selection/sequential_feature_selector.py
index ea93ab70d..2f8aed793 100644
--- a/mlxtend/feature_selection/sequential_feature_selector.py
+++ b/mlxtend/feature_selection/sequential_feature_selector.py
@@ -22,16 +22,17 @@
from sklearn.externals.joblib import Parallel, delayed
-def _calc_score(selector, X, y, indices):
+def _calc_score(selector, X, y, indices, **fit_params):
if selector.cv:
scores = cross_val_score(selector.est_,
X[:, indices], y,
cv=selector.cv,
scoring=selector.scorer,
n_jobs=1,
- pre_dispatch=selector.pre_dispatch)
+ pre_dispatch=selector.pre_dispatch,
+ fit_params=fit_params)
else:
- selector.est_.fit(X[:, indices], y)
+ selector.est_.fit(X[:, indices], y, **fit_params)
scores = np.array([selector.scorer(selector.est_, X[:, indices], y)])
return indices, scores
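
The two branches above route the keywords differently: with cross-validation enabled, the collected dict is handed to scikit-learn's `cross_val_score`, which applies `fit_params` when fitting each training fold; without it, the dict is unpacked directly into the estimator's own `fit`. A self-contained sketch of the same two calls (the estimator, data, and weights here are illustrative, not part of the patch):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

iris = load_iris()
X, y = iris.data, iris.target
est = RandomForestClassifier(n_estimators=10, random_state=0)
fit_params = {'sample_weight': np.ones(X.shape[0])}

# CV path: pass the dict itself, as _calc_score does when selector.cv is set
scores = cross_val_score(est, X, y, cv=3, fit_params=fit_params)

# no-CV path: unpack the dict into fit(), as the else-branch does
est.fit(X, y, **fit_params)
```
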
@@ -169,7 +170,7 @@ def __init__(self, estimator, k_features=1,
# don't mess with this unless testing
self._TESTING_INTERRUPT_MODE = False
- def fit(self, X, y):
+ def fit(self, X, y, **fit_params):
"""Perform feature selection and learn model from training data.
Parameters
@@ -179,6 +180,8 @@ def fit(self, X, y):
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
+ fit_params : dict of string -> object, optional
+            Parameters to pass to the fit method of the classifier.
Returns
-------
@@ -248,7 +251,7 @@ def fit(self, X, y):
k_to_select = min_k
k_idx = tuple(range(X.shape[1]))
k = len(k_idx)
- k_idx, k_score = _calc_score(self, X, y, k_idx)
+ k_idx, k_score = _calc_score(self, X, y, k_idx, **fit_params)
self.subsets_[k] = {
'feature_idx': k_idx,
'cv_scores': k_score,
@@ -266,14 +269,16 @@ def fit(self, X, y):
orig_set=orig_set,
subset=prev_subset,
X=X,
- y=y
+ y=y,
+ **fit_params
)
else:
k_idx, k_score, cv_scores = self._exclusion(
feature_set=prev_subset,
X=X,
- y=y
+ y=y,
+ **fit_params
)
if self.floating:
@@ -298,7 +303,8 @@ def fit(self, X, y):
feature_set=k_idx,
fixed_feature=new_feature,
X=X,
- y=y
+ y=y,
+ **fit_params
)
else:
@@ -306,7 +312,8 @@ def fit(self, X, y):
orig_set=orig_set - {new_feature},
subset=set(k_idx),
X=X,
- y=y
+ y=y,
+ **fit_params
)
if k_score_c is not None and k_score_c > k_score:
@@ -395,7 +402,7 @@ def fit(self, X, y):
self.fitted = True
return self
- def _inclusion(self, orig_set, subset, X, y, ignore_feature=None):
+ def _inclusion(self, orig_set, subset, X, y, ignore_feature=None, **fit_params):
all_avg_scores = []
all_cv_scores = []
all_subsets = []
@@ -407,7 +414,7 @@ def _inclusion(self, orig_set, subset, X, y, ignore_feature=None):
parallel = Parallel(n_jobs=n_jobs, verbose=self.verbose,
pre_dispatch=self.pre_dispatch)
work = parallel(delayed(_calc_score)
- (self, X, y, tuple(subset | {feature}))
+ (self, X, y, tuple(subset | {feature}), **fit_params)
for feature in remaining
if feature != ignore_feature)
@@ -422,7 +429,7 @@ def _inclusion(self, orig_set, subset, X, y, ignore_feature=None):
all_cv_scores[best])
return res
- def _exclusion(self, feature_set, X, y, fixed_feature=None):
+ def _exclusion(self, feature_set, X, y, fixed_feature=None, **fit_params):
n = len(feature_set)
res = (None, None, None)
if n > 1:
@@ -433,7 +440,7 @@ def _exclusion(self, feature_set, X, y, fixed_feature=None):
n_jobs = min(self.n_jobs, features)
parallel = Parallel(n_jobs=n_jobs, verbose=self.verbose,
pre_dispatch=self.pre_dispatch)
- work = parallel(delayed(_calc_score)(self, X, y, p)
+ work = parallel(delayed(_calc_score)(self, X, y, p, **fit_params)
for p in combinations(feature_set, r=n - 1)
if not fixed_feature or fixed_feature in set(p))
@@ -466,7 +473,7 @@ def transform(self, X):
self._check_fitted()
return X[:, self.k_feature_idx_]
- def fit_transform(self, X, y):
+ def fit_transform(self, X, y, **fit_params):
"""Fit to training data then reduce X to its most important features.
Parameters
@@ -474,13 +481,17 @@ def fit_transform(self, X, y):
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
+        y : array-like, shape = [n_samples]
+            Target values.
+        fit_params : dict of string -> object, optional
+            Parameters to pass to the fit method of the classifier.
Returns
-------
Reduced feature subset of X, shape={n_samples, k_features}
"""
- self.fit(X, y)
+ self.fit(X, y, **fit_params)
return self.transform(X)
def get_metric_dict(self, confidence_interval=0.95):
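
Because `fit_transform` now forwards its keywords to `fit` before calling `transform`, sample-aligned arguments flow through end to end. A short sketch (again with illustrative data and estimator):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

iris = load_iris()
X, y = iris.data, iris.target

sfs = SFS(RandomForestClassifier(n_estimators=10, random_state=0),
          k_features=2)
X_reduced = sfs.fit_transform(X, y, sample_weight=np.ones(X.shape[0]))
print(X_reduced.shape)  # (150, 2), i.e. {n_samples, k_features}
```
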
diff --git a/mlxtend/feature_selection/tests/test_sequential_feature_selector.py b/mlxtend/feature_selection/tests/test_sequential_feature_selector.py
index ffdabbecc..6c2f6ba99 100644
--- a/mlxtend/feature_selection/tests/test_sequential_feature_selector.py
+++ b/mlxtend/feature_selection/tests/test_sequential_feature_selector.py
@@ -61,6 +61,18 @@ def test_run_default():
assert sfs.k_feature_idx_ == (3,)
+def test_fit_params():
+ iris = load_iris()
+ X = iris.data
+ y = iris.target
+ sample_weight = np.ones(X.shape[0])
+ forest = RandomForestClassifier(n_estimators=100, random_state=123)
+ sfs = SFS(estimator=forest,
+ verbose=0)
+ sfs.fit(X, y, sample_weight=sample_weight)
+ assert sfs.k_feature_idx_ == (3,)
+
+
def test_kfeatures_type_1():
iris = load_iris()
X = iris.data