Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix automl handling of positional initializer args #373

Merged
merged 2 commits into from
Jan 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions econml/automated_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,10 @@

def setAutomatedMLWorkspace(create_workspace=False,
create_resource_group=False, workspace_region=None, *,
subscription_id=None, resource_group=None, workspace_name=None, auth=None):
auth=None, subscription_id, resource_group, workspace_name):
"""Set configuration file for AutomatedML actions with the EconML library. If
``create_workspace`` is set true, a new workspace is created
for the user.

Parameters
----------
Expand All @@ -68,8 +67,7 @@ def setAutomatedMLWorkspace(create_workspace=False,
authentication portal in the browser.

subscription_id: String, required
Definition of a class that will serve as the parent class of the
AutomatedMLMixin. This class must inherit from _BaseDML.
Azure subscription ID for the subscription under which to run the models

resource_group: String, required
Name of resource group of workspace to be created or set.
Expand Down Expand Up @@ -285,12 +283,12 @@ def __init__(self, *args, **kwargs):
# Loop through the kwargs and args if any of them is an AutoMLConfig file, pass them
# create model and pass model into final.
new_args = ()
for var in args:
for idx, arg in enumerate(args):
# If item is an automl config, get its corresponding
# AutomatedML Model and add it to new_args
if isinstance(var, EconAutoMLConfig):
var = self._get_automated_ml_model(kwarg, key)
new_args += (var,)
if isinstance(arg, EconAutoMLConfig):
arg = self._get_automated_ml_model(arg, f"arg{idx}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the prefix actually used for?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and what if "arg0" is also a keyword argument?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it is just used to make it easier for a person to identify the experiment when looking through experiments in AzureML studio, and that the service itself adds a unique suffix so that the whole thing will be unambiguous in any case.

new_args += (arg,)

for key in kwargs:
kwarg = kwargs[key]
Expand Down
4 changes: 2 additions & 2 deletions econml/drlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,7 +1502,7 @@ def n_crossfit_splits(self):
@n_crossfit_splits.setter
def n_crossfit_splits(self, value):
if value != 'raise':
warn("Deprecated by parameter `n_splits` and will be removed in next version.")
warn("Deprecated by parameter `cv` and will be removed in next version.")
self.cv = value

@property
Expand Down Expand Up @@ -1534,4 +1534,4 @@ def subsample_fr(self, value):
warn("The parameter 'subsample_fr' has been deprecated and will be removed in the next version. "
"Use 'max_samples' instead, with the convention that "
"'subsample_fr=x' is equivalent to 'max_samples=x/2'.")
max_samples = .45 if value == 'auto' else value / 2
self.max_samples = .45 if value == 'auto' else value / 2
51 changes: 29 additions & 22 deletions econml/tests/test_automated_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@
AutomatedKernelDML = addAutomatedML(KernelDML)
AutomatedNonParamDML = \
addAutomatedML(NonParamDML)
AutomatedForestDML = addAutomatedML(ForestDML)
AutomatedCausalForestDML = addAutomatedML(CausalForestDML)

AUTOML_SETTINGS_REG = {
'experiment_timeout_minutes': 1,
'experiment_timeout_minutes': 15,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

15 minutes is the new lowest allowed value for the experiment timeout.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, that’s a lot

'enable_early_stopping': True,
'iteration_timeout_minutes': 1,
'max_cores_per_iteration': 1,
Expand All @@ -61,7 +61,7 @@
}

AUTOML_SETTINGS_CLF = {
'experiment_timeout_minutes': 1,
'experiment_timeout_minutes': 15,
'enable_early_stopping': True,
'iteration_timeout_minutes': 1,
'max_cores_per_iteration': 1,
Expand Down Expand Up @@ -118,7 +118,7 @@ def automl_model_sample_weight_reg():


@pytest.mark.automl
class TestAutomatedDML(unittest.TestCase):
class TestAutomatedML(unittest.TestCase):

@classmethod
def setUpClass(cls):
Expand All @@ -134,7 +134,6 @@ def setUpClass(cls):

def test_nonparam(self):
"""Testing the completion of the fit and effect estimation of an automated Nonparametic DML"""
Y, T, X, _ = ihdp_surface_B()
est = AutomatedNonParamDML(model_y=automl_model_reg(),
model_t=automl_model_clf(),
model_final=automl_model_sample_weight_reg(), featurizer=None,
Expand All @@ -144,7 +143,6 @@ def test_nonparam(self):

def test_param(self):
"""Testing the completion of the fit and effect estimation of an automated Parametric DML"""
Y, T, X, _ = ihdp_surface_B()
est = AutomatedLinearDML(model_y=automl_model_reg(),
model_t=GradientBoostingClassifier(),
featurizer=None,
Expand All @@ -154,28 +152,21 @@ def test_param(self):

def test_forest_dml(self):
"""Testing the completion of the fit and effect estimation of an AutomatedForestDML"""

Y, T, X, _ = ihdp_surface_B()
est = AutomatedForestDML(model_y=automl_model_reg(),
model_t=GradientBoostingClassifier(),
discrete_treatment=True,
n_estimators=1000,
subsample_fr=.8,
min_samples_leaf=10,
min_impurity_decrease=0.001,
verbose=0, min_weight_fraction_leaf=.01)
est = AutomatedCausalForestDML(model_y=automl_model_reg(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where did the data generation go?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were already performing the data generation in the big try block up top, and we made use of this fact in one test but not the others, which I found confusing.

model_t=GradientBoostingClassifier(),
discrete_treatment=True,
n_estimators=1000,
max_samples=.4,
min_samples_leaf=10,
min_impurity_decrease=0.001,
verbose=0, min_weight_fraction_leaf=.01)
est.fit(Y, T, X=X)
_ = est.effect(X)


@pytest.mark.automl
class TestAutomatedMetalearners(unittest.TestCase):

def test_TLearner(self):
"""Testing the completion of the fit and effect estimation of an AutomatedTLearner"""
# TLearner test
# Instantiate TLearner
Y, T, X, _ = ihdp_surface_B()
est = AutomatedTLearner(models=automl_model_reg())

# Test constant and heterogeneous treatment effect, single and multi output y
Expand All @@ -188,7 +179,6 @@ def test_SLearner(self):
# Test constant treatment effect with multi output Y
# Test heterogeneous treatment effect
# Need interactions between T and features
Y, T, X, _ = ihdp_surface_B()
est = AutomatedSLearner(overall_model=automl_model_reg())

est.fit(Y, T, X=X)
Expand All @@ -206,3 +196,20 @@ def test_DALearner(self):

est.fit(Y, T, X=X)
_ = est.effect(X)

def test_positional(self):
"""Test that positional arguments can be used with AutoML wrappers"""

class TestEstimator:
def __init__(self, model_x):
self.model_x = model_x

def fit(self, X, Y):
self.model_x.fit(X, Y)
return self

def predict(self, X):
return self.model_x.predict(X)

AutoMLTestEstimator = addAutomatedML(TestEstimator)
AutoMLTestEstimator(automl_model_reg()).fit(X, Y).predict(X)