Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RScorer class for causal model selection #361

Merged
merged 11 commits into from
Jan 12, 2021
1 change: 1 addition & 0 deletions doc/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Public Module Reference
econml.metalearners
econml.ortho_forest
econml.ortho_iv
econml.score
econml.two_stage_least_squares
econml.utilities

Expand Down
2 changes: 1 addition & 1 deletion econml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
'cate_interpreter', 'causal_forest',
'data', 'deepiv', 'dml', 'drlearner', 'inference',
'metalearners', 'ortho_forest', 'ortho_iv',
'sklearn_extensions', 'tree',
'score', 'sklearn_extensions', 'tree',
'two_stage_least_squares', 'utilities']
13 changes: 13 additions & 0 deletions econml/score/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
vsyrgkanis marked this conversation as resolved.
Show resolved Hide resolved
# Licensed under the MIT License.

"""
A suite of scoring methods for scoring CATE models out-of-sample for the
purpose of model selection.
"""

from .rscorer import RScorer
from .ensemble_cate import EnsembleCateEstimator

__all__ = ['RScorer',
'EnsembleCateEstimator']
68 changes: 68 additions & 0 deletions econml/score/ensemble_cate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import numpy as np
from sklearn.utils.validation import check_array
from .._cate_estimator import BaseCateEstimator, LinearCateEstimator


class EnsembleCateEstimator:
vsyrgkanis marked this conversation as resolved.
Show resolved Hide resolved
kbattocchi marked this conversation as resolved.
Show resolved Hide resolved
""" A CATE estimator that represents a weighted ensemble of many
CATE estimators. Returns their weighted effect prediction.

Parameters
----------
cate_models : list of BaseCateEstimator objects
A list of fitted cate estimator objects that will be used in the ensemble.
The models are passed by reference, and not copied internally, because we
need the fitted objects, so any change to the passed models will affect
the internal predictions (e.g. if the input models are refitted).
weights : np.ndarray of shape (len(cate_models),)
The weight placed on each model. Weights must be non-positive. The
ensemble will predict effects based on the weighted average predictions
of the cate_models estiamtors, weighted by the corresponding weight in `weights`.
"""

def __init__(self, *, cate_models, weights):
self._cate_models = cate_models
self._weights = weights

def effect(self, X=None, *, T0=0, T1=1):
return np.average([mdl.effect(X=X, T0=T0, T1=T1) for mdl in self.cate_models],
weights=self.weights, axis=0)
effect.__doc__ = BaseCateEstimator.effect.__doc__

def marginal_effect(self, T, X=None):
return np.average([mdl.marginal_effect(T, X=X) for mdl in self.cate_models],
weights=self.weights, axis=0)
marginal_effect.__doc__ = BaseCateEstimator.marginal_effect.__doc__

def const_marginal_effect(self, X=None):
vsyrgkanis marked this conversation as resolved.
Show resolved Hide resolved
if np.any([not hasattr(mdl, 'const_marginal_effect') for mdl in self.cate_models]):
raise ValueError("One of the base CATE models in parameter `cate_models` does not support "
"the `const_marginal_effect` method.")
return np.average([mdl.const_marginal_effect(X=X) for mdl in self.cate_models],
weights=self.weights, axis=0)
const_marginal_effect.__doc__ = LinearCateEstimator.const_marginal_effect.__doc__

@property
def cate_models(self):
return self._cate_models

@cate_models.setter
def cate_models(self, value):
if not isinstance(value, list):
raise ValueError('Parameter `cate_models` should be a list of `BaseCateEstimator` objects.')
self._cate_models = value

@property
def weights(self):
return self._weights

@weights.setter
def weights(self, value):
weights = check_array(value, accept_sparse=False, ensure_2d=False, allow_nd=False, dtype='numeric',
vsyrgkanis marked this conversation as resolved.
Show resolved Hide resolved
force_all_finite=True)
if np.any(weights < 0):
raise ValueError("All weights in parameter `weights` must be non-negative.")
self._weights = weights
219 changes: 219 additions & 0 deletions econml/score/rscorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

from ..dml import LinearDML
from sklearn.base import clone
import numpy as np
from scipy.special import softmax
from .ensemble_cate import EnsembleCateEstimator


class RScorer:
""" Scorer based on the RLearner loss. Fits residual models at fit time and calculates
residuals of the evaluation data in a cross-fitting manner::

Yres = Y - E[Y|X, W]
Tres = T - E[T|X, W]

Then for any given cate model calculates the loss::

loss(cate) = E_n[(Yres - <cate(X), Tres>)^2]

Also calculates a baseline loss based on a constant treatment effect model, i.e.::

base_loss = min_{theta} E_n[(Yres - <theta, Tres>)^2]

Returns an analogue of the R-square score for regression::

score = 1 - loss(cate) / base_loss

This corresponds to the extra variance of the outcome explained by introducing heterogeneity
in the effect as captured by the cate model, as opposed to always predicting a constant effect.
A negative score, means that the cate model performs even worse than a constant effect model
and hints at overfitting during training of the cate model.

Parameters
----------
model_y: estimator
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.

model_t: estimator
The estimator for fitting the treatment to the features. Must implement
`fit` and `predict` methods.

discrete_treatment: bool, optional (default is ``False``)
Whether the treatment values should be treated as categorical, rather than continuous, quantities

categories: 'auto' or list, default 'auto'
The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
The first category will be treated as the control treatment.

n_splits: int, cross-validation generator or an iterable, optional (Default=2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:

- None, to use the default 3-fold cross-validation,
- integer, to specify the number of folds.
- :term:`CV splitter`
- An iterable yielding (train, test) splits as arrays of indices.

For integer/None inputs, if the treatment is discrete
:class:`~sklearn.model_selection.StratifiedKFold` is used, else,
:class:`~sklearn.model_selection.KFold` is used
(with a random shuffle in either case).

Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all
W, X are None, then we call `split(ones((T.shape[0], 1)), T)`.

mc_iters: int, optional (default=None)
The number of times to rerun the first stage models to reduce the variance of the nuisances.

mc_agg: {'mean', 'median'}, optional (default='mean')
How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
cross-fitting.

random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
by :mod:`np.random<numpy.random>`.
"""

def __init__(self, *,
model_y,
model_t,
discrete_treatment=False,
categories='auto',
n_splits=2,
heimengqi marked this conversation as resolved.
Show resolved Hide resolved
mc_iters=None,
mc_agg='mean',
random_state=None):
self.model_y = clone(model_y, safe=False)
self.model_t = clone(model_t, safe=False)
self.discrete_treatment = discrete_treatment
self.n_splits = n_splits
self.categories = categories
self.random_state = random_state
self.mc_iters = mc_iters
self.mc_agg = mc_agg

def fit(self, y, T, X=None, W=None, sample_weight=None, groups=None):
"""

Parameters
----------
Y: (n × d_y) matrix or vector of length n
Outcomes for each sample
T: (n × dₜ) matrix or vector of length n
Treatments for each sample
X: optional (n × dₓ) matrix
Features for each sample
W: optional (n × d_w) matrix
Controls for each sample
sample_weight: optional (n,) vector
Weights for each row
groups: (n,) vector, optional
All rows corresponding to the same group will be kept together during splitting.
If groups is not None, the n_splits argument passed to this class's initializer
must support a 'groups' argument to its split method.

Returns
-------
self
"""
if X is None:
raise ValueError("X cannot be None for the RScorer!")

self.lineardml_ = LinearDML(model_y=self.model_y,
model_t=self.model_t,
n_splits=self.n_splits,
discrete_treatment=self.discrete_treatment,
categories=self.categories,
random_state=self.random_state,
mc_iters=self.mc_iters,
mc_agg=self.mc_agg)
self.lineardml_.fit(y, T, X=None, W=np.hstack([v for v in [X, W] if v is not None]),
sample_weight=sample_weight, groups=groups, cache_values=True)
self.base_score_ = self.lineardml_.score_
heimengqi marked this conversation as resolved.
Show resolved Hide resolved
self.dx_ = X.shape[1]
return self

def score(self, cate_model):
"""
Parameters
----------
cate_model : instance of fitted BaseCateEstimator

Returns
-------
score : double
An analogue of the R-square loss for the causal setting.
"""
Y_res, T_res = self.lineardml_._cached_values.nuisances
X = self.lineardml_._cached_values.W[:, :self.dx_]
sample_weight = self.lineardml_._cached_values.sample_weight
if Y_res.ndim == 1:
Y_res = Y_res.reshape((-1, 1))
if T_res.ndim == 1:
T_res = T_res.reshape((-1, 1))
effects = cate_model.const_marginal_effect(X).reshape((-1, Y_res.shape[1], T_res.shape[1]))
Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape)
vsyrgkanis marked this conversation as resolved.
Show resolved Hide resolved
if sample_weight is not None:
return 1 - np.mean(np.average((Y_res - Y_res_pred)**2, weights=sample_weight, axis=0)) / self.base_score_
else:
return 1 - np.mean((Y_res - Y_res_pred) ** 2) / self.base_score_

def best_model(self, cate_models, return_scores=False):
""" Chooses the best among a list of models

Parameters
----------
cate_models : list of instances of fitted BaseCateEstimator
return_scores : bool, optional (default=False)
Whether to return the list scores of each model
Returns
-------
best_model : instance of fitted BaseCateEstimator
The model that achieves the best score
best_score : double
The score of the best model
scores : list of double
The list of scores for each of the input models. Returned only if `return_scores=True`.
"""
rscores = [self.score(mdl) for mdl in cate_models]
best = np.argmax(rscores)
if return_scores:
return cate_models[best], rscores[best], rscores
else:
return cate_models[best], rscores[best]

def ensemble(self, cate_models, eta=1000.0, return_scores=False):
""" Ensembles a list of models based on their performance

Parameters
----------
cate_models : list of instances of fitted BaseCateEstimator
eta : double, optional (default=1000)
The soft-max parameter for the ensemble
return_scores : bool, optional (default=False)
Whether to return the list scores of each model
Returns
-------
ensemble_model : instance of fitted EnsembleCateEstimator
A fitted ensemble cate model that calculates effects based on a weighted
version of the input cate models, weighted by a softmax of their score
performance
ensemble_score : double
The score of the ensemble model
scores : list of double
The list of scores for each of the input models. Returned only if `return_scores=True`.
"""
rscores = np.array([self.score(mdl) for mdl in cate_models])
weights = softmax(eta * rscores)
ensemble = EnsembleCateEstimator(cate_models=cate_models, weights=weights)
ensemble_score = self.score(ensemble)
if return_scores:
return ensemble, ensemble_score, rscores
else:
return ensemble, ensemble_score
75 changes: 75 additions & 0 deletions econml/tests/test_rscorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import unittest
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from joblib import Parallel, delayed

from econml.dml import DML, LinearDML, SparseLinearDML, NonParamDML
from econml.metalearners import XLearner, TLearner, SLearner, DomainAdaptationLearner
from econml.drlearner import DRLearner
from econml.score import RScorer


def _fit_model(name, model, Y, T, X):
return name, model.fit(Y, T, X=X)


class TestRScorer(unittest.TestCase):

def _get_data(self):
X = np.random.normal(0, 1, size=(500, 2))
T = np.random.binomial(1, .5, size=(500,))
y = X[:, 0] * T + np.random.normal(size=(500,))
return y, T, X, X[:, 0]

def test_comparison(self):
def reg():
return LinearRegression()

def clf():
return LogisticRegression()

y, T, X, true_eff = self._get_data()
(X_train, X_val, T_train, T_val,
Y_train, Y_val, _, true_eff_val) = train_test_split(X, T, y, true_eff, test_size=.4)

models = [('ldml', LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
linear_first_stages=False, n_splits=3)),
('sldml', SparseLinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
featurizer=PolynomialFeatures(degree=2, include_bias=False),
linear_first_stages=False, n_splits=3)),
('xlearner', XLearner(models=reg(), cate_models=reg(), propensity_model=clf())),
('dalearner', DomainAdaptationLearner(models=reg(), final_models=reg(), propensity_model=clf())),
('slearner', SLearner(overall_model=reg())),
('tlearner', TLearner(models=reg())),
('drlearner', DRLearner(model_propensity=clf(), model_regression=reg(),
model_final=reg(), n_splits=3)),
('rlearner', NonParamDML(model_y=reg(), model_t=clf(), model_final=reg(),
discrete_treatment=True, n_splits=3)),
('dml3dlasso', DML(model_y=reg(), model_t=clf(), model_final=reg(), discrete_treatment=True,
featurizer=PolynomialFeatures(degree=3),
linear_first_stages=False, n_splits=3))
]

models = Parallel(n_jobs=-1, verbose=1)(delayed(_fit_model)(name, mdl,
Y_train, T_train, X_train)
for name, mdl in models)

scorer = RScorer(model_y=reg(), model_t=clf(),
discrete_treatment=True, n_splits=3, mc_iters=2, mc_agg='median')
scorer.fit(Y_val, T_val, X=X_val)
rscore = [scorer.score(mdl) for _, mdl in models]
rootpehe_score = [np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
for _, mdl in models]
assert LinearRegression().fit(np.array(rscore).reshape(-1, 1), np.array(rootpehe_score)).coef_ < 0.5
mdl, _ = scorer.best_model([mdl for _, mdl in models])
rootpehe_best = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
assert rootpehe_best < 1.2 * np.min(rootpehe_score)
mdl, _ = scorer.ensemble([mdl for _, mdl in models])
rootpehe_ensemble = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
assert rootpehe_ensemble < 1.2 * np.min(rootpehe_score)
Loading