from abc import abstractmethod
import numpy as np
-
+ import pandas as pd
+ from sklearn.metrics import (
+     get_scorer,
+     get_scorer_names
+ )
+ from typing import Callable, Union
from ..sklearn_extensions.model_selection import ModelSelector
from ..utilities import (filter_none_kwargs)
from .._ortho_learner import _OrthoLearner

-

class _ModelNuisance(ModelSelector):
    """
    RLearner nuisance model.
@@ -54,10 +58,13 @@ def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight
                            **filter_none_kwargs(sample_weight=sample_weight, groups=groups))
        return self

-     def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
+     def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None,
+               y_scoring=None, t_scoring=None, t_score_by_dim=False):
        # note that groups are not passed to score because they are only used for fitting
-         T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight))
-         Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight))
+         T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight),
+                                       scoring=t_scoring, score_by_dim=t_score_by_dim)
+         Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight),
+                                       scoring=y_scoring)
        return Y_score, T_score

    def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
@@ -98,18 +105,92 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None,
    def predict(self, X=None):
        return self._model_final.predict(X)

-     def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, groups=None):
+     def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, groups=None,
+               scoring='mean_squared_error'):
+         """
+         Score the final model's fit of the residualized outcomes from the residualized treatments and nuisances.
+
+         The default scoring method, "mean_squared_error", is the objective used to fit the residualized
+         outcomes from the residualized treatments and nuisances, and reproduces the behavior of this
+         score function before the scoring option was added.
+
+         :param Y: Unused
+         :param T: Unused
+         :param X: Combined nuisances, treatments and instruments used to call _model_final.predict
+         :param W: Unused
+         :param Z: Unused
+         :param nuisances: Tuple of the outcome (Y) residuals and treatment (T) residuals
+         :param sample_weight: Optional weighting on the samples
+         :param groups: Unused
+         :param scoring: Optional alternative scoring metric name from sklearn.get_scorer
+         :return: Float score
+         """
        Y_res, T_res = nuisances
        if Y_res.ndim == 1:
            Y_res = Y_res.reshape((-1, 1))
        if T_res.ndim == 1:
            T_res = T_res.reshape((-1, 1))
        effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1]))
        Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape)
+         return _ModelFinal._wrap_scoring(Y_true=Y_res, Y_pred=Y_res_pred, scoring=scoring, sample_weight=sample_weight)
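+         # Illustrative usage (a minimal sketch, assuming a fitted _ModelFinal instance
+         # `model_final` and residual nuisances (Y_res, T_res) from cross-fitting):
+         #     model_final.score(Y, T, X=X, nuisances=(Y_res, T_res),
+         #                       scoring='mean_absolute_error')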
+
+
+     @staticmethod
+     def _wrap_scoring(scoring: Union[str, Callable], Y_true, Y_pred, sample_weight=None):
+         """
+         Pull the scoring function from sklearn.get_scorer and call it with Y_true, Y_pred.
+
+         Standard score names like "mean_squared_error" are present in sklearn scoring as
+         "neg_...", so score names are accepted either with or without the "neg_" prefix.
+         The function _score_func is called directly because the scorer objects from get_scorer()
+         do not accept a sample_weight parameter. The _score_func member has been available in
+         sklearn scorers since before sklearn 1.0. Note that custom callable score functions
+         are allowed but they are not validated before use; any errors they raise will propagate.
+
+         :param scoring: A string name of a scoring function from sklearn, or any callable that will
+                         function as the score.
+         :param Y_true: True Y values
+         :param Y_pred: Predicted Y values
+         :param sample_weight: Optional weighting on the examples
+         :return: Float score
+         """
+         if isinstance(scoring, str) and scoring in get_scorer_names():
+             score_fn = get_scorer(scoring)._score_func
+         elif isinstance(scoring, str) and 'neg_' + scoring in get_scorer_names():
+             score_fn = get_scorer('neg_' + scoring)._score_func
+         elif callable(scoring):
+             score_fn = scoring
+         else:
+             raise NotImplementedError(f"_wrap_scoring does not support '{scoring}'")
+
+         # Some scoring functions expect 1-D arrays rather than 2-D arrays of shape (N, 1)
+         Y_true = Y_true.squeeze() if len(Y_true.shape) == 2 and Y_true.shape[1] == 1 else Y_true
+         Y_pred = Y_pred.squeeze() if len(Y_pred.shape) == 2 and Y_pred.shape[1] == 1 else Y_pred
        if sample_weight is not None:
-             return np.mean(np.average((Y_res - Y_res_pred) ** 2, weights=sample_weight, axis=0))
+             res = score_fn(Y_true, Y_pred, sample_weight=sample_weight)
        else:
-             return np.mean((Y_res - Y_res_pred) ** 2)
+             res = score_fn(Y_true, Y_pred)
+
+         return res
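+         # Illustrative check (a minimal sketch): 'mean_squared_error' is not a registered
+         # scorer name, so it resolves via the 'neg_' prefix to sklearn's mean_squared_error:
+         #     _ModelFinal._wrap_scoring('mean_squared_error',
+         #                               np.array([1.0, 2.0]), np.array([1.5, 2.5]))  # -> 0.25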
+
+
+     @staticmethod
+     def wrap_scoring(scoring, Y_true, Y_pred, sample_weight=None, score_by_dim=False):
+         """
+         Score predictions, optionally scoring each dimension of a multiple-treatment model separately.
+
+         When score_by_dim is True, loop over the single-score wrapper for each output column.
+         """
+         if not score_by_dim:
+             return _ModelFinal._wrap_scoring(scoring, Y_true, Y_pred, sample_weight)
+         else:
+             assert Y_true.shape == Y_pred.shape, "Shape mismatch in wrap_scoring"
+             n_out = Y_pred.shape[1]
+             res = [None] * n_out
+             for yidx in range(n_out):
+                 res[yidx] = _ModelFinal._wrap_scoring(scoring, Y_true[:, yidx], Y_pred[:, yidx], sample_weight)
+             return res
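+         # Illustrative usage (a minimal sketch with hypothetical 2-column arrays Y_true, Y_pred):
+         #     _ModelFinal.wrap_scoring('mean_absolute_error', Y_true, Y_pred,
+         #                              score_by_dim=True)  # -> [score_for_col_0, score_for_col_1]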


class _RLearner(_OrthoLearner):
@@ -255,13 +336,13 @@ def _gen_rlearner_model_final(self):
    >>> est.effect(np.ones((1,1)), T0=0, T1=10)
    array([9.996314...])
    >>> est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])
-     np.float64(9.73638006...e-05)
+     9.73638006...e-05
    >>> est.rlearner_model_final_.model
    LinearRegression(fit_intercept=False)
    >>> est.rlearner_model_final_.model.coef_
    array([0.999631...])
    >>> est.score_
-     np.float64(9.82623204...e-05)
+     9.82623204...e-05
    >>> [mdl._model for mdls in est.models_y for mdl in mdls]
    [LinearRegression(), LinearRegression()]
    >>> [mdl._model for mdls in est.models_t for mdl in mdls]
@@ -422,7 +503,7 @@ def fit(self, Y, T, *, X=None, W=None, sample_weight=None, freq_weight=None, sam
                           cache_values=cache_values,
                           inference=inference)

-     def score(self, Y, T, X=None, W=None, sample_weight=None):
+     def score(self, Y, T, X=None, W=None, sample_weight=None, scoring=None):
        """
        Score the fitted CATE model on a new data set.

@@ -453,7 +534,7 @@ def score(self, Y, T, X=None, W=None, sample_weight=None):
            The MSE of the final CATE model on the new data.
        """
        # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring
-         return super().score(Y, T, X=X, W=W, sample_weight=sample_weight)
+         return super().score(Y, T, X=X, W=W, sample_weight=sample_weight, scoring=scoring)
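+         # Illustrative usage (a minimal sketch, assuming a fitted estimator `est`; the scoring
+         # name is forwarded to the parent _OrthoLearner.score):
+         #     est.score(y, T, X=X, W=W, scoring='mean_absolute_error')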

    @property
    def rlearner_model_final_(self):
@@ -493,3 +574,68 @@ def residuals_(self):
                                 "Set to `True` to enable residual storage.")
        Y_res, T_res = self._cached_values.nuisances
        return Y_res, T_res, self._cached_values.X, self._cached_values.W
+
+     @staticmethod
+     def scoring_name(scoring: Union[str, Callable, None]) -> str:
+         if scoring is None:
+             return 'default_score'
+         elif isinstance(scoring, str):
+             return scoring
+         elif callable(scoring):
+             return scoring.__name__
+         else:
+             raise ValueError("Scoring should be str|Callable|None")
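+         # Illustrative behavior (a minimal sketch; `custom_metric` is a hypothetical callable):
+         #     _RLearner.scoring_name(None)                  # -> 'default_score'
+         #     _RLearner.scoring_name('mean_squared_error')  # -> 'mean_squared_error'
+         #     _RLearner.scoring_name(custom_metric)         # -> 'custom_metric'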
+
+
+     def score_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None, y_scoring=None,
+                         t_scoring=None, t_score_by_dim=False):
+         """
+         Score the fitted nuisance models on arbitrary data using any supported sklearn scoring.
+
+         Parameters
+         ----------
+         Y: (n, d_y) matrix or vector of length n
+             Outcomes for each sample
+         T: (n, d_t) matrix or vector of length n
+             Treatments for each sample
+         X: (n, d_x) matrix, optional
+             Features for each sample
+         W: (n, d_w) matrix, optional
+             Controls for each sample
+         Z: (n, d_z) matrix, optional
+             Instruments for each sample
+         sample_weight: (n,) vector, optional
+             Weights for each sample
+         t_scoring: str, optional
+             Name of an sklearn scoring function to use instead of the default for model_t, choices
+             are from sklearn.metrics.get_scorer_names() plus pearsonr
+         y_scoring: str, optional
+             Name of an sklearn scoring function to use instead of the default for model_y, choices
+             are from sklearn.metrics.get_scorer_names() plus pearsonr
+         t_score_by_dim: bool, default=False
+             Score predictions for each treatment dimension separately
+
+         Returns
+         -------
+         score_dict : dict[str, list[float]]
+             A dictionary where the keys indicate the Y and T scores used and the values are
+             lists of scores, one per CV fold model.
+         """
+         Y_key = f'Y_{_RLearner.scoring_name(y_scoring)}'
+         T_key = f'T_{_RLearner.scoring_name(t_scoring)}'
+         score_dict = {
+             Y_key: [],
+             T_key: []
+         }
+
+         # Discrete outcomes and treatments have to be one-hot encoded before scoring
+         Y_2_score = pd.get_dummies(Y) if self.discrete_outcome and (len(Y.shape) == 1 or Y.shape[1] == 1) else Y
+         T_2_score = pd.get_dummies(T) if self.discrete_treatment and (len(T.shape) == 1 or T.shape[1] == 1) else T
+
+         for m in self._models_nuisance[0]:
+             Y_score, T_score = m.score(Y_2_score, T_2_score, X=X, W=W, Z=Z, sample_weight=sample_weight,
+                                        y_scoring=y_scoring, t_scoring=t_scoring,
+                                        t_score_by_dim=t_score_by_dim)
+             score_dict[Y_key].append(Y_score)
+             score_dict[T_key].append(T_score)
+         return score_dict
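+         # Illustrative usage (a minimal sketch, assuming a fitted RLearner-based estimator `est`):
+         #     scores = est.score_nuisances(y, T, X=X, W=W,
+         #                                  y_scoring='r2', t_scoring='mean_absolute_error',
+         #                                  t_score_by_dim=True)
+         #     scores['Y_r2']                   # list of Y-model scores, one per CV fold model
+         #     scores['T_mean_absolute_error']  # per-fold lists, one entry per treatment dimension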