4
4
from ._types import BOOL
5
5
from sklearn .base import RegressorMixin , BaseEstimator , TransformerMixin
6
6
from sklearn .utils .validation import (assert_all_finite , check_is_fitted ,
7
- check_X_y )
7
+ check_X_y , check_array )
8
8
import numpy as np
9
9
from scipy import sparse
10
10
from ._version import get_versions
11
+ try :
12
+ from sklearn .utils .estimator_checks import check_complex_data
13
+ except ImportError :
14
+ check_complex_data = lambda x : x
15
+
11
16
__version__ = get_versions ()['version' ]
12
17
13
18
class Earth (BaseEstimator , RegressorMixin , TransformerMixin ):
@@ -256,11 +261,11 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
256
261
specified, then it is dict where each key is a feature importance type
257
262
name and its corresponding value is an array of shape m.
258
263
259
- `_version `: string
260
- The version of py-earth in which the Earth object was originally
261
- created . This information may be useful when dealing with
264
+ `fit_version_ `: string
265
+ The version of py-earth with which the Earth object was originally
266
+ fitted . This information may be useful when dealing with
262
267
serialized Earth objects.
263
-
268
+
264
269
265
270
References
266
271
----------
@@ -293,7 +298,7 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
293
298
'feature_importance_type' ,
294
299
'verbose'
295
300
])
296
-
301
+
297
302
def __init__ (self , max_terms = None , max_degree = None , allow_missing = False ,
298
303
penalty = None , endspan_alpha = None , endspan = None ,
299
304
minspan_alpha = None , minspan = None ,
@@ -323,7 +328,6 @@ def __init__(self, max_terms=None, max_degree=None, allow_missing=False,
323
328
self .enable_pruning = enable_pruning
324
329
self .feature_importance_type = feature_importance_type
325
330
self .verbose = verbose
326
- self ._version = __version__
327
331
328
332
def __eq__ (self , other ):
329
333
if self .__class__ is not other .__class__ :
@@ -399,6 +403,7 @@ def _scrub_x(self, X, missing, **kwargs):
399
403
raise TypeError ('A sparse matrix was passed, but dense data '
400
404
'is required. Use X.toarray() to convert to '
401
405
'dense.' )
406
+ check_array (X , ensure_2d = True , force_all_finite = False )
402
407
X = np .asarray (X , dtype = np .float64 , order = 'F' )
403
408
404
409
# Figure out missingness
@@ -466,14 +471,16 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
466
471
467
472
# Convert y to internally used data type
468
473
y = np .asarray (y , dtype = np .float64 )
469
- assert_all_finite (y )
470
474
471
475
if len (y .shape ) == 1 :
472
476
y = y [:, np .newaxis ]
473
477
474
478
# Deal with sample_weight
475
479
if sample_weight is None :
476
- sample_weight = np .ones ((y .shape [0 ], 1 ), dtype = y .dtype )
480
+ try :
481
+ sample_weight = np .ones ((y .shape [0 ], 1 ), dtype = y .dtype )
482
+ except :
483
+ raise
477
484
else :
478
485
sample_weight = np .asarray (sample_weight , dtype = np .float64 )
479
486
assert_all_finite (sample_weight )
@@ -501,10 +508,11 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
501
508
502
509
# Make sure everything is finite (except X, which is allowed to have
503
510
# missing values)
504
- assert_all_finite (missing )
505
- assert_all_finite (y )
506
- assert_all_finite (sample_weight )
507
- assert_all_finite (output_weight )
511
+ check_array (missing )
512
+ check_array (y , ensure_2d = False )
513
+ check_array (sample_weight , ensure_2d = False )
514
+ if output_weight is not None :
515
+ check_array (output_weight , ensure_2d = False )
508
516
509
517
# Make sure everything is consistent
510
518
check_X_y (X , y , accept_sparse = False , multi_output = True ,
@@ -598,7 +606,7 @@ def fit(self, X, y=None,
598
606
for k in feature_importance_type :
599
607
if k not in FEAT_IMP_CRITERIA :
600
608
msg = ("'{}' is not valid value for feature_importance, "
601
- "allowed critera are : {}" .format (k , FEAT_IMP_CRITERIA ))
609
+ "allowed criteria are : {}" .format (k , FEAT_IMP_CRITERIA ))
602
610
raise ValueError (msg )
603
611
604
612
if len (feature_importance_type ) > 0 and self .enable_pruning is False :
@@ -622,6 +630,10 @@ def fit(self, X, y=None,
622
630
self .basis_ = self .basis_ .smooth (X )
623
631
self .linear_fit (X , y , sample_weight , output_weight , missing ,
624
632
skip_scrub = True )
633
+
634
+ # Record the version used for fitting
635
+ self .fit_version_ = __version__
636
+
625
637
return self
626
638
627
639
# def forward_pass2(self, X, y=None,
@@ -1271,51 +1283,51 @@ def score(self, X, y=None, sample_weight=None, output_weight=None,
1271
1283
# mse0 = np.sum(y_sqr * output_weight) / m
1272
1284
return 1 - (mse / mse0 )
1273
1285
1274
- def score_samples (self , X , y = None , missing = None ):
1275
- '''
1276
-
1277
- Calculate sample-wise fit scores.
1278
-
1279
- Parameters
1280
- ----------
1281
-
1282
- X : array-like, shape = [m, n] where m is the number of samples
1283
- and n is the number of features The training predictors.
1284
- The X parameter can be a numpy array, a pandas DataFrame, a patsy
1285
- DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1286
- by patsy.dmatrices.
1287
-
1288
- y : array-like, optional (default=None), shape = [m, p] where m is the
1289
- number of samples, p the number of outputs.
1290
- The y parameter can be a numpy array, a pandas DataFrame,
1291
- a Patsy DesignMatrix, or can be left as None (default) if X was
1292
- the output of a call to patsy.dmatrices (in which case, X contains
1293
- the response).
1294
-
1295
- missing : array-like, shape = [m, n] where m is the number of samples
1296
- and n is the number of features.
1297
- The missing parameter can be a numpy array, a pandas DataFrame, or
1298
- a patsy DesignMatrix. All entries will be interpreted as boolean
1299
- values, with True indicating the corresponding entry in X should be
1300
- interpreted as missing. If the missing argument not used but the X
1301
- argument is a pandas DataFrame, missing will be inferred from X if
1302
- allow_missing is True.
1303
-
1304
- Returns
1305
- -------
1306
-
1307
- scores : array of shape=[m, p] of floats with maximum value of 1
1308
- (it can be negative).
1309
- The scores represent how good each output of each example is
1310
- predicted, a perfect score would be 1
1311
- (the score can be negative).
1312
-
1313
- '''
1314
- X , y , sample_weight , output_weight , missing = self ._scrub (
1315
- X , y , None , None , missing )
1316
- y_hat = self .predict (X , missing = missing )
1317
- residual = 1 - (y - y_hat ) ** 2 / y ** 2
1318
- return residual
1286
+ # def score_samples(self, X, y, missing=None):
1287
+ # '''
1288
+ #
1289
+ # Calculate sample-wise fit scores.
1290
+ #
1291
+ # Parameters
1292
+ # ----------
1293
+ #
1294
+ # X : array-like, shape = [m, n] where m is the number of samples
1295
+ # and n is the number of features. The training predictors.
1296
+ # The X parameter can be a numpy array, a pandas DataFrame, a patsy
1297
+ # DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1298
+ # by patsy.dmatrices.
1299
+ #
1300
+ # y : array-like, optional (default=None), shape = [m, p] where m is the
1301
+ # number of samples, p the number of outputs.
1302
+ # The y parameter can be a numpy array, a pandas DataFrame,
1303
+ # a Patsy DesignMatrix, or can be left as None (default) if X was
1304
+ # the output of a call to patsy.dmatrices (in which case, X contains
1305
+ # the response).
1306
+ #
1307
+ # missing : array-like, shape = [m, n] where m is the number of samples
1308
+ # and n is the number of features.
1309
+ # The missing parameter can be a numpy array, a pandas DataFrame, or
1310
+ # a patsy DesignMatrix. All entries will be interpreted as boolean
1311
+ # values, with True indicating the corresponding entry in X should be
1312
+ # interpreted as missing. If the missing argument is not used but the X
1313
+ # argument is a pandas DataFrame, missing will be inferred from X if
1314
+ # allow_missing is True.
1315
+ #
1316
+ # Returns
1317
+ # -------
1318
+ #
1319
+ # scores : array of shape=[m, p] of floats with maximum value of 1
1320
+ # (it can be negative).
1321
+ # The scores represent how good each output of each example is
1322
+ # predicted, a perfect score would be 1
1323
+ # (the score can be negative).
1324
+ #
1325
+ # '''
1326
+ # X, y, sample_weight, output_weight, missing = self._scrub(
1327
+ # X, y, None, None, missing)
1328
+ # y_hat = self.predict(X, missing=missing)
1329
+ # residual = 1 - (y - y_hat) ** 2 / y**2
1330
+ # return residual
1319
1331
1320
1332
def transform (self , X , missing = None ):
1321
1333
'''
0 commit comments