Skip to content
This repository was archived by the owner on Dec 6, 2023. It is now read-only.

Commit e6680b3

Browse files
committed
Get tests passing under sklearn 0.20.1 and python 3.7. Had to compile with latest master of cython and remove score_samples method.
1 parent dde5f89 commit e6680b3

File tree

5 files changed

+144
-87
lines changed

5 files changed

+144
-87
lines changed

pyearth/_knot_search.c

Lines changed: 40 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyearth/_knot_search.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ cdef class MultipleOutcomeDependentData:
8282
self.weights = weights
8383

8484
@classmethod
85-
def alloc(cls, FLOAT_t[:,:] y, w, INDEX_t m, INDEX_t n_outcomes, INDEX_t max_terms, FLOAT_t zero_tol):
85+
def alloc(cls, const FLOAT_t[:,:] y, w, INDEX_t m, INDEX_t n_outcomes, INDEX_t max_terms, FLOAT_t zero_tol):
8686
cdef list weights
8787
cdef list outcomes
8888
cdef int i, n_weights

pyearth/earth.py

Lines changed: 71 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,15 @@
44
from ._types import BOOL
55
from sklearn.base import RegressorMixin, BaseEstimator, TransformerMixin
66
from sklearn.utils.validation import (assert_all_finite, check_is_fitted,
7-
check_X_y)
7+
check_X_y, check_array)
88
import numpy as np
99
from scipy import sparse
1010
from ._version import get_versions
11+
try:
12+
from sklearn.utils.estimator_checks import check_complex_data
13+
except ImportError:
14+
check_complex_data = lambda x: x
15+
1116
__version__ = get_versions()['version']
1217

1318
class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
@@ -256,11 +261,11 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
256261
specified, then it is dict where each key is a feature importance type
257262
name and its corresponding value is an array of shape m.
258263
259-
`_version`: string
260-
The version of py-earth in which the Earth object was originally
261-
created. This information may be useful when dealing with
264+
`fit_version_`: string
265+
The version of py-earth with which the Earth object was originally
266+
fitted. This information may be useful when dealing with
262267
serialized Earth objects.
263-
268+
264269
265270
References
266271
----------
@@ -293,7 +298,7 @@ class Earth(BaseEstimator, RegressorMixin, TransformerMixin):
293298
'feature_importance_type',
294299
'verbose'
295300
])
296-
301+
297302
def __init__(self, max_terms=None, max_degree=None, allow_missing=False,
298303
penalty=None, endspan_alpha=None, endspan=None,
299304
minspan_alpha=None, minspan=None,
@@ -323,7 +328,6 @@ def __init__(self, max_terms=None, max_degree=None, allow_missing=False,
323328
self.enable_pruning = enable_pruning
324329
self.feature_importance_type = feature_importance_type
325330
self.verbose = verbose
326-
self._version = __version__
327331

328332
def __eq__(self, other):
329333
if self.__class__ is not other.__class__:
@@ -399,6 +403,7 @@ def _scrub_x(self, X, missing, **kwargs):
399403
raise TypeError('A sparse matrix was passed, but dense data '
400404
'is required. Use X.toarray() to convert to '
401405
'dense.')
406+
check_array(X, ensure_2d=True, force_all_finite=False)
402407
X = np.asarray(X, dtype=np.float64, order='F')
403408

404409
# Figure out missingness
@@ -466,14 +471,16 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
466471

467472
# Convert y to internally used data type
468473
y = np.asarray(y, dtype=np.float64)
469-
assert_all_finite(y)
470474

471475
if len(y.shape) == 1:
472476
y = y[:, np.newaxis]
473477

474478
# Deal with sample_weight
475479
if sample_weight is None:
476-
sample_weight = np.ones((y.shape[0], 1), dtype=y.dtype)
480+
try:
481+
sample_weight = np.ones((y.shape[0], 1), dtype=y.dtype)
482+
except:
483+
raise
477484
else:
478485
sample_weight = np.asarray(sample_weight, dtype=np.float64)
479486
assert_all_finite(sample_weight)
@@ -501,10 +508,11 @@ def _scrub(self, X, y, sample_weight, output_weight, missing, **kwargs):
501508

502509
# Make sure everything is finite (except X, which is allowed to have
503510
# missing values)
504-
assert_all_finite(missing)
505-
assert_all_finite(y)
506-
assert_all_finite(sample_weight)
507-
assert_all_finite(output_weight)
511+
check_array(missing)
512+
check_array(y, ensure_2d=False)
513+
check_array(sample_weight, ensure_2d=False)
514+
if output_weight is not None:
515+
check_array(output_weight, ensure_2d=False)
508516

509517
# Make sure everything is consistent
510518
check_X_y(X, y, accept_sparse=False, multi_output=True,
@@ -598,7 +606,7 @@ def fit(self, X, y=None,
598606
for k in feature_importance_type:
599607
if k not in FEAT_IMP_CRITERIA:
600608
msg = ("'{}' is not valid value for feature_importance, "
601-
"allowed critera are : {}".format(k, FEAT_IMP_CRITERIA))
609+
"allowed criteria are : {}".format(k, FEAT_IMP_CRITERIA))
602610
raise ValueError(msg)
603611

604612
if len(feature_importance_type) > 0 and self.enable_pruning is False:
@@ -622,6 +630,10 @@ def fit(self, X, y=None,
622630
self.basis_ = self.basis_.smooth(X)
623631
self.linear_fit(X, y, sample_weight, output_weight, missing,
624632
skip_scrub=True)
633+
634+
# Record the version used for fitting
635+
self.fit_version_ = __version__
636+
625637
return self
626638

627639
# def forward_pass2(self, X, y=None,
@@ -1271,51 +1283,51 @@ def score(self, X, y=None, sample_weight=None, output_weight=None,
12711283
# mse0 = np.sum(y_sqr * output_weight) / m
12721284
return 1 - (mse / mse0)
12731285

1274-
def score_samples(self, X, y=None, missing=None):
1275-
'''
1276-
1277-
Calculate sample-wise fit scores.
1278-
1279-
Parameters
1280-
----------
1281-
1282-
X : array-like, shape = [m, n] where m is the number of samples
1283-
and n is the number of features The training predictors.
1284-
The X parameter can be a numpy array, a pandas DataFrame, a patsy
1285-
DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1286-
by patsy.dmatrices.
1287-
1288-
y : array-like, optional (default=None), shape = [m, p] where m is the
1289-
number of samples, p the number of outputs.
1290-
The y parameter can be a numpy array, a pandas DataFrame,
1291-
a Patsy DesignMatrix, or can be left as None (default) if X was
1292-
the output of a call to patsy.dmatrices (in which case, X contains
1293-
the response).
1294-
1295-
missing : array-like, shape = [m, n] where m is the number of samples
1296-
and n is the number of features.
1297-
The missing parameter can be a numpy array, a pandas DataFrame, or
1298-
a patsy DesignMatrix. All entries will be interpreted as boolean
1299-
values, with True indicating the corresponding entry in X should be
1300-
interpreted as missing. If the missing argument not used but the X
1301-
argument is a pandas DataFrame, missing will be inferred from X if
1302-
allow_missing is True.
1303-
1304-
Returns
1305-
-------
1306-
1307-
scores : array of shape=[m, p] of floats with maximum value of 1
1308-
(it can be negative).
1309-
The scores represent how good each output of each example is
1310-
predicted, a perfect score would be 1
1311-
(the score can be negative).
1312-
1313-
'''
1314-
X, y, sample_weight, output_weight, missing = self._scrub(
1315-
X, y, None, None, missing)
1316-
y_hat = self.predict(X, missing=missing)
1317-
residual = 1 - (y - y_hat) ** 2 / y**2
1318-
return residual
1286+
# def score_samples(self, X, y, missing=None):
1287+
# '''
1288+
#
1289+
# Calculate sample-wise fit scores.
1290+
#
1291+
# Parameters
1292+
# ----------
1293+
#
1294+
# X : array-like, shape = [m, n] where m is the number of samples
1295+
# and n is the number of features The training predictors.
1296+
# The X parameter can be a numpy array, a pandas DataFrame, a patsy
1297+
# DesignMatrix, or a tuple of patsy DesignMatrix objects as output
1298+
# by patsy.dmatrices.
1299+
#
1300+
# y : array-like, optional (default=None), shape = [m, p] where m is the
1301+
# number of samples, p the number of outputs.
1302+
# The y parameter can be a numpy array, a pandas DataFrame,
1303+
# a Patsy DesignMatrix, or can be left as None (default) if X was
1304+
# the output of a call to patsy.dmatrices (in which case, X contains
1305+
# the response).
1306+
#
1307+
# missing : array-like, shape = [m, n] where m is the number of samples
1308+
# and n is the number of features.
1309+
# The missing parameter can be a numpy array, a pandas DataFrame, or
1310+
# a patsy DesignMatrix. All entries will be interpreted as boolean
1311+
# values, with True indicating the corresponding entry in X should be
1312+
# interpreted as missing. If the missing argument not used but the X
1313+
# argument is a pandas DataFrame, missing will be inferred from X if
1314+
# allow_missing is True.
1315+
#
1316+
# Returns
1317+
# -------
1318+
#
1319+
# scores : array of shape=[m, p] of floats with maximum value of 1
1320+
# (it can be negative).
1321+
# The scores represent how good each output of each example is
1322+
# predicted, a perfect score would be 1
1323+
# (the score can be negative).
1324+
#
1325+
# '''
1326+
# X, y, sample_weight, output_weight, missing = self._scrub(
1327+
# X, y, None, None, missing)
1328+
# y_hat = self.predict(X, missing=missing)
1329+
# residual = 1 - (y - y_hat) ** 2 / y**2
1330+
# return residual
13191331

13201332
def transform(self, X, missing=None):
13211333
'''

0 commit comments

Comments (0)