@@ -399,6 +399,29 @@ def __init__(
399399 )
400400
401401
402+ def _directional_distance (equations , points ):
403+ """
404+ Computes the distance of the points to the planes defined by the equations
405+ with respect to the direction of the first dimension.
406+
407+ equations : ndarray of shape (n_facets, n_dim)
408+ each row contains the coefficienst for the plane equation of the form
409+ equations[i, 0]*x_1 + ...
410+ + equations[i, -2]*x_{n_dim} = equations[i, -1]
411+ -equations[i, -1] is the offset
412+ points : ndarray of shape (n_samples, n_dim)
413+ points to compute the directional distance from
414+
415+ Returns
416+ -------
417+ directional_distance : ndarray of shape (nsamples, nequations)
418+ closest distance wrt. the first dimension of the point to the planes
419+ defined by the equations
420+ """
421+ orthogonal_distances = - (points @ equations [:, :- 1 ].T ) - equations [:, - 1 :].T
422+ return - orthogonal_distances / equations [:, :1 ].T
423+
424+
402425class DirectionalConvexHull :
403426 """
404427 Performs Sample Selection by constructing a Directional Convex Hull and determining the distance to the hull as outlined in the reference
@@ -411,6 +434,14 @@ class DirectionalConvexHull:
411434 directional convex hull construction (also known as the low-
412435 dimensional (LD) hull). By default [0] is used.
413436
437+ tolerance : float, default=1.0E-12
438+ Tolerance for the negative distances to the directional
439+ convex hull to consider a point below the convex hull. Depending
440+ if a point is below or above the convex hull the distance is
441+ differently computed. A very low value can result in a completely
442+ wrong distances. Distances cannot be distinguished from zero up
443+ to tolerance. It is recommended to leave the default setting.
444+
414445 Attributes
415446 ----------
416447
@@ -426,22 +457,20 @@ class DirectionalConvexHull:
426457 Interpolater for the features in the high-
427458 dimensional space
428459
429- interpolator_y_ : scipy.interpolate.interpnd.LinearNDInterpolator
430- Interpolater for the targets y
431-
432460 References
433461 ----------
434462 .. [dch] A. Anelli, E. A. Engel, C. J. Pickard and M. Ceriotti,
435463 Physical Review Materials, 2018.
436464 """
437465
438- def __init__ (self , low_dim_idx = None ):
466+ def __init__ (self , low_dim_idx = None , tolerance = 1e-12 ):
439467 self .low_dim_idx = low_dim_idx
440468
441469 if low_dim_idx is None :
442470 self .low_dim_idx = [0 ]
443471 else :
444472 self .low_dim_idx = low_dim_idx
473+ self .tolerance = tolerance
445474
446475 def fit (self , X , y ):
447476 """
@@ -490,39 +519,43 @@ def fit(self, X, y):
490519 # create high-dimensional feature matrix
491520 high_dim_feats = X [:, self .high_dim_idx_ ]
492521 # get scipy convex hull
493- convex_hull = ConvexHull (convex_hull_data )
522+ self . convex_hull_ = ConvexHull (convex_hull_data , incremental = True )
494523 # get normal equations to the hull simplices
495- y_normal = convex_hull .equations [:, 0 ]
524+ y_normal = self . convex_hull_ .equations [:, 0 ]
496525
497526 # get vertices_idx of the convex hull
498- self .selected_idx_ = np .unique (
499- convex_hull .simplices [np .where (y_normal < 0 )[0 ]].flatten ()
500- )
527+ directional_facets_idx = np .where (y_normal < 0 )[0 ]
528+ self .directional_simplices_ = self .convex_hull_ .simplices [
529+ directional_facets_idx
530+ ]
531+ self ._directional_equations_ = self .convex_hull_ .equations [
532+ directional_facets_idx
533+ ]
534+
535+ self .selected_idx_ = np .unique (self .directional_simplices_ .flatten ())
536+ self ._directional_points_ = convex_hull_data [self .selected_idx_ ]
501537
502538 # required for the score_feature_matrix function
503539 self .interpolator_high_dim_ = _linear_interpolator (
504540 points = convex_hull_data [self .selected_idx_ , 1 :],
505541 values = high_dim_feats [self .selected_idx_ ],
506542 )
507543
508- # required to compute the distance of the low-dimensional feature to the convex hull
509- self .interpolator_y_ = _linear_interpolator (
510- points = convex_hull_data [self .selected_idx_ , 1 :],
511- values = convex_hull_data [self .selected_idx_ , 0 ],
512- )
513-
514544 return self
515545
546+ @property
547+ def directional_vertices_ (self ):
548+ return self .selected_idx_
549+
516550 def _check_X_y (self , X , y ):
517- return check_X_y (X , y , ensure_min_features = 2 , multi_output = False )
551+ return check_X_y (X , y , ensure_min_features = 1 , multi_output = False )
518552
519553 def _check_is_fitted (self , X ):
520554 check_is_fitted (
521555 self ,
522556 [
523557 "high_dim_idx_" ,
524558 "interpolator_high_dim_" ,
525- "interpolator_y_" ,
526559 "selected_idx_" ,
527560 ],
528561 )
@@ -556,7 +589,7 @@ def score_samples(self, X, y):
556589
557590 Returns
558591 -------
559- dch_distance : numpy.array of shape (n_samples, len(high_dim_idx_))
592+ dch_distance : ndarray of shape (n_samples, len(high_dim_idx_))
560593 The distance (residuals) of samples to the convex hull in
561594 the higher-dimensional space.
562595 """
@@ -565,17 +598,46 @@ def score_samples(self, X, y):
565598
566599 # features used for the convex hull construction
567600 low_dim_feats = X [:, self .low_dim_idx ]
601+ hull_space_data = np .hstack ((y .reshape (- 1 , 1 ), low_dim_feats ))
568602
569603 # the X points projected on the convex surface
570- interpolated_y = self .interpolator_y_ ( low_dim_feats ).reshape (y .shape )
604+ return self ._directional_convex_hull_distance ( hull_space_data ).reshape (y .shape )
571605
572- if np .any (np .isnan (interpolated_y )):
573- warnings .warn (
574- "There are samples in X with a low-dimensional part that is outside of the range of the convex surface. Distance will contain nans." ,
575- UserWarning ,
576- )
606+ def _directional_convex_hull_distance (self , points ):
607+ """
608+ Get the distance to the fitted directional convex hull in the target dimension y.
609+
610+ If a point lies above the convex hull, it will have a positive distance.
611+ If a point lies below the convex hull, it will have a negative distance - this should only
612+ occur if you pass a point that the convex hull has not been fitted on in the `fit` function.
613+ If a point lies on the convex hull, it will have a distance of zero.
577614
578- return y - interpolated_y
615+ Parameters
616+ ----------
617+ points : The points for which you would like to calculate the distance to the
618+ fitted directional convex hull.
619+ """
620+ # directional distances to all plane equations
621+ all_directional_distances = _directional_distance (
622+ self ._directional_equations_ , points
623+ )
624+ print (all_directional_distances )
625+ # we get negative distances for each plane to check if any distance is below the threshold
626+ below_directional_convex_hull = np .any (
627+ all_directional_distances < - self .tolerance , axis = 1
628+ )
629+ # directional distances to corresponding plane equation
630+ directional_distances = np .zeros (len (points ))
631+ directional_distances [~ below_directional_convex_hull ] = np .min (
632+ all_directional_distances [~ below_directional_convex_hull ], axis = 1
633+ )
634+ # some distances can be positive, so we take the max of all negative distances
635+ negative_directional_distances = all_directional_distances .copy ()
636+ negative_directional_distances [all_directional_distances > 0 ] = - np .inf
637+ directional_distances [below_directional_convex_hull ] = np .max (
638+ negative_directional_distances [below_directional_convex_hull ], axis = 1
639+ )
640+ return directional_distances
579641
580642 def score_feature_matrix (self , X ):
581643 """
0 commit comments