2222#
2323# --
2424
25- """Molecule dataset diversity calculation module."""
25+ """Subset diversity calculation module."""
2626
2727import warnings
2828
@@ -308,7 +308,7 @@ def wdud(x: np.ndarray) -> float:
308308 Returns
309309 -------
310310 float :
311- The mean of the WDUD of each feature over all molecules .
311+ The mean of the WDUD of each feature over all samples.
312312
313313 Notes
314314 -----
@@ -376,25 +376,25 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
376376 .. math::
377377 g(S) = \sum_{i < j}^k O(i, j) + \sum^k_m E(m)
378378
379- where :math:`i, j` is over the subset of molecules,
379+ where :math:`i, j` range over the subset of samples, e.g. molecules,
380380 :math:`O(i, j)` is the approximate overlap between hyperspheres,
381381 :math:`k` is the number of features and :math:`E`
382- is the edge penalty of a molecule .
382+ is the edge penalty of a sample.
383383
384384 Lower values mean more diversity.
385385
386386 Parameters
387387 ----------
388388 x : ndarray
389- Feature matrix of all molecules .
389+ Feature matrix of all samples.
390390 x_subset : ndarray
391- Feature matrix of selected subset of molecules .
391+ Feature matrix of selected subset of samples.
392392
393393 Returns
394394 -------
395395 float :
396396 The approximate overlapping volume of hyperspheres
397- drawn around the selected points/molecules .
397+ drawn around the selected points/samples.
398398
399399 Notes
400400 -----
@@ -409,7 +409,7 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
409409 "Input matrix cannot be binary because the diversity measurement is designed for continuous orthogonal features."
410410 )
411411
412- # Find the maximum and minimum over each feature across all molecules .
412+ # Find the maximum and minimum over each feature across all samples.
413413 max_x = np .max (x , axis = 0 )
414414 min_x = np .min (x , axis = 0 )
415415
@@ -432,9 +432,7 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
432432 # r_o = hypersphere radius
433433 r_o = d * np .sqrt (1 / k )
434434 if r_o > 0.5 :
435- warnings .warn (
436- "The number of molecules should be much larger" " than the number of features."
437- )
435+ warnings .warn ("The number of samples should be much larger" " than the number of features." )
438436 g_s = 0
439437 edge = 0
440438
0 commit comments