Skip to content

Commit 9c3cd6c

Browse files
FanwangM and Copilot authored
Update docstrings and documentation (#281)
* Update docstrings and documentation
* Update selector/measures/diversity.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 48864dd commit 9c3cd6c

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

selector/measures/diversity.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
#
2323
# --
2424

25-
"""Molecule dataset diversity calculation module."""
25+
"""Subset diversity calculation module."""
2626

2727
import warnings
2828

@@ -308,7 +308,7 @@ def wdud(x: np.ndarray) -> float:
308308
Returns
309309
-------
310310
float :
311-
The mean of the WDUD of each feature over all molecules.
311+
The mean of the WDUD of each feature over all samples.
312312
313313
Notes
314314
-----
@@ -376,25 +376,25 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
376376
.. math::
377377
g(S) = \sum_{i < j}^k O(i, j) + \sum^k_m E(m)
378378
379-
where :math:`i, j` is over the subset of molecules,
379+
where :math:`i, j` is over the subset of samples, e.g. molecules,
380380
:math:`O(i, j)` is the approximate overlap between hyperspheres,
381381
:math:`k` is the number of features and :math:`E`
382-
is the edge penalty of a molecule.
382+
is the edge penalty of a sample.
383383
384384
Lower values mean more diversity.
385385
386386
Parameters
387387
----------
388388
x : ndarray
389-
Feature matrix of all molecules.
389+
Feature matrix of all samples.
390390
x_subset : ndarray
391-
Feature matrix of selected subset of molecules.
391+
Feature matrix of selected subset of samples.
392392
393393
Returns
394394
-------
395395
float :
396396
The approximate overlapping volume of hyperspheres
397-
drawn around the selected points/molecules.
397+
drawn around the selected points/samples.
398398
399399
Notes
400400
-----
@@ -409,7 +409,7 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
409409
"Input matrix cannot be binary because the diversity measurement is designed for continuous orthogonal features."
410410
)
411411

412-
# Find the maximum and minimum over each feature across all molecules.
412+
# Find the maximum and minimum over each feature across all samples.
413413
max_x = np.max(x, axis=0)
414414
min_x = np.min(x, axis=0)
415415

@@ -432,9 +432,7 @@ def hypersphere_overlap_of_subset(x: np.ndarray, x_subset: np.array) -> float:
432432
# r_o = hypersphere radius
433433
r_o = d * np.sqrt(1 / k)
434434
if r_o > 0.5:
435-
warnings.warn(
436-
"The number of molecules should be much larger" " than the number of features."
437-
)
435+
warnings.warn("The number of samples should be much larger" " than the number of features.")
438436
g_s = 0
439437
edge = 0
440438

0 commit comments

Comments
 (0)