Skip to content

Commit 1fbbfcd

Browse files
meeseeksmachineivirshupadamgayosoDriesSchaumontlazappi
authored
Backport PR #2414 on branch 1.9.x (matplotlib 3.7 compat) (#2419)
* Backport PR #2414: matplotlib 3.7 compat * fix scrublet * Update visium default plot for matplotlib 3.7 * Update hashsolo docstrings * skip plotting test that changed on mpl 3.7 if mpl < 3.7 is installed * Fix hashsolo docs (again) * update anndata-dev tests to install anndata test deps * Temporarily set warnings as errors to False for doc builds * Release notes * Fix using custom layer with highly_variable_genes (#2302) * Fix using custom layer with highly_variable_genes * Add tests * Add release note * Move release note to correct section * Format release notes * Add check for number of normalized dispersions (#2231) * Add check for number of normalized dispersions In sc.pp.highly_variable_genes() when flavor='cell_ranger' and n_top_genes is set check that enough normalized dispersions have been calculated and if not raise a warning and set n_top_genes to the number of calculated dispersions. Fixes #2230 * Use .size instead of len() * Add test for n_top_genes warning * Add release note * Remove blank line Co-authored-by: Isaac Virshup <ivirshup@gmail.com> --------- Co-authored-by: Isaac Virshup <ivirshup@gmail.com> Co-authored-by: adamgayoso <adamgayoso@users.noreply.github.com> Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Luke Zappia <lazappi@users.noreply.github.com>
1 parent 97c2617 commit 1fbbfcd

File tree

10 files changed

+66
-19
lines changed

10 files changed

+66
-19
lines changed

.azure-pipelines.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
displayName: 'Install dependencies'
5151
5252
- script: |
53-
pip install -v git+https://github.com/scverse/anndata
53+
'pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"'
5454
displayName: 'Install development anndata'
5555
condition: eq(variables['ANNDATA_DEV'], 'yes')
5656

docs/conf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@
128128

129129

130130
def setup(app):
131-
app.warningiserror = on_rtd
131+
app.warningiserror = (
132+
False # Temporarily disable warnings as errors to get 1.9.2 out
133+
)
132134

133135

134136
# -- Options for other output formats ------------------------------------------

docs/release-notes/1.9.2.md

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
### 1.9.2 {small}`the future`
2-
3-
4-
```{rubric} Documentation
5-
```
1+
### 1.9.2 {small}`2023-02-16`
62

73
```{rubric} Bug fixes
84
```
95

10-
```{rubric} Performance
11-
```
6+
* {func}`~scanpy.pp.highly_variable_genes` `layer` argument now works in tandem with `batches` {pr}`2302` {smaller}`D Schaumont`
7+
* {func}`~scanpy.pp.highly_variable_genes` with `flavor='cell_ranger'` now handles the case in {issue}`2230` where the number of calculated dispersions is less than `n_top_genes` {pr}`2231` {smaller}`L Zappia`
8+
* Fix compatibility with matplotlib 3.7 {pr}`2414` {smaller}`I Virshup` {smaller}`P Fisher`
9+
* Fix scrublet numpy matrix compatibility issue {pr}`2395` {smaller}`A Gayoso`

scanpy/external/pp/_hashsolo.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525

2626

2727
def _calculate_log_likelihoods(data, number_of_noise_barcodes):
28-
"""Calculate log likelihoods for each hypothesis, negative, singlet, doublet
28+
"""\
29+
Calculate log likelihoods for each hypothesis, negative, singlet, doublet
2930
3031
Parameters
3132
----------
@@ -43,8 +44,8 @@ def _calculate_log_likelihoods(data, number_of_noise_barcodes):
4344
"""
4445

4546
def gaussian_updates(data, mu_o, std_o):
46-
"""Update parameters of your gaussian
47-
https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
47+
"""\
48+
Update parameters of your gaussian https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
4849
4950
Parameters
5051
----------
@@ -210,7 +211,7 @@ def gaussian_updates(data, mu_o, std_o):
210211

211212

212213
def _calculate_bayes_rule(data, priors, number_of_noise_barcodes):
213-
"""
214+
"""\
214215
Calculate bayes rule from log likelihoods
215216
216217
Parameters
@@ -263,7 +264,8 @@ def hashsolo(
263264
number_of_noise_barcodes: int = None,
264265
inplace: bool = True,
265266
):
266-
"""Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_.
267+
"""\
268+
Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_.
267269
268270
.. note::
269271
More information and bug reports `here <https://github.com/calico/solo>`__.
@@ -294,9 +296,8 @@ def hashsolo(
294296
295297
Returns
296298
-------
297-
adata
298-
if inplace is False returns AnnData with demultiplexing results
299-
in .obs attribute otherwise does is in place
299+
if inplace is False returns AnnData with demultiplexing results
300+
in .obs attribute otherwise does it in place
300301
301302
Examples
302303
--------

scanpy/external/pp/_scrublet.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,11 @@ def _scrublet_call_doublets(
431431

432432
if mean_center:
433433
logg.info('Embedding transcriptomes using PCA...')
434+
# Sklearn PCA doesn't like matrices, so convert to arrays
435+
if isinstance(scrub._E_obs_norm, np.matrix):
436+
scrub._E_obs_norm = np.asarray(scrub._E_obs_norm)
437+
if isinstance(scrub._E_sim_norm, np.matrix):
438+
scrub._E_sim_norm = np.asarray(scrub._E_sim_norm)
434439
sl.pipeline_pca(
435440
scrub, n_prin_comps=n_prin_comps, random_state=scrub.random_state
436441
)

scanpy/plotting/_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
VBound = Union[str, float, Callable[[Sequence[float]], float]]
3333

3434

35-
class _AxesSubplot(Axes, axes.SubplotBase, ABC):
35+
class _AxesSubplot(Axes, axes.SubplotBase):
3636
"""Intersection between Axes and SubplotBase: Has methods of both"""
3737

3838

scanpy/preprocessing/_highly_variable_genes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,12 @@ def _highly_variable_genes_single_batch(
265265
if n_top_genes > adata.n_vars:
266266
logg.info('`n_top_genes` > `adata.n_var`, returning all genes.')
267267
n_top_genes = adata.n_vars
268+
if n_top_genes > dispersion_norm.size:
269+
warnings.warn(
270+
'`n_top_genes` > number of normalized dispersions, returning all genes with normalized dispersions.',
271+
UserWarning,
272+
)
273+
n_top_genes = dispersion_norm.size
268274
disp_cut_off = dispersion_norm[n_top_genes - 1]
269275
gene_subset = np.nan_to_num(df['dispersions_norm'].values) >= disp_cut_off
270276
logg.debug(
@@ -458,6 +464,7 @@ def highly_variable_genes(
458464

459465
hvg = _highly_variable_genes_single_batch(
460466
adata_subset,
467+
layer=layer,
461468
min_disp=min_disp,
462469
max_disp=max_disp,
463470
min_mean=min_mean,
-2 Bytes
Loading

scanpy/tests/test_embedding_plots.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from functools import partial
22
from pathlib import Path
33

4+
import matplotlib as mpl
45
import matplotlib.pyplot as plt
56
from matplotlib.colors import Normalize
67
from matplotlib.testing.compare import compare_images
@@ -304,6 +305,11 @@ def test_visium_circles(image_comparer): # standard visium data
304305

305306

306307
def test_visium_default(image_comparer): # default values
308+
from packaging.version import parse as parse_version
309+
310+
if parse_version(mpl.__version__) < parse_version("3.7.0"):
311+
pytest.xfail("Matplotlib 3.7.0+ required for this test")
312+
307313
save_and_compare_images = image_comparer(ROOT, FIGS, tol=5)
308314
adata = sc.read_visium(HERE / '_data' / 'visium_data' / '1.0.0')
309315
adata.obs = adata.obs.astype({'array_row': 'str'})

scanpy/tests/test_highly_variable_genes.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,26 @@ def test_highly_variable_genes_basic():
3636
assert 'highly_variable_intersection' in adata.var.columns
3737

3838
adata = sc.datasets.blobs()
39-
adata.obs['batch'] = np.random.binomial(4, 0.5, size=(adata.n_obs))
39+
batch = np.random.binomial(4, 0.5, size=(adata.n_obs))
40+
adata.obs['batch'] = batch
4041
adata.obs['batch'] = adata.obs['batch'].astype('category')
4142
sc.pp.highly_variable_genes(adata, batch_key='batch', n_top_genes=3)
4243
assert 'highly_variable_nbatches' in adata.var.columns
4344
assert adata.var['highly_variable'].sum() == 3
45+
highly_var_first_layer = adata.var['highly_variable']
46+
47+
adata = sc.datasets.blobs()
48+
new_layer = adata.X.copy()
49+
np.random.shuffle(new_layer)
50+
adata.layers['test_layer'] = new_layer
51+
adata.obs['batch'] = batch
52+
adata.obs['batch'] = adata.obs['batch'].astype('category')
53+
sc.pp.highly_variable_genes(
54+
adata, batch_key='batch', n_top_genes=3, layer='test_layer'
55+
)
56+
assert 'highly_variable_nbatches' in adata.var.columns
57+
assert adata.var['highly_variable'].sum() == 3
58+
assert (highly_var_first_layer != adata.var['highly_variable']).any()
4459

4560
sc.pp.highly_variable_genes(adata)
4661
no_batch_hvg = adata.var.highly_variable.copy()
@@ -491,3 +506,16 @@ def test_seurat_v3_mean_var_output_with_batchkey():
491506
)
492507
np.testing.assert_allclose(true_mean, result_df['means'], rtol=2e-05, atol=2e-05)
493508
np.testing.assert_allclose(true_var, result_df['variances'], rtol=2e-05, atol=2e-05)
509+
510+
511+
def test_cellranger_n_top_genes_warning():
512+
X = np.random.poisson(2, (100, 30))
513+
adata = sc.AnnData(X, dtype=X.dtype)
514+
sc.pp.normalize_total(adata)
515+
sc.pp.log1p(adata)
516+
517+
with pytest.warns(
518+
UserWarning,
519+
match="`n_top_genes` > number of normalized dispersions, returning all genes with normalized dispersions.",
520+
):
521+
sc.pp.highly_variable_genes(adata, n_top_genes=1000, flavor="cell_ranger")

0 commit comments

Comments
 (0)