Remove use of AnnData constructor dtype kwarg (#2658) (#2659)

ivirshup · web-flow · commit 46969b4d3062 · 2023-09-08T20:44:58.000Z
* Remove use of AnnData constructor dtype kwarg (#2658)
* Remove use of AnnData constructor dtype kwarg
* release note
* Fix release note
(cherry picked from commit 0b49eeb)
* Fix test
* Set release date
* Add release notes to index
diff --git a/docs/release-notes/1.9.5.md b/docs/release-notes/1.9.5.md
@@ -0,0 +1,6 @@
+### 1.9.5 {small}`2023-09-08`
+
+```{rubric} Bug fixes
+```
+
+- Remove use of deprecated `dtype` argument to AnnData constructor {pr}`2658` {smaller}`Isaac Virshup`
diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md
@@ -1,5 +1,8 @@
 ## Version 1.9
 
+```{include} /release-notes/1.9.5.md
+```
+
 ```{include} /release-notes/1.9.4.md
 ```
 
diff --git a/scanpy/datasets/_datasets.py b/scanpy/datasets/_datasets.py
@@ -49,7 +49,7 @@ def blobs(
         cluster_std=cluster_std,
         random_state=0,
     )
-    return ad.AnnData(X, obs=dict(blobs=y.astype(str)), dtype=X.dtype)
+    return ad.AnnData(X, obs=dict(blobs=y.astype(str)))
 
 
 @check_datasetdir_exists
@@ -172,13 +172,13 @@ def paul15() -> ad.AnnData:
     backup_url = 'http://falexwolf.de/data/paul15.h5'
     _utils.check_presence_download(filename, backup_url)
     with h5py.File(filename, 'r') as f:
-        X = f['data.debatched'][()]
+        X = f['data.debatched'][()].astype(np.float32)
         gene_names = f['data.debatched_rownames'][()].astype(str)
         cell_names = f['data.debatched_colnames'][()].astype(str)
         clusters = f['cluster.id'][()].flatten().astype(int)
         infogenes_names = f['info.genes_strings'][()].astype(str)
     # each row has to correspond to a observation, therefore transpose
-    adata = ad.AnnData(X.transpose(), dtype=np.float32)
+    adata = ad.AnnData(X.transpose())
     adata.var_names = gene_names
     adata.row_names = cell_names
     # names reflecting the cell type identifications from the paper
diff --git a/scanpy/testing/_helpers/__init__.py b/scanpy/testing/_helpers/__init__.py
@@ -21,7 +21,7 @@
 
 def check_rep_mutation(func, X, *, fields=("layer", "obsm"), **kwargs):
     """Check that only the array meant to be modified is modified."""
-    adata = sc.AnnData(X=X.copy(), dtype=X.dtype)
+    adata = sc.AnnData(X=X.copy())
     for field in fields:
         sc.get._set_obs_rep(adata, X, **{field: field})
     X_array = asarray(X)
diff --git a/scanpy/tests/test_get.py b/scanpy/tests/test_get.py
@@ -37,15 +37,14 @@ def adata():
     adata.layers['double'] is sparse np.ones((2,2)) * 2 to also test sparse matrices
     """
     return AnnData(
-        X=np.ones((2, 2)),
+        X=np.ones((2, 2), dtype=int),
         obs=pd.DataFrame(
             {"obs1": [0, 1], "obs2": ["a", "b"]}, index=["cell1", "cell2"]
         ),
         var=pd.DataFrame(
             {"gene_symbols": ["genesymbol1", "genesymbol2"]}, index=["gene1", "gene2"]
         ),
         layers={"double": sparse.csr_matrix(np.ones((2, 2)), dtype=int) * 2},
-        dtype=int,
     )
 
 
@@ -60,12 +59,11 @@ def test_obs_df(adata):
 
     # make raw with different genes than adata
     adata.raw = AnnData(
-        X=np.array([[1, 2, 3], [2, 4, 6]]),
+        X=np.array([[1, 2, 3], [2, 4, 6]], dtype=np.float64),
         var=pd.DataFrame(
             {"gene_symbols": ["raw1", "raw2", 'raw3']},
             index=["gene2", "gene3", "gene4"],
         ),
-        dtype='float64',
     )
     pd.testing.assert_frame_equal(
         sc.get.obs_df(
@@ -157,9 +155,8 @@ def test_repeated_gene_symbols():
     gene_symbols = [f"symbol_{i}" for i in ["a", "b", "b", "c"]]
     var_names = pd.Index([f"id_{i}" for i in ["a", "b.1", "b.2", "c"]])
     adata = sc.AnnData(
-        np.arange(3 * 4).reshape((3, 4)),
+        np.arange(3 * 4, dtype=np.float32).reshape((3, 4)),
         var=pd.DataFrame({"gene_symbols": gene_symbols}, index=var_names),
-        dtype=np.float32,
     )
 
     with pytest.raises(KeyError, match="symbol_b"):
diff --git a/scanpy/tests/test_highly_variable_genes.py b/scanpy/tests/test_highly_variable_genes.py
@@ -483,7 +483,7 @@ def test_seurat_v3_mean_var_output_with_batchkey():
 
 def test_cellranger_n_top_genes_warning():
     X = np.random.poisson(2, (100, 30))
-    adata = sc.AnnData(X, dtype=X.dtype)
+    adata = sc.AnnData(X)
     sc.pp.normalize_total(adata)
     sc.pp.log1p(adata)
 
diff --git a/scanpy/tests/test_normalization.py b/scanpy/tests/test_normalization.py
@@ -37,14 +37,14 @@ def typ(request):
 
 
 @pytest.mark.parametrize('dtype', ['float32', 'int64'])
-def test_normalize_total(typ, dtype):
-    adata = AnnData(typ(X_total), dtype=dtype)
+def test_normalize_total(array_type, dtype):
+    adata = AnnData(array_type(X_total).astype(dtype))
     sc.pp.normalize_total(adata, key_added='n_counts')
     assert np.allclose(np.ravel(adata.X.sum(axis=1)), [3.0, 3.0, 3.0])
     sc.pp.normalize_total(adata, target_sum=1, key_added='n_counts2')
     assert np.allclose(np.ravel(adata.X.sum(axis=1)), [1.0, 1.0, 1.0])
 
-    adata = AnnData(typ(X_frac), dtype=dtype)
+    adata = AnnData(array_type(X_frac).astype(dtype))
     sc.pp.normalize_total(adata, exclude_highly_expressed=True, max_fraction=0.7)
     assert np.allclose(np.ravel(adata.X[:, 1:3].sum(axis=1)), [1.0, 1.0, 1.0])
 
@@ -59,17 +59,17 @@ def test_normalize_total_rep(typ, dtype):
 
 
 @pytest.mark.parametrize('dtype', ['float32', 'int64'])
-def test_normalize_total_layers(typ, dtype):
-    adata = AnnData(typ(X_total), dtype=dtype)
+def test_normalize_total_layers(array_type, dtype):
+    adata = AnnData(array_type(X_total).astype(dtype))
     adata.layers["layer"] = adata.X.copy()
     with pytest.warns(FutureWarning, match=r".*layers.*deprecated"):
         sc.pp.normalize_total(adata, layers=["layer"])
     assert np.allclose(adata.layers["layer"].sum(axis=1), [3.0, 3.0, 3.0])
 
 
 @pytest.mark.parametrize('dtype', ['float32', 'int64'])
-def test_normalize_total_view(typ, dtype):
-    adata = AnnData(typ(X_total), dtype=dtype)
+def test_normalize_total_view(array_type, dtype):
+    adata = AnnData(array_type(X_total).astype(dtype))
     v = adata[:, :]
 
     sc.pp.normalize_total(v)
@@ -127,7 +127,7 @@ def test_normalize_pearson_residuals_values(sparsity_func, dtype, theta, clip):
         residuals_reference = (X - mu) / np.sqrt(mu + mu**2 / theta)
 
     # compute output to test
-    adata = AnnData(sparsity_func(X), dtype=dtype)
+    adata = AnnData(sparsity_func(X).astype(dtype))
     output = sc.experimental.pp.normalize_pearson_residuals(
         adata, theta=theta, clip=clip, inplace=False
     )
diff --git a/scanpy/tests/test_preprocessing.py b/scanpy/tests/test_preprocessing.py
@@ -147,7 +147,7 @@ def test_scale_array(count_matrix_format, zero_center):
     Test that running sc.pp.scale on an anndata object and an array returns the same results.
     """
     X = count_matrix_format(sp.random(100, 200, density=0.3).toarray())
-    adata = sc.AnnData(X=X.copy(), dtype=np.float64)
+    adata = sc.AnnData(X=X.copy().astype(np.float64))
 
     sc.pp.scale(adata, zero_center=zero_center)
     scaled_X = sc.pp.scale(X, zero_center=zero_center, copy=True)
@@ -254,7 +254,7 @@ def test_downsample_counts_per_cell(count_matrix_format, replace, dtype):
     TARGET = 1000
     X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
     X = X.astype(dtype)
-    adata = AnnData(X=count_matrix_format(X), dtype=dtype)
+    adata = AnnData(X=count_matrix_format(X).astype(dtype))
     with pytest.raises(ValueError):
         sc.pp.downsample_counts(
             adata, counts_per_cell=TARGET, total_counts=TARGET, replace=replace
@@ -286,7 +286,7 @@ def test_downsample_counts_per_cell_multiple_targets(
     TARGETS = np.random.randint(500, 1500, 1000)
     X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
     X = X.astype(dtype)
-    adata = AnnData(X=count_matrix_format(X), dtype=dtype)
+    adata = AnnData(X=count_matrix_format(X).astype(dtype))
     initial_totals = np.ravel(adata.X.sum(axis=1))
     with pytest.raises(ValueError):
         sc.pp.downsample_counts(adata, counts_per_cell=[40, 10], replace=replace)
@@ -312,7 +312,7 @@ def test_downsample_counts_per_cell_multiple_targets(
 def test_downsample_total_counts(count_matrix_format, replace, dtype):
     X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
     X = X.astype(dtype)
-    adata_orig = AnnData(X=count_matrix_format(X), dtype=dtype)
+    adata_orig = AnnData(X=count_matrix_format(X))
     total = X.sum()
     target = np.floor_divide(total, 10)
     initial_totals = np.ravel(adata_orig.X.sum(axis=1))
diff --git a/scanpy/tests/test_scaling.py b/scanpy/tests/test_scaling.py
@@ -28,15 +28,15 @@
 def test_scale(typ, dtype):
     # test AnnData arguments
     # test scaling with default zero_center == True
-    adata0 = AnnData(typ(X), dtype=dtype)
+    adata0 = AnnData(typ(X).astype(dtype))
     sc.pp.scale(adata0)
     assert np.allclose(csr_matrix(adata0.X).toarray(), X_centered)
     # test scaling with explicit zero_center == True
-    adata1 = AnnData(typ(X), dtype=dtype)
+    adata1 = AnnData(typ(X).astype(dtype))
     sc.pp.scale(adata1, zero_center=True)
     assert np.allclose(csr_matrix(adata1.X).toarray(), X_centered)
     # test scaling with explicit zero_center == False
-    adata2 = AnnData(typ(X), dtype=dtype)
+    adata2 = AnnData(typ(X).astype(dtype))
     sc.pp.scale(adata2, zero_center=False)
     assert np.allclose(csr_matrix(adata2.X).toarray(), X_scaled)
     # test bare count arguments, for simplicity only with explicit copy=True