Skip to content

Commit de4132b

Browse files
committed
read_tenx_visium executes successfully
1 parent bee220c commit de4132b

File tree

18 files changed

+1106
-33
lines changed

18 files changed

+1106
-33
lines changed

src/spatialexperiment/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
del version, PackageNotFoundError
1717

1818
from .ProxySFE import ProxySpatialFeatureExperiment
19+
from .io import read_tenx_visium
1920
from .SpatialExperiment import SpatialExperiment
2021
from .SpatialImage import (
2122
LoadedSpatialImage,
@@ -24,3 +25,15 @@
2425
VirtualSpatialImage,
2526
construct_spatial_image_class,
2627
)
28+
29+
__all__ = [
30+
"ProxySpatialFeatureExperiment",
31+
"read_tenx_visium",
32+
"SpatialExperiment",
33+
"LoadedSpatialImage",
34+
"RemoteSpatialImage",
35+
"StoredSpatialImage",
36+
"VirtualSpatialImage",
37+
"construct_spatial_image_class",
38+
]
39+

src/spatialexperiment/_imgutils.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from typing import Union
22

33
import os
4+
from io import BytesIO
5+
import requests
6+
from PIL import Image
47
from biocframe import BiocFrame
58
from .SpatialImage import construct_spatial_image_class
69

@@ -9,24 +12,65 @@
912
__license__ = "MIT"
1013

1114

15+
16+
def read_image(input_image):
    """Read image from PIL Image, file path, or URL.

    Args:
        input_image:
            PIL Image, path to a local file (string or ``os.PathLike``),
            or URL string starting with ``http://`` or ``https://``.

    Returns:
        The loaded image. A PIL Image passed in is returned unchanged.

    Raises:
        TypeError: If input is not PIL Image, path, or URL string.
        requests.HTTPError: If the URL responds with an HTTP error status.
    """
    if isinstance(input_image, Image.Image):
        return input_image

    if isinstance(input_image, str):
        if input_image.startswith(("http://", "https://")):
            # Bound the wait so an unresponsive host cannot hang the caller.
            response = requests.get(input_image, timeout=30)
            # Fail on HTTP errors here rather than handing an error page to
            # PIL, which would report a misleading "cannot identify image".
            response.raise_for_status()
            return Image.open(BytesIO(response.content))
        return Image.open(input_image)

    # Path-like objects (e.g. pathlib.Path) are local files as well; accept
    # them so callers typed as Union[str, os.PathLike] are not rejected.
    if isinstance(input_image, os.PathLike):
        return Image.open(input_image)

    raise TypeError(f"Expected PIL Image, path, or URL. Got {type(input_image)}")
39+
40+
1241
def get_img_data(
1342
img: Union[str, os.PathLike],
1443
scale_factor: str,
1544
sample_id: str,
1645
image_id: str,
46+
load: bool = True
1747
) -> BiocFrame:
1848
"""
1949
Construct an image data dataframe.
2050
2151
Args:
22-
img: A path or url to the image file.
23-
scale_factor (str): The scale factor associated with the image.
24-
sample_id (str): A unique identifier for the sample to which the image belongs.
25-
image_id (str): A unique identifier for the image itself.
52+
img:
53+
A path or url to the image file.
54+
55+
scale_factor:
56+
The scale factor associated with the image.
57+
58+
sample_id:
59+
A unique identifier for the sample to which the image belongs.
60+
61+
image_id:
62+
A unique identifier for the image itself.
63+
64+
load:
65+
A boolean specifying whether the image(s) should be loaded into memory? If False, will store the path/URL instead.
66+
Defaults to `True`.
2667
2768
Returns:
2869
The image data.
2970
"""
71+
if load:
72+
img = read_image(img)
73+
3074
spi = construct_spatial_image_class(img)
3175
return BiocFrame(
3276
{
@@ -47,7 +91,9 @@ def retrieve_rows_by_id(
4791
Retrieve rows from `img_data` based on specified `sample_id` and `image_id`.
4892
4993
Args:
50-
img_data: The data from which to retrieve rows.
94+
img_data:
95+
The data from which to retrieve rows.
96+
5197
sample_id:
5298
- `sample_id=True`: Matches all samples.
5399
- `sample_id=None`: Matches the first sample.

src/spatialexperiment/_initutils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from biocframe import BiocFrame
55
from PIL import Image
6-
from SpatialImage import SpatialImage
6+
from .SpatialImage import construct_spatial_image_class
77
from summarizedexperiment._frameutils import _sanitize_frame
88

99
__author__ = "keviny2"
@@ -96,7 +96,7 @@ def construct_img_data(
9696
spis = []
9797
for image_source in image_sources:
9898
result = Image.open(image_source) if load_image else image_source
99-
spi = SpatialImage(result)
99+
spi = construct_spatial_image_class(result)
100100
spis.append(spi)
101101

102102
img_data = {
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .tenx_visium import read_tenx_visium
2+
3+
__all__ = ["read_tenx_visium"]
4+
Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
import pandas as pd
88

99
from biocframe import BiocFrame
10-
from biocutils import is_list_of_type
10+
import biocutils as ut
1111
from singlecellexperiment import read_tenx_mtx
12-
from spatialexperiment import SpatialExperiment
13-
from ._imgutils import get_img_data
14-
from ._initutils import construct_spatial_coords_from_names
12+
from ..SpatialExperiment import SpatialExperiment
13+
from .._imgutils import get_img_data
14+
from .._initutils import construct_spatial_coords_from_names
1515

1616

17-
def read_tissue_positions(tissue_positions_paths):
17+
def read_tissue_positions(tissue_positions_path):
1818
column_names = [
1919
"barcode",
2020
"in_tissue",
@@ -24,30 +24,23 @@ def read_tissue_positions(tissue_positions_paths):
2424
"pxl_col_in_fullres",
2525
]
2626

27-
tissue_positions_combined = []
28-
for i, tissue_positions_path in enumerate(tissue_positions_paths):
29-
has_header = "list" not in os.path.basename(tissue_positions_path)
27+
has_header = "list" not in os.path.basename(tissue_positions_path)
3028

31-
tissue_positions = pd.read_csv(
32-
tissue_positions_path, header=0 if has_header else None, index_col=0
33-
)
34-
tissue_positions.columns = column_names
35-
36-
if len(tissue_positions_paths) > 1:
37-
tissue_positions.index = [f"{i+1}_{idx}" for idx in tissue_positions.index]
38-
39-
tissue_positions["in_tissue"] = tissue_positions["in_tissue"].astype(bool)
40-
tissue_positions_combined.append(tissue_positions)
29+
tissue_positions = pd.read_csv(
30+
tissue_positions_path, header=0 if has_header else None, names=column_names
31+
)
32+
tissue_positions = tissue_positions.set_index("barcode")
33+
tissue_positions["in_tissue"] = tissue_positions["in_tissue"].astype(bool)
4134

42-
tissue_positions_combined = pd.concat(tissue_positions_combined)
43-
return tissue_positions_combined
35+
return tissue_positions
4436

4537

4638
def read_img_data(
4739
path: str = ".",
4840
sample_ids: Optional[List[str]] = None,
4941
image_sources: Optional[List[str]] = None,
5042
scale_factors: str = None,
43+
load: bool = True
5144
) -> BiocFrame:
5245
"""Read in images and scale factors for 10x Genomics Visium data, and return as a valid `img_data` object.
5346
@@ -63,6 +56,10 @@ def read_img_data(
6356
6457
scale_factors:
6558
The .json file where to find the scale factors.
59+
60+
load:
61+
A boolean specifying whether the image(s) should be loaded into memory? If False, will store the path/URL instead.
62+
Defaults to `True`.
6663
"""
6764
# get sample identifiers
6865
if sample_ids is None:
@@ -120,17 +117,18 @@ def read_img_data(
120117
scale_factor=scale_factor,
121118
sample_id=sample_id,
122119
image_id=image_id,
120+
load=load
123121
)
124122
img_data = img_data.combine_rows(curr_image_data)
125123

126124
return img_data
127125

128126

129-
def read_10x_visium(
127+
def read_tenx_visium(
130128
samples: List[Union[str, os.PathLike]],
131129
sample_ids: Optional[List[str]] = None,
132130
type: str = "HDF5",
133-
data: str = ["filtered", "raw"],
131+
data: str = "filtered",
134132
images: List[str] = "lowres",
135133
load: bool = True,
136134
):
@@ -154,6 +152,7 @@ def read_10x_visium(
154152
155153
load:
156154
A boolean specifying whether the image(s) should be loaded into memory? If False, will store the path/URL instead.
155+
Defaults to `True`.
157156
"""
158157
# check validity of input arguments
159158
allowed_types = ["HDF5", "sparse", "auto", "prefix"]
@@ -177,7 +176,7 @@ def read_10x_visium(
177176

178177
if sample_ids is None:
179178
sample_ids = [f"sample{str(i).zfill(2)}" for i in range(1, len(samples) + 1)]
180-
elif not is_list_of_type(sample_ids, str) and len(set(sample_ids)) != len(samples):
179+
elif not ut.is_list_of_type(sample_ids, str) and len(set(sample_ids)) != len(samples):
181180
raise ValueError(
182181
"`sample_ids` should contain as many unique values as `samples`."
183182
)
@@ -261,13 +260,18 @@ def read_10x_visium(
261260
sample_ids=sample_ids,
262261
image_sources=image_file_paths,
263262
scale_factors=scale_factors_paths,
263+
load=load
264264
)
265265

266+
spes = []
266267
for i, counts_dir_path in enumerate(counts_dir_paths):
267268
sce = read_tenx_mtx(counts_dir_path)
268269
tissue_positions = read_tissue_positions(tissue_positions_paths[i])
269270

270-
obs = set(sce.col_names).intersection(set(tissue_positions.index))
271+
barcodes = sce.column_data["barcode"]
272+
sce = sce.set_column_names(barcodes)
273+
274+
obs = list(set(sce.col_names).intersection(set(tissue_positions.index)))
271275
sce = sce[:, obs]
272276

273277
tissue_positions = tissue_positions.loc[obs, :]
@@ -279,9 +283,17 @@ def read_10x_visium(
279283

280284
spe = SpatialExperiment(
281285
assays=sce.assays,
282-
row_data=sce.row_data["Symbol"],
286+
row_data=BiocFrame(
287+
{
288+
"symbol": sce.row_data["gene_symbols"]
289+
}
290+
),
283291
column_data=column_data,
284292
spatial_coords=spatial_coords
285293
)
294+
spes.append(spe)
295+
296+
spe_combined = ut.combine_columns(*spes)
297+
spe_combined.img_data = image
286298

287-
# TODO: implement combine ops on SpatialExperiment objects
299+
return spe_combined
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
AAACAACGAATAGTTC-1
2+
AAACAAGTATCTCCCA-1
3+
AAACAATCTACTAGCA-1
4+
AAACACCAATAACTGC-1
5+
AAACAGAGCGACTCCT-1
6+
AAACAGCTTTCAGAAG-1
7+
AAACAGGGTCTATATT-1
8+
AAACAGTGTTCCTGGG-1
9+
AAACATGGTGAGAGGA-1
10+
AAACATTTCCCGGATT-1
11+
AAACCACTACACAGAT-1
12+
AAACCCGAACGAAATC-1
13+
AAACCGGAAATGTTAA-1
14+
AAACCGGGTAGGTACC-1
15+
AAACCGTTCGTCCAGG-1
16+
AAACCTAAGCAGCCGG-1
17+
AAACCTCATGAAGTTG-1
18+
AAACGAAGAACATACC-1
19+
AAACGAAGATGGAGTA-1
20+
AAACGACAGTCTTGCC-1
21+
AAACGAGACGGTTGAT-1
22+
AAACGCCCGAGATCGG-1
23+
AAACGCTGGGCACGAC-1
24+
AAACGGGCGTACGGGT-1
25+
AAACGGGTTGGTATCC-1
26+
AAACGGTTGCGAACTG-1
27+
AAACGTGTTCGCCCTA-1
28+
AAACTAACGTGGCGAC-1
29+
AAACTCGGTTCGCAAT-1
30+
AAACTCGTGATATAAG-1
31+
AAACTGCTGGCTCCAA-1
32+
AAACTTAATTGCACGC-1
33+
AAACTTGCAAACGTAT-1
34+
AAAGAATGACCTTAGA-1
35+
AAAGAATGTGGACTAA-1
36+
AAAGACATGAAGTTTA-1
37+
AAAGACCCAAGTCGCG-1
38+
AAAGACTGGGCGCTTT-1
39+
AAAGCTTGCCTACATA-1
40+
AAAGGCCCTATAATAC-1
41+
AAAGGCTACGGACCAT-1
42+
AAAGGCTCTCGCGCCG-1
43+
AAAGGGATGTAGCAAG-1
44+
AAAGGGCAGCTTGAAT-1
45+
AAAGGTAAGCTGTACC-1
46+
AAAGGTCAACGACATG-1
47+
AAAGTAGCATTGCTCA-1
48+
AAAGTCACTGATGTAA-1
49+
AAAGTCGACCCTCAGT-1
50+
AAAGTGCCATCAATTA-1
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
ENSMUSG00000051951 Xkr4 Gene Expression
2+
ENSMUSG00000089699 Gm1992 Gene Expression
3+
ENSMUSG00000102343 Gm37381 Gene Expression
4+
ENSMUSG00000025900 Rp1 Gene Expression
5+
ENSMUSG00000025902 Sox17 Gene Expression
6+
ENSMUSG00000104328 Gm37323 Gene Expression
7+
ENSMUSG00000033845 Mrpl15 Gene Expression
8+
ENSMUSG00000025903 Lypla1 Gene Expression
9+
ENSMUSG00000104217 Gm37988 Gene Expression
10+
ENSMUSG00000033813 Tcea1 Gene Expression
11+
ENSMUSG00000002459 Rgs20 Gene Expression
12+
ENSMUSG00000085623 Gm16041 Gene Expression
13+
ENSMUSG00000033793 Atp6v1h Gene Expression
14+
ENSMUSG00000025905 Oprk1 Gene Expression
15+
ENSMUSG00000033774 Npbwr1 Gene Expression
16+
ENSMUSG00000025907 Rb1cc1 Gene Expression
17+
ENSMUSG00000090031 4732440D04Rik Gene Expression
18+
ENSMUSG00000087247 Alkal1 Gene Expression
19+
ENSMUSG00000033740 St18 Gene Expression
20+
ENSMUSG00000051285 Pcmtd1 Gene Expression
21+
ENSMUSG00000097797 Gm26901 Gene Expression
22+
ENSMUSG00000103067 Gm30414 Gene Expression
23+
ENSMUSG00000025909 Sntg1 Gene Expression
24+
ENSMUSG00000061024 Rrs1 Gene Expression
25+
ENSMUSG00000025911 Adhfe1 Gene Expression
26+
ENSMUSG00000067879 3110035E14Rik Gene Expression
27+
ENSMUSG00000099827 Gm29520 Gene Expression
28+
ENSMUSG00000025912 Mybl1 Gene Expression
29+
ENSMUSG00000045210 Vcpip1 Gene Expression
30+
ENSMUSG00000097893 1700034P13Rik Gene Expression
31+
ENSMUSG00000025915 Sgk3 Gene Expression
32+
ENSMUSG00000046101 Mcmdc2 Gene Expression
33+
ENSMUSG00000098234 Snhg6 Gene Expression
34+
ENSMUSG00000099032 Tcf24 Gene Expression
35+
ENSMUSG00000025916 Ppp1r42 Gene Expression
36+
ENSMUSG00000087199 Gm15818 Gene Expression
37+
ENSMUSG00000025917 Cops5 Gene Expression
38+
ENSMUSG00000056763 Cspp1 Gene Expression
39+
ENSMUSG00000067851 Arfgef1 Gene Expression
40+
ENSMUSG00000042501 Cpa6 Gene Expression
41+
ENSMUSG00000048960 Prex2 Gene Expression
42+
ENSMUSG00000057715 A830018L16Rik Gene Expression
43+
ENSMUSG00000097171 Gm17644 Gene Expression
44+
ENSMUSG00000101314 Gm29663 Gene Expression
45+
ENSMUSG00000016918 Sulf1 Gene Expression
46+
ENSMUSG00000025938 Slco5a1 Gene Expression
47+
ENSMUSG00000099498 Gm29283 Gene Expression
48+
ENSMUSG00000042414 Prdm14 Gene Expression
49+
ENSMUSG00000005886 Ncoa2 Gene Expression
50+
ENSMUSG00000101476 Gm29570 Gene Expression

0 commit comments

Comments
 (0)