diff --git a/src/methods_segmentation/scs_segmentation/config.vsh.yaml b/src/methods_segmentation/scs_segmentation/config.vsh.yaml
new file mode 100644
index 00000000..4249b7fe
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/config.vsh.yaml
@@ -0,0 +1,48 @@
+name: scs_segmentation
+label: "SCS Segmentation"
+summary: "Output of the segmentation method SCS"
+description: "Output of the segmentation method SCS"
+links:
+  documentation: "https://github.com/openproblems-bio/task_ist_preprocessing"
+  repository: "https://github.com/openproblems-bio/task_ist_preprocessing"
+references:
+  doi: "10.1038/s41592-023-01939-3"
+
+__merge__: /src/api/comp_method_segmentation.yaml
+
+arguments:
+  - name: --env_scs
+    type: string
+    default: "scs.yaml"
+
+
+resources:
+  - type: bash_script
+    path: orchestrator.sh
+  - type: python_script
+    path: input.py
+  - type: file
+    path: scs.yaml
+    dest: environment.yaml
+  - type: python_script
+    path: script.py
+  - type: python_script
+    path: output.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi: spatialdata
+      - type: apt
+        packages: ["git", "curl", "libglib2.0-0", "libgl1"]
+    __merge__:
+      - /src/base/setup_txsim_partial.yaml
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, lowcpu, highmem ]
\ No newline at end of file
diff --git a/src/methods_segmentation/scs_segmentation/input.py b/src/methods_segmentation/scs_segmentation/input.py
new file mode 100644
index 00000000..978bd180
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/input.py
@@ -0,0 +1,45 @@
+import spatialdata as sd
+from tifffile import imwrite
+import sys
+import numpy as np
+import pandas as pd
+
+input_path = sys.argv[1]  # input dataset (read with sd.read_zarr below)
+output_path_tif = sys.argv[3]  # note: the TIFF path is the THIRD argument
+output_path_tsv = sys.argv[2]  # note: the TSV path is the SECOND argument
+
+sdata = sd.read_zarr(input_path)
+
+transcripts_coord_systems = sd.transformations.get_transformation(sdata['transcripts'], get_all=True).keys()
+
+image_coord_systems = sd.transformations.get_transformation(sdata["morphology_mip"], get_all=True).keys()
+
+print('Transforming transcripts coordinates', flush=True)
+transcripts = sd.transform(sdata['transcripts'], to_coordinate_system='global')
+
+image = sdata['morphology_mip']['scale0'].image.compute().to_numpy()
+transformation = sdata['morphology_mip']['scale0'].image.transform.copy()
+
+mip2d = image[0]
+imwrite(
+    output_path_tif, # .tif or .ome.tif
+    mip2d,
+    dtype=mip2d.dtype, # keeps original bit-depth, e.g. uint8
+    compression="zlib")
+
+
+transcripts_df = transcripts.compute().loc[:,['x', 'y', 'feature_name']]
+transcripts_df['x_int'] = transcripts_df.x.astype(int)
+transcripts_df['y_int'] = transcripts_df.y.astype(int)
+
+print("Counting transcripts per (gene,x,y)")
+agg = (
+    transcripts_df.groupby(["feature_name", "x_int", "y_int"], sort=False, observed=True)
+    .size()
+    .reset_index(name="MIDCounts")
+    .rename(columns={"feature_name": "geneID", "x_int": "x", "y_int": "y"})
+)
+agg['x'] = agg.x - np.min(agg.x)
+agg['y'] = agg.y - np.min(agg.y)
+
+agg[agg.MIDCounts!=0].to_csv(output_path_tsv, sep = "\t", index=False)
\ No newline at end of file
diff --git a/src/methods_segmentation/scs_segmentation/orchestrator.sh b/src/methods_segmentation/scs_segmentation/orchestrator.sh
new file mode 100644
index 00000000..860c00b3
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/orchestrator.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+## VIASH START
+par_input="../task_ist_preprocessing/resources_test/common/2023_10x_mouse_brain_xenium/dataset.zarr"
+par_output="segmentation.zarr"
+meta_resources_dir="../task_ist_preprocessing/src/methods_segmentation/scs"
+## VIASH END
+
+par_intermediate_dir=$(mktemp -d -p "$(pwd)" tmp-processing-XXXXXXXX)
+echo "meta dir: $meta_resources_dir"
+
+# Access the YAML file
+CONDA_ENV_FILE="$meta_resources_dir/environment.yaml"
+
+
+echo "running SCS orchestrator"
+
+# Create intermediate directory
+mkdir -p "$par_intermediate_dir"
+
+which python
+# Step 1: Run Python script to reformat input in the first Python environment
+# (helper scripts are resolved via $meta_resources_dir, like the conda env file, not via the working directory)
+python "$meta_resources_dir/input.py" \
+  "$par_input" "$par_intermediate_dir/temp.tsv" "$par_intermediate_dir/temp.tif"
+
+export CONDA_DIR=/opt/conda
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda
+# Put conda in path so we can use conda activate
+export PATH=$CONDA_DIR/bin:$PATH
+
+# Create and activate the second Python environment
+# Initialize conda for bash
+eval "$(/opt/conda/bin/conda shell.bash hook)"
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main
+conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
+
+conda env create -f "$CONDA_ENV_FILE"
+
+conda activate scs_tf
+
+pip install spateo-release
+pip install scanpy==1.9.3
+pip install tensorflow_addons==0.18.0
+pip install --user scikit-misc==0.3.1
+pip install --user numpy==1.23.5
+
+#clone SCS repo
+git clone https://github.com/chenhcs/SCS.git
+
+
+# Copy (not move) so the packaged resource stays in place for later runs
+cp "$meta_resources_dir/script.py" SCS/
+cd SCS
+
+# Step 2: Run SCS in the second Python environment
+python "script.py" \
+  "$par_intermediate_dir/temp.tsv" "$par_intermediate_dir/temp.tif"
+
+conda deactivate
+
+which python ## testing the versions are ok again
+
+cd ../
+# Step 3: Run output reformatting in the first Python environment
+/usr/local/bin/python "$meta_resources_dir/output.py" \
+  "$par_input" "SCS/results/spot2cell_0:0:0:0.txt" "$par_output"
diff --git a/src/methods_segmentation/scs_segmentation/output.py b/src/methods_segmentation/scs_segmentation/output.py
new file mode 100644
index 00000000..eb968cd5
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/output.py
@@ -0,0 +1,71 @@
+import spatialdata as sd
+from tifffile import imwrite
+import sys
+import numpy as np
+import pandas as pd
+import xarray as xr
+from scipy.sparse import coo_matrix
+from pathlib import Path
+import os
+import shutil
+
+def convert_to_lower_dtype(arr):
+    """Cast non-negative integer labels to the smallest unsigned dtype that fits their maximum."""
+    if arr.min() < 0:
+        # An unsigned cast would silently wrap negative ids around to huge label values.
+        raise ValueError("label array must not contain negative values")
+    max_val = arr.max()
+    if max_val <= np.iinfo(np.uint8).max:
+        new_dtype = np.uint8
+    elif max_val <= np.iinfo(np.uint16).max:
+        new_dtype = np.uint16
+    elif max_val <= np.iinfo(np.uint32).max:
+        new_dtype = np.uint32
+    else:
+        new_dtype = np.uint64
+
+    return arr.astype(new_dtype)
+
+# Usage: output.py <input dataset zarr> <SCS spot-to-cell table> <output zarr>
+input_path = sys.argv[1]
+input_cellspot = sys.argv[2]
+output_path = sys.argv[3]
+
+print(input_path)
+
+sdata = sd.read_zarr(input_path)
+sd_output = sd.SpatialData()
+
+# The label image reuses the transformation attached to the morphology image
+transformation = sdata['morphology_mip']['scale0'].image.transform.copy()
+
+# Tab-separated, headerless rows: "<x>:<y>" spot key, then the cell id
+scs_output = pd.read_csv(input_cellspot, sep = "\t", header = None)
+scs_output.rename(columns = {0: 'spot', 1: 'cell'}, inplace = True)
+scs_output[['x', 'y']] = scs_output.spot.str.split(':', n=1, expand=True)
+
+scs_output.x = scs_output.x.astype(int)
+scs_output.y = scs_output.y.astype(int)
+
+scs_output.cell = scs_output.cell.astype(int)
+
+##converting back to image-like format
+# NOTE(review): x indexes the first array axis here, but that axis is named 'y' below - confirm the orientation
+sparse_matrix = coo_matrix(
+    (scs_output['cell'], (scs_output['x'], scs_output['y'])),
+    shape=(scs_output.x.max() + 1, scs_output.y.max() + 1)
+    )
+labels = sparse_matrix.toarray()
+
+## non-segmented pixels stay 0: marking them -1 would wrap to the dtype maximum in the unsigned cast below
+labels = convert_to_lower_dtype(labels)
+
+labels_array = xr.DataArray(labels, name='segmentation', dims=('y', 'x'))
+parsed_labels = sd.models.Labels2DModel.parse(labels_array, transformations=transformation)
+sd_output.labels['segmentation'] = parsed_labels
+
+print("Writing output", flush=True)
+Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+if os.path.exists(output_path):
+    shutil.rmtree(output_path)
+sd_output.write(output_path)
\ No newline at end of file
diff --git a/src/methods_segmentation/scs_segmentation/script.py b/src/methods_segmentation/scs_segmentation/script.py
new file mode 100644
index 00000000..ecd43676
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/script.py
@@ -0,0 +1,9 @@
+import sys
+from src import scs
+
+bin_file = sys.argv[1]
+image_file = sys.argv[2]
+
+print(bin_file, flush=True)
+
+scs.segment_cells(bin_file, image_file, align='rigid')
diff --git a/src/methods_segmentation/scs_segmentation/scs.yaml b/src/methods_segmentation/scs_segmentation/scs.yaml
new file mode 100644
index 00000000..d6866255
--- /dev/null
+++ b/src/methods_segmentation/scs_segmentation/scs.yaml
@@ -0,0 +1,25 @@
+name: scs_tf
+channels:
+  - anaconda
+  - conda-forge
+  - defaults
+dependencies:
+  - python=3.9.7
+  - tensorflow=2.8.1
+  - mamba
+  - readline=8.2
+  - ncurses=6.4
+  - zlib=1.2.13
+  - ca-certificates=2025.7.15
+  - sqlite=3.41.1
+  - tzdata=2022g
+  - wheel=0.38.4
+  - openssl=1.1.1w
+  - certifi=2025.8.3
+  - xz=5.2.10
+  - tk=8.6.12
+  - pip=23.0.1
+  - setuptools=65.6.3
+  - libffi=3.3
+  - ipykernel
+prefix: /opt/miniconda3/envs/scs_tf