Skip to content

Commit f7be6f2

Browse files
committed
add small epsilon to harmonypy to fix kmeans bug
1 parent d36e9c1 commit f7be6f2

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/methods/harmonypy/script.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44

55
## VIASH START
66
par = {
7-
"input": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/censored_split2.h5ad",
8-
"output": "resources_test/task_cyto_batch_integration/mouse_spleen_flow_cytometry_subset/output_harmony_split2.h5ad",
7+
"input": "/Users/putri.g/Documents/cytobenchmark/debug_general/_viash_par/input_1/censored_split1.h5ad",
8+
"output": "/Users/putri.g/Documents/cytobenchmark/debug_general/_viash_par/output_1/output_harmony_split1.h5ad",
99
}
1010
meta = {"name": "harmonypy"}
1111
## VIASH END
1212

1313
print("Reading and preparing input files", flush=True)
1414
adata = ad.read_h5ad(par["input"])
1515

16+
# harmony can't handle integer batch labels, so cast them to strings
1617
adata.obs["batch_str"] = adata.obs["batch"].astype(str)
1718

1819
markers_to_correct = adata.var[adata.var["to_correct"]].index.to_numpy()
@@ -21,10 +22,13 @@
2122
adata_to_correct = adata[:, markers_to_correct].copy()
2223

2324
print("Run harmony", flush=True)
24-
# harmony can't handle integer batch labels
25+
26+
# TODO: numerical instability in k-means is causing problems with harmony,
27+
# so add a very small value to all entries to make sure there are no zeros.
28+
epsilon = 1e-20
2529

2630
out = harmonypy.run_harmony(
27-
data_mat=adata_to_correct.layers["preprocessed"],
31+
data_mat=adata_to_correct.layers["preprocessed"] + epsilon,
2832
meta_data=adata_to_correct.obs,
2933
vars_use="batch_str",
3034
)

0 commit comments

Comments
 (0)