Skip to content

Commit 75ae100

Browse files
authored
Use new scib package (#13)
* update scib package name * fix batch variable check between integrated and non-integrated adata * update environments to correct scib versions * using pip version of scib * fix R import issue for Harmony (as mentioned in immunogenomics/harmony#134) * use value counts to check for batch relabeling after integration
1 parent 2b1c055 commit 75ae100

File tree

15 files changed

+258
-232
lines changed

15 files changed

+258
-232
lines changed

data/generate_data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import scib
44
import warnings
5+
56
warnings.simplefilter(action='ignore', category=FutureWarning)
67

78

@@ -39,7 +40,7 @@ def get_adata_pbmc():
3940
"""
4041
Code from https://scanpy-tutorials.readthedocs.io/en/latest/integrating-data-using-ingest.html
4142
"""
42-
#adata_ref = sc.datasets.pbmc3k_processed()
43+
# adata_ref = sc.datasets.pbmc3k_processed()
4344
# quick fix for broken dataset paths, should be removed with scanpy>=1.6.0
4445
adata_ref = sc.read(
4546
"pbmc3k_processed.h5ad",

envs/scIB-python-paper.yml

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,32 @@ channels:
33
- conda-forge
44
- bioconda
55
dependencies:
6-
- python==3.7
7-
- numpy==1.18.1
6+
- python=3.7
7+
- numpy=1.18.1
88
- pandas
99
- seaborn
1010
- matplotlib
11-
- scanpy==1.4.6
12-
- anndata==0.7.1
11+
- scanpy=1.4.6
12+
- anndata=0.7.1
1313
- h5py<3
1414
- scipy
1515
- memory_profiler
16-
- rpy2==3.1.0
16+
- rpy2=3.1.0
1717
- r-stringi
18-
- anndata2ri==1.0.2
19-
- bbknn==1.3.9
18+
- anndata2ri=1.0.2
19+
- bbknn=1.3.9
2020
- libgcc-ng
2121
- gsl
2222
- scikit-learn
2323
- networkx
2424
- r-base
2525
- r-devtools
26-
- r-seurat==3.1.1
26+
- r-seurat=3.1.1
2727
- bioconductor-scater
2828
- bioconductor-scran
2929
- pip
3030
- numba<=0.46
3131
- llvmlite
32-
- tensorflow==1.15
3332
- gxx_linux-64
3433
- gxx_impl_linux-64
3534
- gcc_linux-64
@@ -39,8 +38,8 @@ dependencies:
3938
- igraph
4039
- openblas
4140
- r-essentials
42-
- r-globals==0.12.5
43-
- r-listenv==0.8.0
41+
- r-globals=0.12.5
42+
- r-listenv=0.8.0
4443
- r-rlang
4544
- r-ellipsis
4645
- r-evaluate
@@ -52,12 +51,13 @@ dependencies:
5251
- r-testthat
5352
- r-vctrs
5453
- xlrd
55-
- umap-learn==0.3.10
56-
- louvain==0.6.1
57-
- scvi==0.6.7
58-
- scanorama==1.7.0
54+
- umap-learn=0.3.10
55+
- louvain=0.6.1
56+
- scvi=0.6.7
57+
- scanorama=1.7.0
5958
- pip:
60-
- git+git://github.com/theislab/scib.git
59+
- git+git://github.com/theislab/[email protected]
60+
- tensorflow==1.15
6161
#- trvae==1.1.2
6262
- trvaep==0.1.0
6363
- mnnpy==0.1.9.5

envs/scib-pipeline.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,21 @@ dependencies:
1414
- openblas
1515
- llvmlite
1616
- libgcc-ng
17+
- numba<=0.46 # for mnnpy
18+
- anndata2ri
1719
- r-base
1820
- r-essentials
1921
- r-devtools
2022
- r-stringi
2123
- bioconductor-scater
2224
- bioconductor-scran
2325
# Methods
24-
- scvi==0.6.7
25-
- scanorama==1.7.0
26-
- bbknn==1.3.9
27-
- r-seurat==3.1.1
28-
- numba<=0.46 # for mnnpy
29-
- anndata2ri==1.0.5 # 1.0.6 has issues with HDF5 conversion
26+
- scvi=0.6.7
27+
- scanorama=1.7.0
28+
- bbknn=1.3.9
29+
- r-seurat=3.1.1
3030
- pip:
31-
- git+git://github.com/theislab/scib.git
31+
- scib==1.0.0
3232
- trvaep==0.1.0
3333
- mnnpy==0.1.9.5
3434
- scgen==1.1.5

scripts/integration/runIntegration.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
# coding: utf-8
33

44
import scanpy as sc
5-
import scIB
5+
import scib
66
import warnings
7+
78
warnings.filterwarnings('ignore')
89

910

@@ -16,16 +17,15 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
1617
"""
1718

1819
adata = sc.read(inPath)
19-
20+
2021
if timing:
2122
if celltype is not None:
22-
integrated_tmp = scIB.metrics.measureTM(method, adata, batch, celltype)
23+
integrated_tmp = scib.metrics.measureTM(method, adata, batch, celltype)
2324
else:
24-
integrated_tmp = scIB.metrics.measureTM(method, adata, batch)
25+
integrated_tmp = scib.metrics.measureTM(method, adata, batch)
2526

2627
integrated = integrated_tmp[2][0]
2728

28-
2929
integrated.uns['mem'] = integrated_tmp[0]
3030
integrated.uns['runtime'] = integrated_tmp[1]
3131

@@ -34,10 +34,11 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
3434
integrated = method(adata, batch, celltype)
3535
else:
3636
integrated = method(adata, batch)
37-
37+
3838
sc.write(outPath, integrated)
3939

40-
if __name__=='__main__':
40+
41+
if __name__ == '__main__':
4142
import argparse
4243

4344
parser = argparse.ArgumentParser(description='Run the integration methods')
@@ -59,22 +60,22 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
5960
celltype = args.celltype
6061
method = args.method
6162
methods = {
62-
'scanorama': scIB.integration.runScanorama,
63-
'trvae': scIB.integration.runTrVae,
64-
'trvaep': scIB.integration.runTrVaep,
65-
'scgen': scIB.integration.runScGen,
66-
'mnn': scIB.integration.runMNN,
67-
'bbknn': scIB.integration.runBBKNN,
68-
'scvi': scIB.integration.runScvi,
69-
'scanvi': scIB.integration.runScanvi,
70-
'combat': scIB.integration.runCombat,
71-
'saucie': scIB.integration.runSaucie,
72-
'desc': scIB.integration.runDESC
63+
'scanorama': scib.integration.scanorama,
64+
'trvae': scib.integration.trvae,
65+
'trvaep': scib.integration.trvaep,
66+
'scgen': scib.integration.scgen,
67+
'mnn': scib.integration.mnn,
68+
'bbknn': scib.integration.bbknn,
69+
'scvi': scib.integration.scvi,
70+
'scanvi': scib.integration.scanvi,
71+
'combat': scib.integration.combat,
72+
'saucie': scib.integration.saucie,
73+
'desc': scib.integration.desc
7374
}
74-
75+
7576
if method not in methods.keys():
7677
raise ValueError(f'Method "{method}" does not exist. Please use one of '
7778
f'the following:\n{list(methods.keys())}')
78-
79-
run= methods[method]
79+
80+
run = methods[method]
8081
runIntegration(file, out, run, hvg, batch, celltype)

scripts/integration/runMethods.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ getScriptPath <- function(){
1111
setwd(getScriptPath())
1212

1313
library('optparse')
14+
library(rlang)
1415
require(Seurat)
1516

1617
option_list <- list(make_option(c("-m", "--method"), type="character", default=NA, help="integration method to use"),

scripts/integration/runPost.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#!/usr/bin/env python
22
# coding: utf-8
33

4-
import scanpy as sc
5-
import scIB
4+
import scib
65
import warnings
6+
77
warnings.filterwarnings('ignore')
88

99

@@ -15,14 +15,14 @@ def runPost(inPath, outPath, conos):
1515
conos: set if input is a conos object
1616
"""
1717
if conos:
18-
adata = scIB.pp.readConos(inPath)
18+
adata = scib.pp.read_conos(inPath)
1919
else:
20-
adata = scIB.pp.readSeurat(inPath)
20+
adata = scib.pp.read_seurat(inPath)
2121

2222
adata.write(outPath)
2323

2424

25-
if __name__=='__main__':
25+
if __name__ == '__main__':
2626
import argparse
2727

2828
parser = argparse.ArgumentParser(description='Run the integration methods')
@@ -35,5 +35,5 @@ def runPost(inPath, outPath, conos):
3535
file = args.input_file
3636
out = args.output_file
3737
conos = args.conos
38-
38+
3939
runPost(file, out, conos)

scripts/integration_fail_file.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
21
from snakemake.io import load_configfile
32
from pathlib import Path
43

5-
if __name__=='__main__':
4+
if __name__ == '__main__':
65
import argparse
76

87
parser = argparse.ArgumentParser(description='Create an empty output file for failed integration runs')
@@ -25,31 +24,30 @@
2524

2625
# Check inputs
2726
if method not in params['METHODS']:
28-
raise ValueError(f'{method} is not a valid method.\n'
27+
raise ValueError(f'{method} is not a valid method.\n'
2928
f'Please choose one of: {list(params["METHODS"].keys())}')
3029

3130
if task not in params['DATA_SCENARIOS']:
32-
raise ValueError(f'{task} is not a valid integration task.\n'
31+
raise ValueError(f'{task} is not a valid integration task.\n'
3332
f'Please choose one of: {list(params["DATA_SCENARIOS"].keys())}')
34-
33+
3534
# Get path values
3635
folder = params['ROOT']
3736
t_folder = task
3837
s_folder = 'scaled' if scale else 'unscaled'
3938
h_folder = 'hvg' if hvgs else 'full_feature'
4039
r_folder = 'R/' if 'R' in params['METHODS'][method] else ''
41-
filename = method+'.h5ad'
40+
filename = method + '.h5ad'
4241

43-
folder_path = '/'.join([folder,task,'integration',s_folder,h_folder])+'/'+r_folder
44-
full_path = folder_path+filename
42+
folder_path = '/'.join([folder, task, 'integration', s_folder, h_folder]) + '/' + r_folder
43+
full_path = folder_path + filename
4544

4645
if 'R' in params['METHODS'][method]:
47-
filename_r = method+'.RDS'
48-
full_path_r = folder_path+filename_r
46+
filename_r = method + '.RDS'
47+
full_path_r = folder_path + filename_r
4948
Path(full_path_r).touch()
50-
Path(full_path_r+".benchmark").touch()
49+
Path(full_path_r + ".benchmark").touch()
5150

52-
#print(full_path)
51+
# print(full_path)
5352
Path(full_path).touch()
54-
Path(full_path+".benchmark").touch()
55-
53+
Path(full_path + ".benchmark").touch()

scripts/merge_benchmarks.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import argparse
33
import os
44

5-
if __name__=='__main__':
5+
if __name__ == '__main__':
66
"""
77
Merge benchmark output for all scenarios, methods and settings
88
"""
@@ -14,7 +14,6 @@
1414
help='root directory for scIB output')
1515
args = parser.parse_args()
1616

17-
1817
print("Searching for .benchmark files...")
1918
bench_files = []
2019
for path, dirs, files in os.walk(args.root):
@@ -43,4 +42,3 @@
4342
results.to_csv(args.output, index_label='scenario')
4443

4544
print("Done!")
46-

scripts/metrics/merge_metrics.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,32 @@
22
# coding: utf-8
33

44
import pandas as pd
5-
import scIB
65
import warnings
6+
77
warnings.filterwarnings('ignore')
88
import argparse
99
from functools import reduce
1010

11-
if __name__=='__main__':
11+
if __name__ == '__main__':
1212
"""
1313
Merge metrics output for all scenarios, methods and settings
1414
"""
15-
15+
1616
parser = argparse.ArgumentParser(description='Collect all metrics')
1717

1818
parser.add_argument('-i', '--input', nargs='+', required=True, help='input directory')
1919
parser.add_argument('-o', '--output', required=True, help='output file')
20-
parser.add_argument('-r', '--root', required=True,
20+
parser.add_argument('-r', '--root', required=True,
2121
help='root directory for inferring column names from path')
2222
args = parser.parse_args()
23-
24-
23+
2524
res_list = []
2625
for file in args.input:
2726
clean_name = file.replace(args.root, "").replace(".csv", "")
2827
res = pd.read_csv(file, index_col=0)
2928
res.rename(columns={res.columns[0]: clean_name}, inplace=True)
3029
res_list.append(res)
31-
32-
results = reduce(lambda left,right: pd.merge(left, right, left_index=True, right_index=True), res_list)
30+
31+
results = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True), res_list)
3332
results = results.T
3433
results.to_csv(args.output)
35-
36-

0 commit comments

Comments
 (0)