Skip to content

Commit

Permalink
Use new scib package (#13)
Browse files Browse the repository at this point in the history
* update scib package name

* fix batch variable check between integrated and non-integrated adata

* update environments to correct scib versions

* using pip version of scib

* fix R import issue for Harmony (as mentioned in immunogenomics/harmony#134)

* use value counts to check for batch relabeling after integration
  • Loading branch information
mumichae authored Mar 3, 2022
1 parent 2b1c055 commit 75ae100
Show file tree
Hide file tree
Showing 15 changed files with 258 additions and 232 deletions.
3 changes: 2 additions & 1 deletion data/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import scib
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)


Expand Down Expand Up @@ -39,7 +40,7 @@ def get_adata_pbmc():
"""
Code from https://scanpy-tutorials.readthedocs.io/en/latest/integrating-data-using-ingest.html
"""
#adata_ref = sc.datasets.pbmc3k_processed()
# adata_ref = sc.datasets.pbmc3k_processed()
# quick fix for broken dataset paths, should be removed with scanpy>=1.6.0
adata_ref = sc.read(
"pbmc3k_processed.h5ad",
Expand Down
32 changes: 16 additions & 16 deletions envs/scIB-python-paper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,32 @@ channels:
- conda-forge
- bioconda
dependencies:
- python==3.7
- numpy==1.18.1
- python=3.7
- numpy=1.18.1
- pandas
- seaborn
- matplotlib
- scanpy==1.4.6
- anndata==0.7.1
- scanpy=1.4.6
- anndata=0.7.1
- h5py<3
- scipy
- memory_profiler
- rpy2==3.1.0
- rpy2=3.1.0
- r-stringi
- anndata2ri==1.0.2
- bbknn==1.3.9
- anndata2ri=1.0.2
- bbknn=1.3.9
- libgcc-ng
- gsl
- scikit-learn
- networkx
- r-base
- r-devtools
- r-seurat==3.1.1
- r-seurat=3.1.1
- bioconductor-scater
- bioconductor-scran
- pip
- numba<=0.46
- llvmlite
- tensorflow==1.15
- gxx_linux-64
- gxx_impl_linux-64
- gcc_linux-64
Expand All @@ -39,8 +38,8 @@ dependencies:
- igraph
- openblas
- r-essentials
- r-globals==0.12.5
- r-listenv==0.8.0
- r-globals=0.12.5
- r-listenv=0.8.0
- r-rlang
- r-ellipsis
- r-evaluate
Expand All @@ -52,12 +51,13 @@ dependencies:
- r-testthat
- r-vctrs
- xlrd
- umap-learn==0.3.10
- louvain==0.6.1
- scvi==0.6.7
- scanorama==1.7.0
- umap-learn=0.3.10
- louvain=0.6.1
- scvi=0.6.7
- scanorama=1.7.0
- pip:
- git+git://github.com/theislab/scib.git
- git+git://github.com/theislab/[email protected]
- tensorflow==1.15
#- trvae==1.1.2
- trvaep==0.1.0
- mnnpy==0.1.9.5
Expand Down
14 changes: 7 additions & 7 deletions envs/scib-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,21 @@ dependencies:
- openblas
- llvmlite
- libgcc-ng
- numba<=0.46 # for mnnpy
- anndata2ri
- r-base
- r-essentials
- r-devtools
- r-stringi
- bioconductor-scater
- bioconductor-scran
# Methods
- scvi==0.6.7
- scanorama==1.7.0
- bbknn==1.3.9
- r-seurat==3.1.1
- numba<=0.46 # for mnnpy
- anndata2ri==1.0.5 # 1.0.6 has issues with HDF5 conversion
- scvi=0.6.7
- scanorama=1.7.0
- bbknn=1.3.9
- r-seurat=3.1.1
- pip:
- git+git://github.com/theislab/scib.git
- scib==1.0.0
- trvaep==0.1.0
- mnnpy==0.1.9.5
- scgen==1.1.5
Expand Down
43 changes: 22 additions & 21 deletions scripts/integration/runIntegration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
# coding: utf-8

import scanpy as sc
import scIB
import scib
import warnings

warnings.filterwarnings('ignore')


Expand All @@ -16,16 +17,15 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
"""

adata = sc.read(inPath)

if timing:
if celltype is not None:
integrated_tmp = scIB.metrics.measureTM(method, adata, batch, celltype)
integrated_tmp = scib.metrics.measureTM(method, adata, batch, celltype)
else:
integrated_tmp = scIB.metrics.measureTM(method, adata, batch)
integrated_tmp = scib.metrics.measureTM(method, adata, batch)

integrated = integrated_tmp[2][0]


integrated.uns['mem'] = integrated_tmp[0]
integrated.uns['runtime'] = integrated_tmp[1]

Expand All @@ -34,10 +34,11 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
integrated = method(adata, batch, celltype)
else:
integrated = method(adata, batch)

sc.write(outPath, integrated)

if __name__=='__main__':

if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Run the integration methods')
Expand All @@ -59,22 +60,22 @@ def runIntegration(inPath, outPath, method, hvg, batch, celltype=None):
celltype = args.celltype
method = args.method
methods = {
'scanorama': scIB.integration.runScanorama,
'trvae': scIB.integration.runTrVae,
'trvaep': scIB.integration.runTrVaep,
'scgen': scIB.integration.runScGen,
'mnn': scIB.integration.runMNN,
'bbknn': scIB.integration.runBBKNN,
'scvi': scIB.integration.runScvi,
'scanvi': scIB.integration.runScanvi,
'combat': scIB.integration.runCombat,
'saucie': scIB.integration.runSaucie,
'desc': scIB.integration.runDESC
'scanorama': scib.integration.scanorama,
'trvae': scib.integration.trvae,
'trvaep': scib.integration.trvaep,
'scgen': scib.integration.scgen,
'mnn': scib.integration.mnn,
'bbknn': scib.integration.bbknn,
'scvi': scib.integration.scvi,
'scanvi': scib.integration.scanvi,
'combat': scib.integration.combat,
'saucie': scib.integration.saucie,
'desc': scib.integration.desc
}

if method not in methods.keys():
raise ValueError(f'Method "{method}" does not exist. Please use one of '
f'the following:\n{list(methods.keys())}')
run= methods[method]

run = methods[method]
runIntegration(file, out, run, hvg, batch, celltype)
1 change: 1 addition & 0 deletions scripts/integration/runMethods.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ getScriptPath <- function(){
setwd(getScriptPath())

library('optparse')
library(rlang)
require(Seurat)

option_list <- list(make_option(c("-m", "--method"), type="character", default=NA, help="integration method to use"),
Expand Down
12 changes: 6 additions & 6 deletions scripts/integration/runPost.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python
# coding: utf-8

import scanpy as sc
import scIB
import scib
import warnings

warnings.filterwarnings('ignore')


Expand All @@ -15,14 +15,14 @@ def runPost(inPath, outPath, conos):
conos: set if input is conos object
"""
if conos:
adata = scIB.pp.readConos(inPath)
adata = scib.pp.read_conos(inPath)
else:
adata = scIB.pp.readSeurat(inPath)
adata = scib.pp.read_seurat(inPath)

adata.write(outPath)


if __name__=='__main__':
if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Run the integration methods')
Expand All @@ -35,5 +35,5 @@ def runPost(inPath, outPath, conos):
file = args.input_file
out = args.output_file
conos = args.conos

runPost(file, out, conos)
26 changes: 12 additions & 14 deletions scripts/integration_fail_file.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@

from snakemake.io import load_configfile
from pathlib import Path

if __name__=='__main__':
if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Create an empty output file for failed integration runs')
Expand All @@ -25,31 +24,30 @@

# Check inputs
if method not in params['METHODS']:
raise ValueError(f'{method} is not a valid method.\n'
raise ValueError(f'{method} is not a valid method.\n'
f'Please choose one of: {list(params["METHODS"].keys())}')

if task not in params['DATA_SCENARIOS']:
raise ValueError(f'{task} is not a valid integration task.\n'
raise ValueError(f'{task} is not a valid integration task.\n'
f'Please choose one of: {list(params["DATA_SCENARIOS"].keys())}')

# Get path values
folder = params['ROOT']
t_folder = task
s_folder = 'scaled' if scale else 'unscaled'
h_folder = 'hvg' if hvgs else 'full_feature'
r_folder = 'R/' if 'R' in params['METHODS'][method] else ''
filename = method+'.h5ad'
filename = method + '.h5ad'

folder_path = '/'.join([folder,task,'integration',s_folder,h_folder])+'/'+r_folder
full_path = folder_path+filename
folder_path = '/'.join([folder, task, 'integration', s_folder, h_folder]) + '/' + r_folder
full_path = folder_path + filename

if 'R' in params['METHODS'][method]:
filename_r = method+'.RDS'
full_path_r = folder_path+filename_r
filename_r = method + '.RDS'
full_path_r = folder_path + filename_r
Path(full_path_r).touch()
Path(full_path_r+".benchmark").touch()
Path(full_path_r + ".benchmark").touch()

#print(full_path)
# print(full_path)
Path(full_path).touch()
Path(full_path+".benchmark").touch()

Path(full_path + ".benchmark").touch()
4 changes: 1 addition & 3 deletions scripts/merge_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import argparse
import os

if __name__=='__main__':
if __name__ == '__main__':
"""
Merge benchmark output for all scenarios, methods and settings
"""
Expand All @@ -14,7 +14,6 @@
help='root directory for scIB output')
args = parser.parse_args()


print("Searching for .benchmark files...")
bench_files = []
for path, dirs, files in os.walk(args.root):
Expand Down Expand Up @@ -43,4 +42,3 @@
results.to_csv(args.output, index_label='scenario')

print("Done!")

17 changes: 7 additions & 10 deletions scripts/metrics/merge_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,32 @@
# coding: utf-8

import pandas as pd
import scIB
import warnings

warnings.filterwarnings('ignore')
import argparse
from functools import reduce

if __name__=='__main__':
if __name__ == '__main__':
"""
Merge metrics output for all scenarios, methods and settings
"""

parser = argparse.ArgumentParser(description='Collect all metrics')

parser.add_argument('-i', '--input', nargs='+', required=True, help='input directory')
parser.add_argument('-o', '--output', required=True, help='output file')
parser.add_argument('-r', '--root', required=True,
parser.add_argument('-r', '--root', required=True,
help='root directory for inferring column names from path')
args = parser.parse_args()



res_list = []
for file in args.input:
clean_name = file.replace(args.root, "").replace(".csv", "")
res = pd.read_csv(file, index_col=0)
res.rename(columns={res.columns[0]: clean_name}, inplace=True)
res_list.append(res)
results = reduce(lambda left,right: pd.merge(left, right, left_index=True, right_index=True), res_list)

results = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True), res_list)
results = results.T
results.to_csv(args.output)


Loading

0 comments on commit 75ae100

Please sign in to comment.