From e871a267e35a1cfb9226fc53ff0acdd1562393c0 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Thu, 12 Nov 2020 23:06:05 -0600 Subject: [PATCH 001/117] add ablation --- generating_Impute_0.1-0.8-ablation.py | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 generating_Impute_0.1-0.8-ablation.py diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py new file mode 100644 index 0000000..6aad7b4 --- /dev/null +++ b/generating_Impute_0.1-0.8-ablation.py @@ -0,0 +1,80 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_L_1 2geL1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 1 --npyDir','npyG2EL_1/'), + ('run_experiment_1_g_e_1 1ge1','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG1E_1/'), + ('run_experiment_2_g_f_1 2gf1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 1 --npyDir','npyG2F_1/'), + ('run_experiment_2_n_e_LK_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), + + ('run_experiment_2_g_e_L_2 2geL2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 2 --npyDir','npyG2EL_2/'), + ('run_experiment_1_g_e_2 1ge2','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG1E_2/'), + ('run_experiment_2_g_f_2 2gf2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 2 --npyDir','npyG2F_2/'), + ('run_experiment_2_n_e_LK_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), + ('run_experiment_2_g_e_2 2ge2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG2E_2/'), + + ('run_experiment_2_g_e_L_3 2geL3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 3 --npyDir','npyG2EL_3/'), + ('run_experiment_1_g_e_3 1ge3','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 3 
--npyDir','npyG1E_3/'), + ('run_experiment_2_g_f_3 2gf3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 3 --npyDir','npyG2F_3/'), + ('run_experiment_2_n_e_LK_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), + ('run_experiment_2_g_e_3 2ge3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG2E_3/'), +] + +dropoutList = ['0.1','0.3','0.6','0.8'] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = "npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() From 0ce6b2ed6850cf55c1ca026d78b5e12215483cdc Mon Sep 17 00:00:00 2001 From: juexinwang Date: Thu, 12 Nov 2020 23:31:39 -0600 Subject: [PATCH 002/117] add ablation tests on imputation --- generating_Impute_0.1-0.8-ablation.py | 6 ++-- submitCluster_imputation_0.1-0.8-ablation.sh | 35 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 submitCluster_imputation_0.1-0.8-ablation.sh diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 6aad7b4..86773f7 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -31,19 +31,19 @@ ('run_experiment_2_g_e_L_1 2geL1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 1 --npyDir','npyG2EL_1/'), ('run_experiment_1_g_e_1 1ge1','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG1E_1/'), ('run_experiment_2_g_f_1 2gf1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 1 --npyDir','npyG2F_1/'), - ('run_experiment_2_n_e_LK_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), + ('run_experiment_2_n_e_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), ('run_experiment_2_g_e_L_2 2geL2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 2 
--npyDir','npyG2EL_2/'), ('run_experiment_1_g_e_2 1ge2','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG1E_2/'), ('run_experiment_2_g_f_2 2gf2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 2 --npyDir','npyG2F_2/'), - ('run_experiment_2_n_e_LK_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), + ('run_experiment_2_n_e_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), ('run_experiment_2_g_e_2 2ge2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG2E_2/'), ('run_experiment_2_g_e_L_3 2geL3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 3 --npyDir','npyG2EL_3/'), ('run_experiment_1_g_e_3 1ge3','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG1E_3/'), ('run_experiment_2_g_f_3 2gf3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 3 --npyDir','npyG2F_3/'), - ('run_experiment_2_n_e_LK_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), + ('run_experiment_2_n_e_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), ('run_experiment_2_g_e_3 2ge3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG2E_3/'), ] diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh new file mode 100644 index 0000000..d558529 --- /dev/null +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -0,0 +1,35 @@ +mkdir npyImputeG2E_1 +mkdir npyImputeG2EL_1 +mkdir npyImputeG2F_1 +mkdir npyImputeN2E_1 +mkdir npyImputeG1E_1 + +mkdir npyImputeG2E_2 +mkdir npyImputeG2EL_2 +mkdir npyImputeG2F_2 +mkdir npyImputeN2E_2 +mkdir npyImputeG1E_2 + +mkdir npyImputeG2E_3 +mkdir npyImputeG2EL_3 +mkdir npyImputeG2F_3 +mkdir npyImputeN2E_3 +mkdir npyImputeG1E_3 + +for i in {1..3} +do +for j in {0.1,0.3,0.6,0.8} +do +sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh + +sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +done +done \ No newline at end of file From 157a7730bd6746e026086cae2b550466cd3eec9a Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 16:48:56 -0600 Subject: [PATCH 003/117] add plot --- main_benchmark.py | 3 +-- results/plot_distribution.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 results/plot_distribution.py diff --git a/main_benchmark.py b/main_benchmark.py index 101de9f..31f0942 100644 --- a/main_benchmark.py +++ b/main_benchmark.py @@ -23,8 +23,7 @@ # Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. 
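# For orientation, one concrete invocation of this script, assembled from the
# commandLine template in generating_Impute_0.1-0.8-ablation.py above (the
# 2ge1 ablation case); a reconstruction for readability, not a verified command:
#   python3 -W ignore main_benchmark.py --datasetName 12.Klein \
#       --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv \
#       --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK \
#       --useGAEembedding --seed 1 --npyDir npyImputeG2E_1/ --imputeMode --dropoutRatio 0.1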
parser = argparse.ArgumentParser(description='Graph EM AutoEncoder for scRNA') parser.add_argument('--datasetName', type=str, default='1.Biase', - help='TGFb/sci-CAR/sci-CAR_LTMG/MMPbasal/MMPbasal_all/MMPbasal_allgene/MMPbasal_allcell/MMPepo/MMPbasal_LTMG/MMPbasal_all_LTMG/MMPbasal_2000') -# Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') parser.add_argument('--batch-size', type=int, default=12800, metavar='N', help='input batch size for training (default: 12800)') parser.add_argument('--epochs', type=int, default=500, metavar='N', diff --git a/results/plot_distribution.py b/results/plot_distribution.py new file mode 100644 index 0000000..6248850 --- /dev/null +++ b/results/plot_distribution.py @@ -0,0 +1,40 @@ +import numpy as np +import matplotlib.pyplot as plt +import argparse + +parser = argparse.ArgumentParser(description='Infer Spatial from Expression in single cells') + +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--para', type=str, default='LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1', + help='save npy results in directory') +parser.add_argument('--inDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--outDir', type=str, default='DistNpy/', + help='save npy results in directory') +args = parser.parse_args() + + +ix=np.load(args.datasetName+'_'+args.para+'_dropix.npy') +i =np.load(args.datasetName+'_'+args.para+'_dropi.npy') +j =np.load(args.datasetName+'_'+args.para+'_dropj.npy') +recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) +features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) +features=features.tolist() + +_ = plt.hist(features.ravel()) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +plt.close() + +features_log = np.log(features+1) +_ = plt.hist(features_log.ravel(),bin=100) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +plt.close() + +_ = plt.hist(recon.ravel(),bin=100) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +plt.close() + +recon_exp = np.exp(recon)-1 +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +plt.close() \ No newline at end of file From 751360aed9f6d09463454de48dafacb8f8206311 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 17:30:15 -0600 Subject: [PATCH 004/117] update dist --- results/plot_distribution.py | 1 + 1 file changed, 1 insertion(+) diff --git a/results/plot_distribution.py b/results/plot_distribution.py index 6248850..b4d4115 100644 --- a/results/plot_distribution.py +++ b/results/plot_distribution.py @@ -21,6 +21,7 @@ recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() +features=features.todense() _ = plt.hist(features.ravel()) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') From 0534832ac513a2c874f1d2f47712513f61c18787 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 18:38:40 -0600 Subject: [PATCH 
005/117] reconstruct --- results/plot_distribution.py => plot_distribution.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename results/plot_distribution.py => plot_distribution.py (100%) diff --git a/results/plot_distribution.py b/plot_distribution.py similarity index 100% rename from results/plot_distribution.py rename to plot_distribution.py From 37c1a8b042c797f3b9cb50ca5993bc06b5724929 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 18:39:25 -0600 Subject: [PATCH 006/117] reconstruct --- plot_distribution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index b4d4115..f55f1ff 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -15,11 +15,11 @@ args = parser.parse_args() -ix=np.load(args.datasetName+'_'+args.para+'_dropix.npy') -i =np.load(args.datasetName+'_'+args.para+'_dropi.npy') -j =np.load(args.datasetName+'_'+args.para+'_dropj.npy') -recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) -features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) +ix=np.load(args.inDir+args.datasetName+'_'+args.para+'_dropix.npy') +i =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropi.npy') +j =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropj.npy') +recon =np.load(args.inDir+args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) +features=np.load(args.inDir+args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() features=features.todense() From 5f1dfc99096461e52cb4c5c08575586e9859332f Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 23:00:04 -0600 Subject: [PATCH 007/117] change numpy hist --- plot_distribution.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index f55f1ff..c0471d0 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -23,19 +23,47 @@ features=features.tolist() features=features.todense() -_ = plt.hist(features.ravel()) +# Directly use plt histogram +# _ = plt.hist(features.ravel()) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +# plt.close() + +# features_log = np.log(features+1) +# _ = plt.hist(features_log.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +# plt.close() + +# _ = plt.hist(recon.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +# plt.close() + +# recon_exp = np.exp(recon)-1 +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +# plt.close() + +# Use numpy histogram +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -_ = plt.hist(features_log.ravel(),bin=100) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -_ = plt.hist(recon.ravel(),bin=100) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) 
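# A tiny self-contained illustration of the np.histogram pattern this patch
# switches to (toy data, not part of the diff): counts come back per bin, and
# bin_edges is always one element longer than hist.
#   import numpy as np
#   hist, bin_edges = np.histogram(np.arange(6), bins=np.arange(0, 7, 2))
#   # hist -> array([2, 2, 2]); bin_edges -> array([0, 2, 4, 6])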
plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From bc3fc5a8cbc28287158d9703789a74bb5d28bdab Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 23:48:59 -0600 Subject: [PATCH 008/117] change numpy hist --- plot_distribution.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plot_distribution.py b/plot_distribution.py index c0471d0..1b103f7 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -24,6 +24,8 @@ features=features.todense() # Directly use plt histogram +# Careful! plt.hist does not work for huge datasets + # _ = plt.hist(features.ravel()) # plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') # plt.close() @@ -43,6 +45,7 @@ # Use numpy histogram hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') @@ -50,12 +53,14 @@ features_log = np.log(features+1) hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') @@ -63,6 +68,7 @@ recon_exp = np.exp(recon)-1 hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') From 6b0c31344630d6b5ab2f7f726e5c7de4f50fd6ec Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 00:30:56 -0600 Subject: [PATCH 009/117] change numpy hist --- plot_distribution.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index 1b103f7..7d7764a 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -44,32 +44,33 @@ # plt.close() # Use numpy histogram -hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),10)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +# plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.01)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) 
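# Aside on the transform above (editorial, not part of the diff):
# np.log(features+1) is equivalent to np.log1p(features); log1p is the
# numerically safer spelling for values near zero, although the two agree
# for typical count data.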
plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.01)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),10)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From fd5c4ae4cf852a7e06ae2fe1fd7fcd4a55821a65 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 10:10:52 -0600 Subject: [PATCH 010/117] change numpy hist --- plot_distribution.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index 7d7764a..06196bc 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -43,34 +43,45 @@ # plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') # plt.close() +# Something wrong, have to change to here: +# plt.bar(bin_edges[:-1], hist) +# plt.xlim(min(bin_edges), max(bin_edges)) + # Use numpy histogram -hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),10)) +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features)+10,10)) print(hist) -# plt.bar(bin_edges[:-1], hist, width = 1) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.01)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.01,0.01)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.01)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.01,0.01)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),10)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features)+10,10)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) 
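# Design note (editorial, not part of the diff): plotting against the integer
# positions x_pos and then relabeling the ticks with bin_edges keeps the bars
# evenly spaced regardless of bin width; x_pos = [i for i, _ in enumerate(hist)]
# is simply list(range(len(hist))).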
+plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From 4398da535285e6c2b997eb18d9492d2d358b72f1 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 10:22:39 -0600 Subject: [PATCH 011/117] change fig --- plot_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plot_distribution.py b/plot_distribution.py index 06196bc..3aad4be 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -77,7 +77,7 @@ plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features)+10,10)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(recon_exp)+10,10)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) From 8677a704b2d23e2654dc05705f8fdf7bd414dab5 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 09:22:43 -0600 Subject: [PATCH 012/117] add r support --- plot_distribution.py | 27 ++++++++++++++-- plot_distribution.r | 77 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 plot_distribution.r diff --git a/plot_distribution.py b/plot_distribution.py index 3aad4be..aa09b2b 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -1,6 +1,8 @@ import numpy as np import matplotlib.pyplot as plt import argparse +from scipy.stats import chi2_contingency +from scipy.stats import nbinom parser = argparse.ArgumentParser(description='Infer Spatial from Expression in single cells') @@ -18,6 +20,8 @@ ix=np.load(args.inDir+args.datasetName+'_'+args.para+'_dropix.npy') i =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropi.npy') j =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropj.npy') +# recon =np.load('12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_recon.npy',allow_pickle=True) +# features=np.load('/Users/juexinwang/Downloads/temp/12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy',allow_pickle=True) recon =np.load(args.inDir+args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) features=np.load(args.inDir+args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() @@ -58,7 +62,7 @@ plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.01,0.01)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.1,0.1)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) @@ -67,7 +71,7 @@ plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.01,0.01)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.1,0.1)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) @@ -84,4 +88,21 @@ plt.xticks(x_pos, bin_edges[:-1]) plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') -plt.close() \ No newline at end of file +plt.close() + +#test +# find x,y in 2D matrix +# numpy.unravel_index(a.argmax(), a.shape) +# data = [[207, 282, 241], [282, 240, 234, 3]] +# chi2_contingency(data) +np.savetxt(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.txt', features, fmt='%d') + +# https://stats.stackexchange.com/questions/260580/negative-binomial-distribution-with-python-scipy-stats +# 
https://en.wikipedia.org/wiki/Negative_binomial_distribution#Alternative_formulations +# mean = np.mean(features) +# var = np.var(features) +# p = (var-mean)/var +# r = mean**2/(var-mean) +# x = np.arange(nbinom.ppf(0.01, p, r),nbinom.ppf(0.99, p, r)) +# ax.plot(x, nbinom.pmf(x, p, r), 'bo', ms=8, label='nbinom pmf') + diff --git a/plot_distribution.r b/plot_distribution.r new file mode 100644 index 0000000..d2ab09b --- /dev/null +++ b/plot_distribution.r @@ -0,0 +1,77 @@ +# R +# Running after plot_distribution.py + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf +# https://arxiv.org/pdf/1810.02618.pdf +# https://rdrr.io/cran/gamlss.dist/man/ZANBI.html + +#install in conda: +# https://anaconda.org/conda-forge/r-fitdistrplus +# https://anaconda.org/conda-forge/r-gamlss +# install.packages("fitdistrplus") +# install.packages("gamlss") +library(fitdistrplus) +library(gamlss) + +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least four argument must be supplied (input file).n", call.=FALSE) +} + +datasetName=args[1] +para=args[2] +indir=args[3] +outdir=args[4] + +features = read.table(paste(indir,"/",datasetName,"_",para,"_features.txt",sep=''), header = FALSE, sep = " ") +features = data.matrix(features) +features = as.vector(features) +features = as.numeric(features) + +mu_ = mean(features) +sigma_ = (sd(features)-mean(features))/mean(features)**2 +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 +fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) +tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) +plot(fit_zinb) +dev.off() + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 +nu_ = 1-length(which(features!=0))/(length(features)) +fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) +gofstat(fit_zinb) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff",sep='')) +plot(fit_zinb) +dev.off() + +fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) +gofstat(fit_zinb_) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff_",sep='')) +plot(fit_zinb_) +dev.off() + + +# NBI: +# Goodness-of-fit statistics +# 1-mle-NBI +# Kolmogorov-Smirnov statistic 3.671374e-01 +# Cramer-von Mises statistic 1.016737e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-NBI +# Akaike's Information Criterion 25429885 +# Bayesian Information Criterion 25429912 + + +# ZINB +# Goodness-of-fit statistics +# 1-mle-ZINBI +# Kolmogorov-Smirnov statistic 4.532250e-01 +# Cramer-von Mises statistic 1.873046e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-ZINBI +# Akaike's Information Criterion 25969108 +# Bayesian Information Criterion 25969135 \ No newline at end of file From a2b27d120b46497a0b4231736930d05e170c449f Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 11:13:50 -0600 Subject: [PATCH 013/117] add generating distribution --- generating_distribution.py | 85 ++++++++++++++++++++++++++++++++++++++ plot_distribution.r | 1 + 2 files changed, 86 insertions(+) create mode 100644 generating_distribution.py diff --git a/generating_distribution.py b/generating_distribution.py new file mode 100644 index 0000000..625d4ef --- /dev/null +++ b/generating_distribution.py @@ -0,0 +1,85 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = 
argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('plot_G2E_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G2EL_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G1E_0.1 G1E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.3 G1E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.6 G1E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.8 G1E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + + ('plot_G2F_0.1 G2F1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.3 G2F3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.6 G2F6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.8 G2F8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + + ('plot_N2E_0.1 N2E1','noregu_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.3 N2E3','noregu_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.6 N2E6','noregu_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.8 N2E8','noregu_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + +] + +seedList = ['_1/','_2/','_3/'] + +# generate sbatch files: +for item in methodsList: + batchInfo,param,dirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore main_benchmark.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13.sh",'w') as fw: + fw.write(outStr) + 
fw.close() + diff --git a/plot_distribution.r b/plot_distribution.r index d2ab09b..a679862 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -32,6 +32,7 @@ mu_ = mean(features) sigma_ = (sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) +gofstat(fit_nbi) tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) plot(fit_zinb) dev.off() From be583819ed9a227e23327bc516e9e9d002643e91 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 11:35:23 -0600 Subject: [PATCH 014/117] add distribution sbatch file --- submitCluster_distribution.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 submitCluster_distribution.sh diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh new file mode 100644 index 0000000..d4e9bae --- /dev/null +++ b/submitCluster_distribution.sh @@ -0,0 +1,16 @@ +#submit plotting + +for i in {0.1,0.3,0.6,0.8} +do +sbatch plot_G2E_$i\_12.sh +sbatch plot_G2EL_$i\_12.sh +sbatch plot_G1E_$i\_12.sh +sbatch plot_G2F_$i\_12.sh +sbatch plot_N2E_$i\_12.sh + +sbatch plot_G2E_$i\_13.sh +sbatch plot_G2EL_$i\_13.sh +sbatch plot_G1E_$i\_13.sh +sbatch plot_G2F_$i\_13.sh +sbatch plot_N2E_$i\_13.sh +done \ No newline at end of file From 28451f50673f719158d3ba06709265dcd19f8788 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 12:20:16 -0600 Subject: [PATCH 015/117] update fig --- plot_distribution.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plot_distribution.r b/plot_distribution.r index a679862..9559896 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -34,7 +34,7 @@ sigma_ = (sd(features)-mean(features))/mean(features)**2 fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) gofstat(fit_nbi) tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) -plot(fit_zinb) +plot(fit_nbi) dev.off() # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 From d805f4bf113d052fe7357656f059688cbbd4f093 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 15:01:24 -0600 Subject: [PATCH 016/117] debug --- generating_distribution.py | 4 ++-- plot_distribution.r | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generating_distribution.py b/generating_distribution.py index 625d4ef..30cd87e 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -67,7 +67,7 @@ commandLine = '' for seed in seedList: - commandLine += "python3 -W ignore main_benchmark.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "python3 -W ignore plot_distribution.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_12.sh",'w') as fw: @@ -76,7 +76,7 @@ commandLine = '' for seed in seedList: - commandLine += "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "python3 -W ignore plot_distribution.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" 
"+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_13.sh",'w') as fw: diff --git a/plot_distribution.r b/plot_distribution.r index 9559896..cb9872c 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -28,6 +28,7 @@ features = data.matrix(features) features = as.vector(features) features = as.numeric(features) +print(paste("\n",indir,"/",datasetName,"_",para,"_features.txt") mu_ = mean(features) sigma_ = (sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 From e823a2a231228139dfffe80949d4a673231cf46a Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 15:20:47 -0600 Subject: [PATCH 017/117] update --- generating_distribution.py | 4 ++-- plot_distribution.py | 6 +++--- plot_distribution.r | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generating_distribution.py b/generating_distribution.py index 30cd87e..7b42241 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -68,7 +68,7 @@ commandLine = '' for seed in seedList: commandLine += "python3 -W ignore plot_distribution.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" - commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_12.sh",'w') as fw: fw.write(outStr) @@ -77,7 +77,7 @@ commandLine = '' for seed in seedList: commandLine += "python3 -W ignore plot_distribution.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" - commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_13.sh",'w') as fw: fw.write(outStr) diff --git a/plot_distribution.py b/plot_distribution.py index aa09b2b..081f6ad 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -53,7 +53,7 @@ # Use numpy histogram hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features)+10,10)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) @@ -63,7 +63,7 @@ features_log = np.log(features+1) hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.1,0.1)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) @@ -72,7 +72,7 @@ plt.close() hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.1,0.1)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) diff --git a/plot_distribution.r b/plot_distribution.r index cb9872c..473b3cf 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -28,7 +28,7 @@ features = data.matrix(features) features = as.vector(features) features = as.numeric(features) -print(paste("\n",indir,"/",datasetName,"_",para,"_features.txt") +print(paste(indir,"/",datasetName,"_",para,"_features.txt",sep='')) mu_ = mean(features) sigma_ = 
(sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 From d2c8eb91c5ff670ade0929d2614a1f65907c1362 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:32:34 -0600 Subject: [PATCH 018/117] change orders --- plot_distribution.r | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/plot_distribution.r b/plot_distribution.r index 473b3cf..409f9b6 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -12,6 +12,7 @@ # install.packages("gamlss") library(fitdistrplus) library(gamlss) +suppressWarnings() args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -39,6 +40,13 @@ plot(fit_nbi) dev.off() # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 +fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) +gofstat(fit_zinb_) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI_.tiff",sep='')) +plot(fit_zinb_) +dev.off() + + nu_ = 1-length(which(features!=0))/(length(features)) fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) gofstat(fit_zinb) @@ -46,11 +54,7 @@ tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff",sep='')) plot(fit_zinb) dev.off() -fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) -gofstat(fit_zinb_) -tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff_",sep='')) -plot(fit_zinb_) -dev.off() + # NBI: From c404291ad3f164d9a35346cf82b3e5c1c10e5a40 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:35:21 -0600 Subject: [PATCH 019/117] change orders --- submitCluster_distribution.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index d4e9bae..d526005 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -3,12 +3,17 @@ for i in {0.1,0.3,0.6,0.8} do sbatch plot_G2E_$i\_12.sh + +sbatch plot_G2E_$i\_13.sh +done + +for i in {0.1,0.3,0.6,0.8} +do sbatch plot_G2EL_$i\_12.sh sbatch plot_G1E_$i\_12.sh sbatch plot_G2F_$i\_12.sh sbatch plot_N2E_$i\_12.sh -sbatch plot_G2E_$i\_13.sh sbatch plot_G2EL_$i\_13.sh sbatch plot_G1E_$i\_13.sh sbatch plot_G2F_$i\_13.sh From 6cb5bd68ae3e6749a36b5647495054ef3e08ea7e Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:44:25 -0600 Subject: [PATCH 020/117] change orders --- generating_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generating_distribution.py b/generating_distribution.py index 7b42241..0ab95f7 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -10,7 +10,7 @@ templateStr1 = "#! 
/bin/bash\n"\ "######################### Batch Headers #########################\n"\ "#SBATCH -A xulab\n"\ -"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute\n"\ "#SBATCH -J " templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ From 4b201320673d90d2f9ef68ae19a3698430b33395 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 18:54:55 -0600 Subject: [PATCH 021/117] change orders --- benchmark_util.py | 4 +++- plot_distribution.r | 1 - results/results_Reading.py | 1 + results/results_impute.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index d85d409..63d6eb4 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -562,6 +562,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - np.log(y+1)) + resultL2 = (x - np.log(y+1))**2 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -570,8 +571,9 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - np.log(yuse+1)) + resultL2 = (x - np.log(yuse+1))**2 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result),np.mean(resultL2), np.median(resultL2), np.min(resultL2), np.max(resultL2) # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): diff --git a/plot_distribution.r b/plot_distribution.r index 409f9b6..4fe8b23 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -12,7 +12,6 @@ # install.packages("gamlss") library(fitdistrplus) library(gamlss) -suppressWarnings() args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { diff --git a/results/results_Reading.py b/results/results_Reading.py index 88f34aa..50ebc2b 100644 --- a/results/results_Reading.py +++ b/results/results_Reading.py @@ -13,6 +13,7 @@ args = parser.parse_args() # Note: +# Main Check results # Generate results in python other than in shell for better organization # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel diff --git a/results/results_impute.py b/results/results_impute.py index 61796dc..6c001ad 100644 --- a/results/results_impute.py +++ b/results/results_impute.py @@ -56,8 +56,8 @@ dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy') -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax), end='') def imputeResult(inputData): 
''' From b72a43bfdc804a018874b0f1c3ed72c79fb91fe0 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 21:49:41 -0600 Subject: [PATCH 022/117] Add RMSE --- benchmark_util.py | 6 +++--- plot_distribution.r | 5 ++++- results/results_impute.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index 63d6eb4..a72418b 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -562,7 +562,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - np.log(y+1)) - resultL2 = (x - np.log(y+1))**2 + rmse = ((x - np.log(y+1))**2/len(result))**0.5 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -571,9 +571,9 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - np.log(yuse+1)) - resultL2 = (x - np.log(yuse+1))**2 + rmse = ((x - np.log(yuse+1))**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result),np.mean(resultL2), np.median(resultL2), np.min(resultL2), np.max(resultL2) + return np.mean(result), np.median(result), np.min(result), np.max(result), rmse # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): diff --git a/plot_distribution.r b/plot_distribution.r index 4fe8b23..fdc5a17 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -79,4 +79,7 @@ dev.off() # Goodness-of-fit criteria # 1-mle-ZINBI # Akaike's Information Criterion 25969108 -# Bayesian Information Criterion 25969135 \ No newline at end of file +# Bayesian Information Criterion 25969135 + +# Can learn from * +# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html \ No newline at end of file diff --git a/results/results_impute.py b/results/results_impute.py index 6c001ad..f265477 100644 --- a/results/results_impute.py +++ b/results/results_impute.py @@ -56,8 +56,8 @@ dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy') -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') def imputeResult(inputData): ''' From 5fedb49729c99e960a006e030176a848634c9bb0 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 10:34:29 -0600 Subject: [PATCH 023/117] add recheck --- results/results_Reading_recheck.py | 307 ++++++++++++++++++ .../submitCluster_Result_Impute_recheck.sh | 17 + 2 files changed, 324 insertions(+) create mode 100644 results/results_Reading_recheck.py create mode 100644 results/submitCluster_Result_Impute_recheck.sh diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py new file mode 100644 index 0000000..c6584b2 --- /dev/null +++ 
b/results/results_Reading_recheck.py @@ -0,0 +1,307 @@ +import os +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +parser.add_argument('--methodName', type=int, default=0, + help="method used: 0-62") +parser.add_argument('--imputeMode', default=True, action='store_true', + help='impute or not (default: False). Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--runMode',action='store_true', default=False, help="Run or prepare cluster script") +parser.add_argument('--splitMode', default=False, action='store_true', + help='whether split, used for long queue') +parser.add_argument('--batchStr', type=int, default=0, + help="method used: 1-13") +args = parser.parse_args() + +# Note: +# Generate results in python other than in shell for better organization +# We are not use runpy.run_path('main_result.py') for it is hard to pass arguments +# We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel +# So we use os.system('') here + +if args.splitMode: + #The split of batch, more batches, more parallel + + if args.batchStr == 8: + datasetList = [ + '9.Chung', + # '9.Chung --discreteTag' + ] + elif args.batchStr == 11: + datasetList = [ + '11.Kolodziejczyk', + # '11.Kolodziejczyk --discreteTag' + ] + elif args.batchStr == 12: + datasetList = [ + '12.Klein', + # '12.Klein --discreteTag' + ] + elif args.batchStr == 13: + datasetList = [ + '13.Zeisel', + # '13.Zeisel --discreteTag' + ] +else: + datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', + ] + +if args.imputeMode: + pyStr = 'results_impute.py' + + npyList = [ + '../npyImputeG2E_1/ --ratio 0.1', #1 + '../npyImputeG2E_1/ --ratio 0.3', #2 + '../npyImputeG2E_1/ --ratio 0.6', #3 + '../npyImputeG2E_1/ --ratio 0.8', #4 + '../npyImputeG2EL_1/ --ratio 0.1', #5 + '../npyImputeG2EL_1/ --ratio 0.3', #6 + '../npyImputeG2EL_1/ --ratio 0.6', #7 + '../npyImputeG2EL_1/ --ratio 0.8', #8 + '../npyImputeG1E_1/ --ratio 0.1', #9 + '../npyImputeG1E_1/ --ratio 0.3', #10 + '../npyImputeG1E_1/ --ratio 0.6', #11 + '../npyImputeG1E_1/ --ratio 0.8', #12 + '../npyImputeG2F_1/ --ratio 0.1', #13 + '../npyImputeG2F_1/ --ratio 0.3', #14 + '../npyImputeG2F_1/ --ratio 0.6', #15 + '../npyImputeG2F_1/ --ratio 0.8', #16 + '../npyImputeN2E_1/ --ratio 0.1', #17 + '../npyImputeN2E_1/ --ratio 0.3', #18 + '../npyImputeN2E_1/ --ratio 0.6', #19 + '../npyImputeN2E_1/ --ratio 0.8', #20 + + '../npyImputeG2E_2/ --ratio 0.1', #21 + '../npyImputeG2E_2/ --ratio 0.3', #22 + '../npyImputeG2E_2/ --ratio 0.6', #23 + '../npyImputeG2E_2/ --ratio 0.8', #24 + '../npyImputeG2EL_2/ --ratio 0.1', #25 + '../npyImputeG2EL_2/ --ratio 0.3', #26 + '../npyImputeG2EL_2/ --ratio 0.6', #27 + '../npyImputeG2EL_2/ --ratio 0.8', #28 + '../npyImputeG1E_2/ --ratio 0.1', #29 + '../npyImputeG1E_2/ --ratio 0.3', #30 + '../npyImputeG1E_2/ --ratio 0.6', #31 + '../npyImputeG1E_2/ --ratio 0.8', #32 + '../npyImputeG2F_2/ --ratio 0.1', #33 + '../npyImputeG2F_2/ --ratio 0.3', #34 + '../npyImputeG2F_2/ --ratio 0.6', #35 + '../npyImputeG2F_2/ --ratio 0.8', #36 + '../npyImputeN2E_2/ --ratio 0.1', #37 + '../npyImputeN2E_2/ --ratio 0.3', #38 + '../npyImputeN2E_2/ --ratio 0.6', #39 + '../npyImputeN2E_2/ --ratio 0.8', #40 + + '../npyImputeG2E_3/ --ratio 0.1', #41 + '../npyImputeG2E_3/ --ratio 0.3', #42 + '../npyImputeG2E_3/ --ratio 0.6', #43 + '../npyImputeG2E_3/ --ratio 0.8', #44 + '../npyImputeG2EL_3/ --ratio 0.1', #45 + '../npyImputeG2EL_3/ --ratio 0.3', #46 + '../npyImputeG2EL_3/ 
--ratio 0.6', #47 + '../npyImputeG2EL_3/ --ratio 0.8', #48 + '../npyImputeG1E_3/ --ratio 0.1', #49 + '../npyImputeG1E_3/ --ratio 0.3', #50 + '../npyImputeG1E_3/ --ratio 0.6', #51 + '../npyImputeG1E_3/ --ratio 0.8', #52 + '../npyImputeG2F_3/ --ratio 0.1', #53 + '../npyImputeG2F_3/ --ratio 0.3', #54 + '../npyImputeG2F_3/ --ratio 0.6', #55 + '../npyImputeG2F_3/ --ratio 0.8', #56 + '../npyImputeN2E_3/ --ratio 0.1', #57 + '../npyImputeN2E_3/ --ratio 0.3', #58 + '../npyImputeN2E_3/ --ratio 0.6', #59 + '../npyImputeN2E_3/ --ratio 0.8', #60 + + ] + +else: + pyStr = 'results_celltype.py' + + npyList = [ + '../npyG1B/', #0 + '../npyG1E/', #1 + '../npyG1F/', #2 + '../npyR1B/', #3 + '../npyR1E/', #4 + '../npyR1F/', #5 + '../npyN1B/', #6 + '../npyN1E/', #7 + '../npyN1F/', #8 + '../npyG2B/', #9 + '../npyG2E/', #10 + '../npyG2F/', #11 + '../npyR2B/', #12 + '../npyR2E/', #13 + '../npyR2F/', #14 + '../npyN2B/', #15 + '../npyN2E/', #16 + '../npyN2F/', #17 + + '../npyG1B_LK/', #18 + '../npyG1E_LK/', #19 + '../npyG1F_LK/', #20 + '../npyR1B_LK/', #21 + '../npyR1E_LK/', #22 + '../npyR1F_LK/', #23 + '../npyN1B_LK/', #24 + '../npyN1E_LK/', #25 + '../npyN1F_LK/', #26 + '../npyG2B_LK/', #27 + '../npyG2E_LK/', #28 + '../npyG2F_LK/', #29 + '../npyR2B_LK/', #30 + '../npyR2E_LK/', #31 + '../npyR2F_LK/', #32 + '../npyN2B_LK/', #33 + '../npyN2E_LK/', #34 + '../npyN2F_LK/', #35 + + '../npyG1B_LB/', #36 + '../npyG1E_LB/', #37 + '../npyG1F_LB/', #38 + '../npyR1B_LB/', #39 + '../npyR1E_LB/', #40 + '../npyR1F_LB/', #41 + '../npyN1B_LB/', #42 + '../npyN1E_LB/', #43 + '../npyN1F_LB/', #44 + '../npyG2B_LB/', #45 + '../npyG2E_LB/', #46 + '../npyG2F_LB/', #47 + '../npyR2B_LB/', #48 + '../npyR2E_LB/', #49 + '../npyR2F_LB/', #50 + '../npyN2B_LB/', #51 + '../npyN2E_LB/', #52 + '../npyN2F_LB/', #53 + ] + +reguDict={} + +for i in range(0,16): + reguDict[i]='LTMG' +for i in range(16,20): + reguDict[i]='noregu' +for i in range(20,36): + reguDict[i]='LTMG' +for i in range(36,40): + reguDict[i]='noregu' +for i in range(40,56): + reguDict[i]='LTMG' +for i in range(56,60): + reguDict[i]='noregu' + +reguStr='' +if args.methodName in reguDict: + reguStr=' --regulized-type ' + reguDict[args.methodName] + ' ' + +npyStr = npyList[args.methodName] + +benchmarkStr = '' + +if args.runMode: + labelFileDir = '/home/wangjue/biodata/scData/allBench/' +else: + labelFileDir = '/home/jwang/data/scData/' + +def getBenchmarkStr(count): + benchmarkStr = '' + if args.batchStr == 0: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '1.Biase/Biase_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 1: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '2.Li/Li_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 2: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '3.Treutlein/Treutlein_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 3: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '4.Yan/Yan_cell_label.csv '\ + '--n-clusters 7 ' + elif args.batchStr == 4: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '5.Goolam/Goolam_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 5: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '6.Guo/Guo_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 6: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '7.Deng/Deng_cell_label.csv '\ + '--n-clusters 10 ' + elif args.batchStr == 7: + benchmarkStr = ' --benchmark '\ + 
'--labelFilename ' + labelFileDir + '8.Pollen/Pollen_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 8: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '9.Chung/Chung_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 9: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '10.Usoskin/Usoskin_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 10: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '11.Kolodziejczyk/Kolodziejczyk_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 11: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '12.Klein/Klein_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 12: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '13.Zeisel/Zeisel_cell_label.csv '\ + '--n-clusters 7 ' + + return benchmarkStr + + +if not args.runMode: + if args.imputeMode: + imputeStr = 'I' + else: + imputeStr = 'C' + splitStr = '' + if args.splitMode: + splitStr = '_'+str(args.batchStr) + templateStr = "#! /bin/bash\n"\ + "######################### Batch Headers #########################\n"\ + "#SBATCH -A xulab\n"\ + "#SBATCH -p Lewis,BioCompute # use the BioCompute partition\n"\ + "#SBATCH -J R" + imputeStr + '_' + str(args.methodName) + splitStr + " \n"\ + "#SBATCH -o results-%j.out # give the job output a custom name\n"\ + "#SBATCH -t 2-00:00 # two days time limit\n"\ + "#SBATCH -N 1 # number of nodes\n"\ + "#SBATCH -n 1 # number of cores (AKA tasks)\n"\ + "#SBATCH --mem=128G\n"\ + "#################################################################\n"\ + "module load miniconda3\n"\ + "source activate conda_R\n" + print(templateStr) + +count = 0 +for datasetStr in datasetList: + commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --npyDir ' + npyStr + if args.runMode: + os.system(commandStr) + else: + print(commandStr) + for i in range(10): + commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr + if args.runMode: + os.system(commandStr) + else: + print(commandStr) + count += 1 + + diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh new file mode 100644 index 0000000..7cbc427 --- /dev/null +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -0,0 +1,17 @@ +for i in {0..59} +do +for j in {8,11,12,13} +do +python results_Reading_23dropout.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +done +done + +# submit +for i in {0..59} +do +for j in {8,11,12,13} +do +sbatch run_Results_Impute_$i-$j.sh +sleep 1 +done +done \ No newline at end of file From 00129a4eae66402a8ce233ae0c1875d9410801b5 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 10:43:21 -0600 Subject: [PATCH 024/117] fix a bug --- results/submitCluster_Result_Impute_recheck.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index 7cbc427..1277b79 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -2,16 +2,16 @@ for i in {0..59} do for j in {8,11,12,13} do -python results_Reading_23dropout.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +python results_Reading_recheck.py --methodName $i 
--splitMode --batchStr $j > run_Results_Impute_$i-$j.sh done done # submit -for i in {0..59} -do -for j in {8,11,12,13} -do -sbatch run_Results_Impute_$i-$j.sh -sleep 1 -done -done \ No newline at end of file +# for i in {0..59} +# do +# for j in {8,11,12,13} +# do +# sbatch run_Results_Impute_$i-$j.sh +# sleep 1 +# done +# done \ No newline at end of file From f0e7bdfc8da54e4a73823c8c67255cc0edf3f8de Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 11:28:43 -0600 Subject: [PATCH 025/117] change to new format --- results/results_Reading_recheck.py | 40 +++++++++++++++--------------- results/results_impute_graph.py | 12 ++++----- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py index c6584b2..124743c 100644 --- a/results/results_Reading_recheck.py +++ b/results/results_Reading_recheck.py @@ -50,7 +50,7 @@ ] if args.imputeMode: - pyStr = 'results_impute.py' + pyStr = 'results_impute_graph.py' npyList = [ '../npyImputeG2E_1/ --ratio 0.1', #1 @@ -210,55 +210,55 @@ def getBenchmarkStr(count): benchmarkStr = '' - if args.batchStr == 0: + if args.batchStr == 1: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '1.Biase/Biase_cell_label.csv '\ '--n-clusters 3 ' - elif args.batchStr == 1: + elif args.batchStr == 2: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '2.Li/Li_cell_label.csv '\ '--n-clusters 9 ' - elif args.batchStr == 2: + elif args.batchStr == 3: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '3.Treutlein/Treutlein_cell_label.csv '\ '--n-clusters 5 ' - elif args.batchStr == 3: + elif args.batchStr == 4: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '4.Yan/Yan_cell_label.csv '\ '--n-clusters 7 ' - elif args.batchStr == 4: + elif args.batchStr == 5: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '5.Goolam/Goolam_cell_label.csv '\ '--n-clusters 5 ' - elif args.batchStr == 5: + elif args.batchStr == 6: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '6.Guo/Guo_cell_label.csv '\ '--n-clusters 9 ' - elif args.batchStr == 6: + elif args.batchStr == 7: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '7.Deng/Deng_cell_label.csv '\ '--n-clusters 10 ' - elif args.batchStr == 7: + elif args.batchStr == 8: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '8.Pollen/Pollen_cell_label.csv '\ '--n-clusters 11 ' - elif args.batchStr == 8: + elif args.batchStr == 9: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '9.Chung/Chung_cell_label.csv '\ '--n-clusters 4 ' - elif args.batchStr == 9: + elif args.batchStr == 10: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '10.Usoskin/Usoskin_cell_label.csv '\ '--n-clusters 11 ' - elif args.batchStr == 10: + elif args.batchStr == 11: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '11.Kolodziejczyk/Kolodziejczyk_cell_label.csv '\ '--n-clusters 3 ' - elif args.batchStr == 11: + elif args.batchStr == 12: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '12.Klein/Klein_cell_label.csv '\ '--n-clusters 4 ' - elif args.batchStr == 12: + elif args.batchStr == 13: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '13.Zeisel/Zeisel_cell_label.csv '\ '--n-clusters 7 ' @@ -296,12 +296,12 @@ def getBenchmarkStr(count): os.system(commandStr) else: print(commandStr) - for i in range(10): - commandStr = 'python -W ignore ' + 
pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr - if args.runMode: - os.system(commandStr) - else: - print(commandStr) + # for i in range(10): + # commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr + # if args.runMode: + # os.system(commandStr) + # else: + # print(commandStr) count += 1 diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index 4145dd1..9f7101d 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -63,20 +63,20 @@ # dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropi.npy') # dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropj.npy') # dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropix.npy') -dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropi.npy') -dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropj.npy') -dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropix.npy') +dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropi.npy') +dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropj.npy') +dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropix.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_'+args.regupara+'_recon'+args.reconstr+'.npy') -featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_recon'+args.reconstr+'.npy') +featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_recon'+args.reconstr+'.npy') # featuresImpute = pd.read_csv(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.regupara+'_0.0_0.0_recon'+args.reconstr+'.csv') # featuresImpute = featuresImpute.to_numpy() -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') def imputeResult(inputData): ''' From 92912a1ba8e58507fba07076916848601932fe35 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 15:14:07 -0600 Subject: [PATCH 026/117] fix a bug --- benchmark_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/benchmark_util.py b/benchmark_util.py index a72418b..f85f128 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -573,7 +573,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): result = np.abs(x - np.log(yuse+1)) rmse = ((x - np.log(yuse+1))**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result), rmse + return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): From 59d9cc1722817a6d2701e89c069941904d46e04d Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 15:54:08 -0600 Subject: [PATCH 027/117] add 9 and 11 data --- generating_Impute_0.1-0.8-ablation.py | 26 ++++++++++++---- submitCluster_imputation_0.1-0.8-ablation.sh | 32 ++++++++++++++------ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 86773f7..05e3135 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -63,18 +63,32 @@ imputeStr = ' --imputeMode ' outDirStr = "npyImpute"+outDirStr[3:] outputFilename = args.outputDir + tmpstr1 - abbrStr = tmpstr2 + abbrStr = tmpstr2 for dropoutPara in dropoutList: - commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: fw.write(outStr) fw.close() for dropoutPara in dropoutList: - commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: fw.write(outStr) - fw.close() + fw.close() + + # for dropoutPara in dropoutList: + # commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + # with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + # fw.write(outStr) + # fw.close() + + # for dropoutPara in dropoutList: + # commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + # with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + # fw.write(outStr) + # 
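# Aside on the benchmark_util.py fix in PATCH 026 above: as computed there,
# rmse is an element-wise vector (each entry is |x - y|/sqrt(n)), so the
# patched np.mean(rmse) returns mean(|x - y|)/sqrt(n) rather than a
# conventional root-mean-square error. A minimal sketch of the conventional
# definition, assuming x holds imputed log-scale values and yuse the raw
# original counts at the dropped positions, as in that hunk:
import numpy as np

def rmse_log(x, yuse):
    # square root of the mean squared log-scale difference over dropped entries
    return np.sqrt(np.mean((x - np.log(yuse + 1)) ** 2))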
fw.close() diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh index d558529..b6e3ea5 100644 --- a/submitCluster_imputation_0.1-0.8-ablation.sh +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -20,16 +20,28 @@ for i in {1..3} do for j in {0.1,0.3,0.6,0.8} do -sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh -sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_9_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_9_$j\.sh -sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh -sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_11_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_11_$j\.sh + +# sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh + +# sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh done done \ No newline at end of file From b04bbb4c99a156267a2e53d3a1d5a0e5c4be73aa Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 21:17:24 -0600 Subject: [PATCH 028/117] add 9 and 11 for recheck --- generating_Impute_0.1-0.8-ablation.py | 24 +++++++++---------- generating_distribution.py | 20 +++++++++++++++- plot_distribution.r | 1 - results/results_impute_graph.py | 2 -- .../submitCluster_Result_Impute_recheck.sh | 18 +++++++------- submitCluster_distribution.sh | 13 +++++++++- submitCluster_imputation_0.1-0.8-ablation.sh | 20 ++++++++-------- 7 files changed, 62 insertions(+), 36 deletions(-) diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 05e3135..ce1d245 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -79,16 +79,16 @@ fw.write(outStr) fw.close() - # for dropoutPara in dropoutList: - # commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" - # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - # with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: - # fw.write(outStr) - # fw.close() + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() - # for dropoutPara 
in dropoutList: - # commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" - # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - # with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: - # fw.write(outStr) - # fw.close() + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/generating_distribution.py b/generating_distribution.py index 0ab95f7..724bf79 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -63,7 +63,25 @@ tmpstr2=tmp[1] imputeStr = '' outputFilename = args.outputDir + tmpstr1 - abbrStr = tmpstr2 + abbrStr = tmpstr2 + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 9.Chung --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 9.Chung "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 11.Kolodziejczyk --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 11.Kolodziejczyk "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12.sh",'w') as fw: + fw.write(outStr) + fw.close() commandLine = '' for seed in seedList: diff --git a/plot_distribution.r b/plot_distribution.r index fdc5a17..c09f27b 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -45,7 +45,6 @@ tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI_.tiff",sep='')) plot(fit_zinb_) dev.off() - nu_ = 1-length(which(features!=0))/(length(features)) fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) gofstat(fit_zinb) diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index 9f7101d..a0a11fc 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -67,8 +67,6 @@ dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropj.npy') dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropix.npy') - - # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_'+args.regupara+'_recon'+args.reconstr+'.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_recon'+args.reconstr+'.npy') diff --git a/results/submitCluster_Result_Impute_recheck.sh 
b/results/submitCluster_Result_Impute_recheck.sh index 1277b79..93349eb 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -6,12 +6,12 @@ python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > ru done done -# submit -# for i in {0..59} -# do -# for j in {8,11,12,13} -# do -# sbatch run_Results_Impute_$i-$j.sh -# sleep 1 -# done -# done \ No newline at end of file +submit +for i in {0..59} +do +for j in {8,11,12,13} +do +sbatch run_Results_Impute_$i-$j.sh +sleep 1 +done +done \ No newline at end of file diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index d526005..f631ff7 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -2,13 +2,24 @@ for i in {0.1,0.3,0.6,0.8} do +sbatch plot_G2E_$i\_9.sh +sbatch plot_G2E_$i\_11.sh sbatch plot_G2E_$i\_12.sh - sbatch plot_G2E_$i\_13.sh done for i in {0.1,0.3,0.6,0.8} do +sbatch plot_G2EL_$i\_9.sh +sbatch plot_G1E_$i\_9.sh +sbatch plot_G2F_$i\_9.sh +sbatch plot_N2E_$i\_9.sh + +sbatch plot_G2EL_$i\_11.sh +sbatch plot_G1E_$i\_11.sh +sbatch plot_G2F_$i\_11.sh +sbatch plot_N2E_$i\_11.sh + sbatch plot_G2EL_$i\_12.sh sbatch plot_G1E_$i\_12.sh sbatch plot_G2F_$i\_12.sh diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh index b6e3ea5..a822d20 100644 --- a/submitCluster_imputation_0.1-0.8-ablation.sh +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -32,16 +32,16 @@ sbatch run_experimentImpute_2_g_e_L_$i\_11_$j\.sh sbatch run_experimentImpute_2_g_f_$i\_11_$j\.sh sbatch run_experimentImpute_2_n_e_$i\_11_$j\.sh -# sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh done done \ No newline at end of file From d0f43edd1b1a396337c346d95eddb2bbdbaaecf7 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 22:23:48 -0600 Subject: [PATCH 029/117] fix a bug --- generating_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generating_distribution.py b/generating_distribution.py index 724bf79..a69efbb 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -79,7 +79,7 @@ commandLine += "python3 -W ignore plot_distribution.py --datasetName 11.Kolodziejczyk --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "Rscript plot_distribution.r 11.Kolodziejczyk "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_12.sh",'w') as fw: + with open(outputFilename+"_11.sh",'w') as 
fw: fw.write(outStr) fw.close() From bfdcd91a1c2e6a6b2bab0f5630cff8bda8c739d8 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 07:51:42 -0600 Subject: [PATCH 030/117] fix a typo --- results/submitCluster_Result_Impute_recheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index 93349eb..e6d5272 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -6,7 +6,7 @@ python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > ru done done -submit +# submit for i in {0..59} do for j in {8,11,12,13} From f505af38e5bfb796441dc247359b3008461516fc Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 09:26:58 -0600 Subject: [PATCH 031/117] update converge type --- main_benchmark.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/main_benchmark.py b/main_benchmark.py index 31f0942..0a295fc 100644 --- a/main_benchmark.py +++ b/main_benchmark.py @@ -36,7 +36,7 @@ help='EM process type (default: celltypeEM) or EM') parser.add_argument('--alpha', type=float, default=0.5, help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') -parser.add_argument('--converge-type', type=str, default='either', +parser.add_argument('--converge-type', type=str, default='celltype', help='type of converge: celltype/graph/both/either (default: celltype) ') parser.add_argument('--converge-graphratio', type=float, default=0.01, help='ratio of cell type change in EM iteration (default: 0.01), 0-1') @@ -587,22 +587,34 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # graph criteria if args.converge_type == 'graph': if graphChange < graphChangeThreshold: - print('Converge now!') + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # celltype criteria elif args.converge_type == 'celltype': if ari>args.converge_celltyperatio: - print('Converge now!') + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if both criteria are meets elif args.converge_type == 'both': if graphChange < graphChangeThreshold and ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if either criteria are meets elif args.converge_type == 'either': if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # Update From 9f7ae1e66e944e1210973cfd9ba6d1230e89435a Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 15:40:53 -0600 Subject: [PATCH 032/117] change ranking --- results/submitCluster_Result_Impute_recheck.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index e6d5272..ba68356 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,16 +1,16 @@ -for i in {0..59} -do -for j in {8,11,12,13} -do -python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh -done -done +# for i in {0..59} +# do +# for 
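# Aside on the converge criteria added in PATCH 031 above: the four
# converge-type branches repeat the same update-and-break bookkeeping. A
# minimal refactoring sketch, assuming only the names visible in that hunk
# (graphChange, graphChangeThreshold, ari, args.converge_type,
# args.converge_celltyperatio); a suggestion, not code from the repo:
def is_converged(convergeType, graphChange, graphChangeThreshold, ari, celltypeRatio):
    graphOk = graphChange < graphChangeThreshold
    celltypeOk = ari > celltypeRatio
    if convergeType == 'graph':
        return graphOk
    elif convergeType == 'celltype':
        return celltypeOk
    elif convergeType == 'both':
        return graphOk and celltypeOk
    return graphOk or celltypeOk  # 'either'

# The loop body would then collapse to:
# if is_converged(args.converge_type, graphChange, graphChangeThreshold,
#                 ari, args.converge_celltyperatio):
#     adjOld = adjNew
#     listResultOld = listResult
#     break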
j in {8,11,12,13} +# do +# python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +# done +# done # submit -for i in {0..59} -do for j in {8,11,12,13} do +for i in {0..59} +do sbatch run_Results_Impute_$i-$j.sh sleep 1 done From 7863f74df12c7cf284da11f9f9be49068a0854ec Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 18:22:42 -0600 Subject: [PATCH 033/117] add cosine --- results/results_impute_graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index a0a11fc..b964534 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -74,7 +74,8 @@ # featuresImpute = featuresImpute.to_numpy() l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') +cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse), end='') def imputeResult(inputData): ''' From e18d2913f236e30c90ef9faa59f9f0f0205a0b2b Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 11:22:39 -0600 Subject: [PATCH 034/117] add magic --- codesfromJGandYJ/impute code/MAGIC_impute.py | 73 ++++++-------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py index c0c1f22..0743d27 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute code/MAGIC_impute.py @@ -5,71 +5,40 @@ import numpy as np import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') #from benchmark_util import impute_dropout -def impute_dropout(X, rate=0.1): - """ - X: original testing set - ======== - returns: - X_zero: copy of X with zeros - i, j, ix: indices of where dropout is applied - """ - #If the input is a dense matrix - if isinstance(X, np.ndarray): - X_zero = np.copy(X) - # select non-zero subset - i,j = np.nonzero(X_zero) - # If the input is a sparse matrix - else: - X_zero = scipy.sparse.lil_matrix.copy(X) - # select non-zero subset - i,j = X_zero.nonzero() - # choice number 1 : select 10 percent of the non zero values (so that distributions overlap enough) - ix = np.random.choice(range(len(i)), int(np.floor(0.1 * len(i))), replace=False) - X_zero[i[ix], j[ix]] *= np.random.binomial(1, rate) - # choice number 2, focus on a few but corrupt binomially - #ix = np.random.choice(range(len(i)), int(slice_prop * np.floor(len(i))), replace=False) - #X_zero[i[ix], j[ix]] = np.random.binomial(X_zero[i[ix], j[ix]].astype(np.int), rate) - return X_zero, i, j, ix - parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], 
axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) +def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/hpc/scratch/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) -# Load single-cell RNA-seq data -# Default is KNN=5 -magic_operator = magic.MAGIC() -# magic_operator = magic.MAGIC(knn=10) -X_magic = magic_operator.fit_transform(x, genes="all_genes") -recon = X_magic + # Load single-cell RNA-seq data + # Default is KNN=5 + magic_operator = magic.MAGIC() + # magic_operator = magic.MAGIC(knn=10) + X_magic = magic_operator.fit_transform(x, genes="all_genes") + recon = X_magic -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr + np.save('/storage/hpc/scratch/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/magic/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),recon) +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) # From scVI # # Load single-cell RNA-seq data From 013240c84700876cd2e5370d051fe45438484d05 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 11:24:55 -0600 Subject: [PATCH 035/117] add bash --- codesfromJGandYJ/impute code/other_magic.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 codesfromJGandYJ/impute code/other_magic.sh diff --git a/codesfromJGandYJ/impute code/other_magic.sh b/codesfromJGandYJ/impute code/other_magic.sh new file mode 100644 index 0000000..fd9f5e4 --- /dev/null +++ b/codesfromJGandYJ/impute code/other_magic.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore MAGIC_impute.py From 8d313d70ca96c0a6e35f8dc99b394c52f68d4013 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 13:04:55 -0600 Subject: [PATCH 036/117] add bash --- codesfromJGandYJ/impute code/MAGIC_impute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py index 0743d27..954cd4c 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute code/MAGIC_impute.py @@ -15,7 +15,7 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): - filename = '/storage/hpc/scratch/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() @@ -29,7 +29,7 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): X_magic = magic_operator.fit_transform(x, genes="all_genes") recon = X_magic - np.save('/storage/hpc/scratch/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) + np.save('/storage/htc/joshilab/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] From bef9ce4cb127e9ebcaa09d7ec9dd5ebba63e9ffd Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 18:06:56 -0600 Subject: [PATCH 037/117] add scvi --- codesfromJGandYJ/impute code/scVi_impute.py | 102 ------------------ .../{impute code => impute}/MAGIC_impute.py | 6 +- .../{impute code => impute}/SAVER_impute.py | 0 .../{impute code => impute}/SCIMPUTE.py | 0 .../{impute code => impute}/dca_impute.py | 0 .../deepimpute_impute.py | 0 .../{impute code => impute}/other_magic.sh | 0 codesfromJGandYJ/impute/other_scvi.sh | 14 +++ .../saucie_impute_t.py | 0 codesfromJGandYJ/impute/scVi_impute.py | 92 ++++++++++++++++ 10 files changed, 109 insertions(+), 105 deletions(-) delete mode 100644 codesfromJGandYJ/impute code/scVi_impute.py rename codesfromJGandYJ/{impute code => impute}/MAGIC_impute.py (89%) rename codesfromJGandYJ/{impute code => impute}/SAVER_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/SCIMPUTE.py (100%) rename codesfromJGandYJ/{impute code => impute}/dca_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/deepimpute_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/other_magic.sh (100%) create mode 100644 codesfromJGandYJ/impute/other_scvi.sh rename codesfromJGandYJ/{impute code => impute}/saucie_impute_t.py (100%) create mode 100644 codesfromJGandYJ/impute/scVi_impute.py diff --git a/codesfromJGandYJ/impute code/scVi_impute.py b/codesfromJGandYJ/impute code/scVi_impute.py deleted file mode 100644 index 6ce9383..0000000 --- a/codesfromJGandYJ/impute code/scVi_impute.py +++ /dev/null @@ -1,102 +0,0 @@ -import os 
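# Aside on the loading idiom shared by the impute scripts from PATCH 034
# onward: the *_features.npy files are assumed here to hold a pickled
# scipy sparse matrix, which is what the tolist()/todense() chain implies.
# A minimal annotated sketch (the path is one example from these patches):
import numpy as np

filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_1/9.Chung_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy'
x = np.load(filename, allow_pickle=True)  # 0-d object array wrapping the sparse matrix
x = x.tolist()      # unwraps the 0-d array to the scipy.sparse matrix itself
x = x.todense()     # densify to a numpy.matrix
x = np.asarray(x)   # numpy.matrix -> plain ndarray
x = np.log(x + 1)   # the log transform most of these scripts apply before imputing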
-import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset -from scvi.models import VAE -from scvi.inference import UnsupervisedTrainer -import torch -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - - -featuresOriginal = np.copy(x) -features, dropi, dropj, dropix = impute_dropout(featuresOriginal, rate=float(args.ratio)) - -#transpose and add names for rows and cols -features=np.transpose(features) -rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) -features=np.concatenate([rowname,features],axis=1) -colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) -features=np.concatenate([colname,features],axis=0) - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -# gene_dataset = CortexDataset(save_path=save_path, total_genes=558) -gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/") - -n_epochs = 400 -lr = 1e-3 -use_batches = False -use_cuda = True - -vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) -trainer = UnsupervisedTrainer( - vae, - gene_dataset, - train_size=0.75, - use_cuda=use_cuda, - frequency=5, -) - -trainer.train(n_epochs=n_epochs, lr=lr) - - -full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) -latent, batch_indices, labels = full.sequential().get_latent() -batch_indices = batch_indices.ravel() - -# use imputation -imputed_values = full.sequential().imputation() -normalized_values = full.sequential().get_sample_scale() - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed_values) -np.save(save_path+'{}_{}_recon_normalized.npy'.format(datasetNameStr,args.ratio),normalized_values) -np.save(save_path+'{}_{}_featuresOriginal.npy'.format(datasetNameStr,args.ratio),featuresOriginal) 
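# Aside on the dropi/dropj/dropix arrays saved just below: they record which
# nonzero entries were zeroed out, so the scoring code can compare imputed and
# original values only at those positions. A minimal sketch, assuming dropix
# indexes into the dropi/dropj coordinate arrays, matching the (i, j, ix)
# signature of imputation_error_log in benchmark_util.py (PATCH 026):
import numpy as np

def dropped_pairs(imputed, original, dropi, dropj, dropix):
    rows, cols = dropi[dropix], dropj[dropix]
    x = imputed[rows, cols]    # imputed values at the corrupted positions
    y = original[rows, cols]   # ground-truth values at the same positions
    return np.asarray(x), np.asarray(y)  # score with, e.g., mean absolute error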
-np.save(save_path+'{}_{}_dropi.npy'.format(datasetNameStr,args.ratio),dropi) -np.save(save_path+'{}_{}_dropj.npy'.format(datasetNameStr,args.ratio),dropj) -np.save(save_path+'{}_{}_dropix.npy'.format(datasetNameStr,args.ratio),dropix) - -# celltype: -#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute/MAGIC_impute.py similarity index 89% rename from codesfromJGandYJ/impute code/MAGIC_impute.py rename to codesfromJGandYJ/impute/MAGIC_impute.py index 954cd4c..da7b573 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute/MAGIC_impute.py @@ -7,10 +7,10 @@ import sys #from benchmark_util import impute_dropout -parser = argparse.ArgumentParser(description='') +parser = argparse.ArgumentParser(description='MAGIC Impute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() diff --git a/codesfromJGandYJ/impute code/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/SAVER_impute.py rename to codesfromJGandYJ/impute/SAVER_impute.py diff --git a/codesfromJGandYJ/impute code/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py similarity index 100% rename from codesfromJGandYJ/impute code/SCIMPUTE.py rename to codesfromJGandYJ/impute/SCIMPUTE.py diff --git a/codesfromJGandYJ/impute code/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/dca_impute.py rename to codesfromJGandYJ/impute/dca_impute.py diff --git a/codesfromJGandYJ/impute code/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/deepimpute_impute.py rename to codesfromJGandYJ/impute/deepimpute_impute.py diff --git a/codesfromJGandYJ/impute code/other_magic.sh b/codesfromJGandYJ/impute/other_magic.sh similarity index 100% rename from codesfromJGandYJ/impute code/other_magic.sh rename to codesfromJGandYJ/impute/other_magic.sh diff --git a/codesfromJGandYJ/impute/other_scvi.sh b/codesfromJGandYJ/impute/other_scvi.sh new file mode 100644 index 0000000..888d89b --- /dev/null +++ b/codesfromJGandYJ/impute/other_scvi.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scvi +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore scVi_impute.py diff --git a/codesfromJGandYJ/impute code/saucie_impute_t.py b/codesfromJGandYJ/impute/saucie_impute_t.py similarity index 100% rename from codesfromJGandYJ/impute code/saucie_impute_t.py rename to codesfromJGandYJ/impute/saucie_impute_t.py diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py new file mode 100644 index 0000000..21594ef --- /dev/null +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -0,0 +1,92 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset +from scvi.models import VAE +from scvi.inference import UnsupervisedTrainer +import torch +import csv +import argparse +import sys +from benchmark_util import impute_dropout + +# pip install scvi==0.6.3 +parser = argparse.ArgumentParser(description='') +# In this script, not using arguments +parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +args = parser.parse_args() + +# Ref: +# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb + + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + #write + dropout_filename = save_path+"dropout.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + # gene_dataset = CortexDataset(save_path=save_path, total_genes=558) + gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/") + + n_epochs = 400 + lr = 1e-3 + use_batches = False + use_cuda = False + + vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) + trainer = UnsupervisedTrainer( + vae, + gene_dataset, + train_size=0.75, + use_cuda=use_cuda, + frequency=5, + ) + + trainer.train(n_epochs=n_epochs, lr=lr) + + full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) + latent, batch_indices, labels = full.sequential().get_latent() + batch_indices = batch_indices.ravel() + + # use imputation + imputed_values = full.sequential().imputation() + normalized_values = full.sequential().get_sample_scale() + + 
np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
+    np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon_normalized.npy'.format(datasetName,ratio,seed),normalized_values)
+
+
+datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+seedList = ['1','2','3']
+ratioList = [0.1]
+
+for datasetName in datasetNameList:
+    for seed in seedList:
+        for ratio in ratioList:
+            impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio)
+
+# celltype:
+#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent)

From ec3ab735197e2f6c336f0030d12db8927ba17fd7 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 18:11:51 -0600
Subject: [PATCH 038/117] add scvi

---
 codesfromJGandYJ/impute/scVi_impute.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 21594ef..0bdc546 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -9,7 +9,6 @@
 import csv
 import argparse
 import sys
-from benchmark_util import impute_dropout
 
 # pip install scvi==0.6.3
 parser = argparse.ArgumentParser(description='')

From 87b5205991bd121c65d4d45cf1561bcd0ba10301 Mon Sep 17 00:00:00 2001
From: Wang
Date: Sun, 22 Nov 2020 18:16:19 -0600
Subject: [PATCH 039/117] add scvi

---
 codesfromJGandYJ/impute/scVi_impute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 0bdc546..28fd724 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -48,7 +48,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
         writer.writerows(features)
 
     # gene_dataset = CortexDataset(save_path=save_path, total_genes=558)
-    gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/")
+    gene_dataset = CsvDataset(dropout_filename, save_path=save_path)
 
     n_epochs = 400
     lr = 1e-3

From 09a1ae34cbe12ba49a3486344cc81575e5ca9324 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 19:34:53 -0600
Subject: [PATCH 040/117] add saucie

---
 codesfromJGandYJ/impute/SAUCIE_impute.py   | 44 +++++++++++++++++
 codesfromJGandYJ/impute/other_saucie.py    | 20 ++++++++
 codesfromJGandYJ/impute/saucie_impute_t.py | 55 ----------------------
 codesfromJGandYJ/impute/scVi_impute.py     |  2 +-
 4 files changed, 65 insertions(+), 56 deletions(-)
 create mode 100644 codesfromJGandYJ/impute/SAUCIE_impute.py
 create mode 100644 codesfromJGandYJ/impute/other_saucie.py
 delete mode 100644 codesfromJGandYJ/impute/saucie_impute_t.py

diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py
new file mode 100644
index 0000000..e79f754
--- /dev/null
+++ b/codesfromJGandYJ/impute/SAUCIE_impute.py
@@ -0,0 +1,44 @@
+import sys
+import tensorflow as tf
+import SAUCIE
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import argparse
+
+parser = argparse.ArgumentParser(description='Impute using SAUCIE')
+# In this script, not using arguments
+parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000')
+parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio')
+args = parser.parse_args()
+
+
+def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1):
+    filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
+
x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + x=np.transpose(x) + + saucie = SAUCIE.SAUCIE(x.shape[1]) + loadtrain = SAUCIE.Loader(x, shuffle=True) + saucie.train(loadtrain, steps=1000) + + loadeval = SAUCIE.Loader(x, shuffle=False) + reconstruction = saucie.get_reconstruction(loadeval) + + reconstruction=np.transpose(reconstruction) + + np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_saucie.py b/codesfromJGandYJ/impute/other_saucie.py new file mode 100644 index 0000000..51a90c9 --- /dev/null +++ b/codesfromJGandYJ/impute/other_saucie.py @@ -0,0 +1,20 @@ +#!/bin/bash +#------------------------------------------------------------------------------- +# SBATCH CONFIG +#------------------------------------------------------------------------------- +## resources +#SBATCH -A xulab +#SBATCH --partition gpu4 +#SBATCH --cpus-per-task=1 # cores per task +#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) +#SBATCH --time 2-00:00 # days-hours:minutes +#SBATCH --gres gpu:1 #gpu:1 any gpu +## labels and outputs +#SBATCH --job-name=modelpyenetCB-%j.out +#SBATCH --output=results-%j.out # %j is the unique jobID +################################################################# + +module load miniconda3 +source activate /storage/htc/joshilab/wangjue/conda_R_gpu +module load cuda/cuda-10.1.243 +python3 -W ignore SAUCIE_impute.py \ No newline at end of file diff --git a/codesfromJGandYJ/impute/saucie_impute_t.py b/codesfromJGandYJ/impute/saucie_impute_t.py deleted file mode 100644 index 5831c63..0000000 --- a/codesfromJGandYJ/impute/saucie_impute_t.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -import tensorflow as tf -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/SAUCIE-master/SAUCIE-master/') -from model import SAUCIE -from loader import Loader -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import argparse -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -args = parser.parse_args() - -# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - -x=np.transpose(x) - -saucie = SAUCIE(x.shape[1]) 
-loadtrain = Loader(x, shuffle=True) -saucie.train(loadtrain, steps=1000) - -loadeval = Loader(x, shuffle=False) -reconstruction = saucie.get_reconstruction(loadeval) - -reconstruction=np.transpose(reconstruction) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix) -# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') - -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction) - - diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py index 28fd724..3044585 100644 --- a/codesfromJGandYJ/impute/scVi_impute.py +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -11,7 +11,7 @@ import sys # pip install scvi==0.6.3 -parser = argparse.ArgumentParser(description='') +parser = argparse.ArgumentParser(description='scVi imputation') # In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') From 0ed5e109737027f2734dd1d65a4fc6267ab4ee26 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 22:26:16 -0600 Subject: [PATCH 041/117] tmp dca --- codesfromJGandYJ/impute/dca_impute.py | 81 +++++++------------------ codesfromJGandYJ/impute/other_saucie.py | 1 + 2 files changed, 22 insertions(+), 60 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 0496364..db577e2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -1,23 +1,3 @@ -#from dca.api import dca -#import anndata -#import matplotlib.pyplot as plt -#import numpy as np -#import time -#import pandas as pd - -#Ref: -# https://github.com/theislab/dca/blob/master/tutorial.ipynb -#z = pd.read_csv('/home/wangjue/biodata/scData/MMPbasal.csv') -#z = z.to_numpy() -#z = z[:,:-1] - -#selected = np.std(z, axis=0).argsort()[-2000:][::-1] -#expression_data = z[:, selected] - -#train = anndata.AnnData(expression_data) -#res = dca(train, verbose=True) -#train.X - import os import numpy as np import pandas as pd @@ -26,54 +6,35 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Imputation DCA') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/dca/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -if args.discreteTag: - filename = 
'/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - - - -features=x.T +def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) + save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) + features=x.T + #write + dropout_filename = save_path+datasetNameStr+"_dropout.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) -os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) + os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) -filename=save_path+datasetNameStr+"/mean.tsv" -imputed_values = pd.read_csv(filename,sep="\t") -imputed_values=imputed_values.T + filename=save_path+datasetNameStr+"/mean.tsv" + imputed_values = pd.read_csv(filename,sep="\t") + imputed_values=imputed_values.T -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file + np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_saucie.py b/codesfromJGandYJ/impute/other_saucie.py index 51a90c9..45bef22 100644 --- a/codesfromJGandYJ/impute/other_saucie.py +++ b/codesfromJGandYJ/impute/other_saucie.py @@ -8,6 +8,7 @@ #SBATCH --cpus-per-task=1 # cores per task #SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) #SBATCH --time 2-00:00 # days-hours:minutes +#SBATCH -J SAUCIE #SBATCH --gres gpu:1 #gpu:1 any gpu ## labels and outputs #SBATCH --job-name=modelpyenetCB-%j.out From 42578da759d619d2c4ee8d02c4cd7b93ec76a3eb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 23:50:41 -0600 Subject: [PATCH 042/117] add dca/deepimpute --- codesfromJGandYJ/impute/SAUCIE_impute.py | 2 +- codesfromJGandYJ/impute/dca_impute.py | 20 +++-- codesfromJGandYJ/impute/deepimpute_impute.py | 80 +++++++++----------- codesfromJGandYJ/impute/other_dca.sh | 14 ++++ codesfromJGandYJ/impute/other_deepimpute.py | 14 ++++ codesfromJGandYJ/impute/scVi_impute.py | 4 +- util_function.py | 42 ++++++++++ 7 files changed, 123 insertions(+), 53 deletions(-) create mode 100644 codesfromJGandYJ/impute/other_dca.sh create mode 100644 codesfromJGandYJ/impute/other_deepimpute.py diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index e79f754..3aadce7 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -36,7 +36,7 @@ def 
impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] -ratioList = [0.1] +ratioList = [0.1, 0.3, 0.6, 0.8] for datasetName in datasetNameList: for seed in seedList: diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index db577e2..95fbca7 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -13,6 +13,7 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) @@ -21,20 +22,27 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) - save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) - features=x.T #write - dropout_filename = save_path+datasetNameStr+"_dropout.csv" + dropout_filename = save_path+"dca_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) + os.system("dca "+dropout_filename+ " "+save_path+"dca_output.csv") - filename=save_path+datasetNameStr+"/mean.tsv" + filename=save_path+"dca_output.csv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T - np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file + np.save('/storage/htc/joshilab/wangjue/scGNN/dca/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 6c31962..14f5a35 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -7,53 +7,45 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute Deepimpute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/deepimpute/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() # Ref: # 
https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -#x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -data = pd.read_csv(dropout_filename, header=None) -model = MultiNet() -model.fit(data) -imputed = model.predict(data) - - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed) - +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) + + features=x + dropout_filename = save_path+"deepimpute.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + data = pd.read_csv(dropout_filename, header=None) + model = MultiNet() + model.fit(data) + imputed = model.predict(data) + + np.save('/storage/htc/joshilab/wangjue/scGNN/deepimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh new file mode 100644 index 0000000..be4dfe9 --- /dev/null +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -0,0 +1,14 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J dca +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_deepimpute.py b/codesfromJGandYJ/impute/other_deepimpute.py new file mode 100644 index 0000000..b55d6c6 --- /dev/null +++ b/codesfromJGandYJ/impute/other_deepimpute.py @@ -0,0 +1,14 @@ +#! 
/bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p Lewis,BioCompute  # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J deepimpute
+#SBATCH -o results-%j.out  # give the job output a custom name
+#SBATCH -t 2-00:00  # two days time limit
+#SBATCH -N 1  # number of nodes
+#SBATCH -n 1  # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+python3 -W ignore deepimpute_impute.py
diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 3044585..5710e36 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -42,7 +42,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
     features=np.concatenate([colname,features],axis=0)
 
     #write
-    dropout_filename = save_path+"dropout.csv"
+    dropout_filename = save_path+"scvi.csv"
     with open(dropout_filename, "w") as f:
         writer = csv.writer(f)
         writer.writerows(features)
@@ -80,7 +80,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
 
 datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
 seedList = ['1','2','3']
-ratioList = [0.1]
+ratioList = [0.1, 0.3, 0.6, 0.8]
 
 for datasetName in datasetNameList:
     for seed in seedList:
diff --git a/util_function.py b/util_function.py
index 103b585..a997186 100644
--- a/util_function.py
+++ b/util_function.py
@@ -199,6 +199,48 @@ def __getitem__(self, idx):
 
         return sample,idx
 
+class scDatasetDropoutSparse(Dataset):
+    def __init__(self, data=None, discreteTag=False, ratio=0.1, seed=1, transform=None):
+        """
+        Sparse version of the dropout dataset.
+        Args:
+            data (scipy sparse matrix): genes x cells expression matrix
+            transform (callable, optional): transform applied to each sample
+        """
+
+        self.featuresOriginal = data.transpose()
+        self.ratio = ratio
+        # Random seed
+        # np.random.uniform(1, 2)
+        self.features, self.i, self.j, self.ix = impute_dropout(self.featuresOriginal, seed=seed, rate=self.ratio)
+        # Now lines are cells, and cols are genes
+        # self.features = self.features.transpose()
+        self.transform = transform
+        # check whether log or not
+        self.discreteTag = discreteTag
+
+    def __len__(self):
+        return self.features.shape[0]
+
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+
+        sample = self.features[idx,:]
+        if type(sample)==sp.lil_matrix:
+            sample = torch.from_numpy(sample.toarray())
+        else:
+            sample = torch.from_numpy(sample)
+
+        # apply the transform after fetching the data
+        if self.transform:
+            sample = self.transform(sample)
+
+        if not self.discreteTag:
+            sample = torch.log(sample+1)
+
+        return sample,idx
+
 class scDataset(Dataset):
     def __init__(self, data=None, transform=None):
         """

From e9379022f354014ecd00b69f87ffbaa035f72818 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 23:53:10 -0600
Subject: [PATCH 043/117] add deepimpute and saucie

---
 .../impute/{other_deepimpute.py => other_deepimpute.sh}      | 0
 codesfromJGandYJ/impute/{other_saucie.py => other_saucie.sh} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename codesfromJGandYJ/impute/{other_deepimpute.py => other_deepimpute.sh} (100%)
 rename codesfromJGandYJ/impute/{other_saucie.py => other_saucie.sh} (100%)

diff --git a/codesfromJGandYJ/impute/other_deepimpute.py b/codesfromJGandYJ/impute/other_deepimpute.sh
similarity index 100%
rename from codesfromJGandYJ/impute/other_deepimpute.py
rename to codesfromJGandYJ/impute/other_deepimpute.sh
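A note on the scDatasetDropoutSparse class added to util_function.py in PATCH 042 above: the sketch below shows how such a dataset would typically be consumed. It is a hedged smoke test, not repository code — it assumes the repo root is on sys.path, that impute_dropout (imported inside util_function.py) returns the masked matrix in a form the type check in __getitem__ can handle, and the synthetic matrix and batch size are purely illustrative.

import scipy.sparse as sp
from torch.utils.data import DataLoader

from util_function import scDatasetDropoutSparse

# Synthetic genes x cells counts; the class itself transposes to cells x genes.
counts = sp.random(200, 50, density=0.1, format='csr') * 10
dataset = scDatasetDropoutSparse(data=counts, ratio=0.1, seed=1)

loader = DataLoader(dataset, batch_size=16, shuffle=False)
for sample, idx in loader:
    # Samples come back log(x+1)-transformed because discreteTag defaults to False.
    print(sample.shape, idx.shape)
    break

diff --git a/codesfromJGandYJ/impute/other_saucie.py 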
b/codesfromJGandYJ/impute/other_saucie.sh similarity index 100% rename from codesfromJGandYJ/impute/other_saucie.py rename to codesfromJGandYJ/impute/other_saucie.sh From 66e22044bd7e6174fb388fbe1859d9fc563918d9 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 23:57:59 -0600 Subject: [PATCH 044/117] update deepimpute --- codesfromJGandYJ/impute/deepimpute_impute.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 14f5a35..03b3da4 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -25,8 +25,6 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - - save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) features=x dropout_filename = save_path+"deepimpute.csv" From 65b5203419da4fb60cd140184f9996f7a2ed907c Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:02:05 -0600 Subject: [PATCH 045/117] update dca --- codesfromJGandYJ/impute/other_dca.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index be4dfe9..561a761 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -10,5 +10,5 @@ #SBATCH --mem=128G ################################################################# module load miniconda3 -source activate conda_R +source activate /storage/htc/joshilab/wangjue/conda_R_gpu python3 -W ignore dca_impute.py From 10d6387f873f88b76f393b100e84d5ce9d621dd9 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:16:24 -0600 Subject: [PATCH 046/117] update deepimpute to raw counts --- codesfromJGandYJ/impute/deepimpute_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 03b3da4..bcf1a3a 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -24,7 +24,7 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): x = x.tolist() x=x.todense() x=np.asarray(x) - x=np.log(x+1) + # x=np.log(x+1) features=x dropout_filename = save_path+"deepimpute.csv" From d36c62a5fb4eff8f1c7c34b71c8466e2288a178c Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:21:07 -0600 Subject: [PATCH 047/117] update GPU settings --- codesfromJGandYJ/impute/other_dca.sh | 23 +++++++++++++++-------- codesfromJGandYJ/impute/other_saucie.sh | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 561a761..c30f0ef 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -1,14 +1,21 @@ -#! 
/bin/bash -######################### Batch Headers ######################### +#!/bin/bash +#------------------------------------------------------------------------------- +# SBATCH CONFIG +#------------------------------------------------------------------------------- +## resources #SBATCH -A xulab -#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH --partition gpu3,gpu4 +#SBATCH --cpus-per-task=1 # cores per task +#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) +#SBATCH --time 2-00:00 # days-hours:minutes #SBATCH -J dca -#SBATCH -o results-%j.out # give the job output a custom name -#SBATCH -t 2-00:00 # two days time limit -#SBATCH -N 1 # number of nodes -#SBATCH -n 1 # number of cores (AKA tasks) -#SBATCH --mem=128G +#SBATCH --gres gpu:1 #gpu:1 any gpu +## labels and outputs +#SBATCH --job-name=modelpyenetCB-%j.out +#SBATCH --output=results-%j.out # %j is the unique jobID ################################################################# + module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_gpu +module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 45bef22..4716bb6 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -4,7 +4,7 @@ #------------------------------------------------------------------------------- ## resources #SBATCH -A xulab -#SBATCH --partition gpu4 +#SBATCH --partition gpu3,gpu4 #SBATCH --cpus-per-task=1 # cores per task #SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) #SBATCH --time 2-00:00 # days-hours:minutes From d8469d67c183170f0fe030bb39ef683b81f7cf3b Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:28:41 -0600 Subject: [PATCH 048/117] update saucie --- codesfromJGandYJ/impute/other_dca.sh | 2 +- codesfromJGandYJ/impute/other_saucie.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index c30f0ef..0dad353 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -16,6 +16,6 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_gpu +source activate /storage/htc/joshilab/wangjue/conda_R_dca module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 4716bb6..75ce679 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -16,6 +16,6 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_gpu +source activate /storage/htc/joshilab/wangjue/conda_R_saucie module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From 992580ea323f686b2b0a16cea604b844be9e36e7 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:37:28 -0600 Subject: [PATCH 049/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 3aadce7..4bcbbcd 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -5,6 +5,7 @@ import 
matplotlib.pyplot as plt import pandas as pd import argparse +sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments From d9d4932eb773b8ad6d316df442d01bc6154325d1 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:41:54 -0600 Subject: [PATCH 050/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 4bcbbcd..5fc479e 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,11 +1,12 @@ import sys +sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") import tensorflow as tf import SAUCIE import numpy as np import matplotlib.pyplot as plt import pandas as pd import argparse -sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") + parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments From 786b79cb11c18d772fa3be697fdefa379d8c080e Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:45:48 -0600 Subject: [PATCH 051/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 5fc479e..36cd755 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,5 +1,5 @@ import sys -sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") +sys.path.append("/storage/htc/joshilab/wangjue/") import tensorflow as tf import SAUCIE import numpy as np From a9c96833f963cb99591350e83edb47b23d6d14bb Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:50:41 -0600 Subject: [PATCH 052/117] update saucie directory --- codesfromJGandYJ/impute/other_saucie.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 75ce679..01fec69 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -16,6 +16,7 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_saucie +# source activate /storage/htc/joshilab/wangjue/conda_R_saucie +source activate /storage/htc/joshilab/wangjue/conda_R_gpu module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From c9326143f7330b0c54611afd69bf28ff59362076 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 12:55:24 -0600 Subject: [PATCH 053/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 1 - codesfromJGandYJ/impute/dca_impute.py | 1 - codesfromJGandYJ/impute/other_saucie.sh | 29 ++++++++++-------------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 36cd755..7d3f5cb 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,6 +1,5 @@ import sys sys.path.append("/storage/htc/joshilab/wangjue/") -import tensorflow as tf import SAUCIE import numpy as np import matplotlib.pyplot as plt diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 95fbca7..8fd9519 100644 --- 
a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt -import torch import csv import argparse import sys diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 01fec69..f517112 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -1,22 +1,17 @@ -#!/bin/bash -#------------------------------------------------------------------------------- -# SBATCH CONFIG -#------------------------------------------------------------------------------- -## resources +#! /bin/bash +######################### Batch Headers ######################### #SBATCH -A xulab -#SBATCH --partition gpu3,gpu4 -#SBATCH --cpus-per-task=1 # cores per task -#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) -#SBATCH --time 2-00:00 # days-hours:minutes -#SBATCH -J SAUCIE -#SBATCH --gres gpu:1 #gpu:1 any gpu -## labels and outputs -#SBATCH --job-name=modelpyenetCB-%j.out -#SBATCH --output=results-%j.out # %j is the unique jobID +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J saucie +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G ################################################################# module load miniconda3 -# source activate /storage/htc/joshilab/wangjue/conda_R_saucie -source activate /storage/htc/joshilab/wangjue/conda_R_gpu -module load cuda/cuda-10.1.243 +source activate /storage/htc/joshilab/wangjue/conda_R_saucie +# source activate /storage/htc/joshilab/wangjue/conda_R_gpu +# module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From c74ffbdee8e3833f09c00da69df45b202270b1a7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 07:59:45 -0600 Subject: [PATCH 054/117] modify saucie --- codesfromJGandYJ/impute/SAUCIE_impute.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 7d3f5cb..ae7018d 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,6 +1,7 @@ import sys sys.path.append("/storage/htc/joshilab/wangjue/") import SAUCIE +import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import pandas as pd @@ -13,7 +14,7 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() - +# modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) @@ -21,18 +22,17 @@ def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - x=np.transpose(x) - - saucie = SAUCIE.SAUCIE(x.shape[1]) - loadtrain = SAUCIE.Loader(x, shuffle=True) - saucie.train(loadtrain, steps=1000) - - loadeval = SAUCIE.Loader(x, shuffle=False) - reconstruction = saucie.get_reconstruction(loadeval) - + 
loader_train = SAUCIE.Loader(x, shuffle=True) + loader_eval = SAUCIE.Loader(x, shuffle=False) + # clear the computational graph + tf.reset_default_graph() + # build the SAUCIE model + model = SAUCIE.SAUCIE(x.shape[1]) + # train the model! + model.train(loader_train, steps=2000) + reconstruction = model.get_reconstruction(loader_eval) reconstruction=np.transpose(reconstruction) - np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] From 1fe435aac44b6f4e8e143831bd83803d57cd574e Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 09:36:03 -0600 Subject: [PATCH 055/117] add dca update --- codesfromJGandYJ/impute/dca_impute.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 8fd9519..3adba5d 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -19,8 +19,7 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() - x=np.asarray(x) - + # x=np.asarray(x) features=x.T #write @@ -29,9 +28,9 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+"dca_output.csv") + os.system("dca "+dropout_filename+ " "+save_path+"tmpdca") - filename=save_path+"dca_output.csv" + filename=save_path+"tmpdca/mean.tsv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T From 3a12bb0a432bf60e6ef15f276e1fc19d94e6c89d Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 09:43:41 -0600 Subject: [PATCH 056/117] add dca --- codesfromJGandYJ/impute/other_dca.sh | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 0dad353..02f64ca 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -1,21 +1,15 @@ -#!/bin/bash -#------------------------------------------------------------------------------- -# SBATCH CONFIG -#------------------------------------------------------------------------------- -## resources +#! 
/bin/bash +######################### Batch Headers ######################### #SBATCH -A xulab -#SBATCH --partition gpu3,gpu4 -#SBATCH --cpus-per-task=1 # cores per task -#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) -#SBATCH --time 2-00:00 # days-hours:minutes -#SBATCH -J dca -#SBATCH --gres gpu:1 #gpu:1 any gpu -## labels and outputs -#SBATCH --job-name=modelpyenetCB-%j.out -#SBATCH --output=results-%j.out # %j is the unique jobID +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J DCA +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G ################################################################# module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_dca -module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py From 068559dbe21602b7e89a774af64c0d74490e8fe9 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 10:32:43 -0600 Subject: [PATCH 057/117] update dca --- codesfromJGandYJ/impute/dca_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 3adba5d..c0d62ba 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -19,7 +19,7 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() - # x=np.asarray(x) + x=np.asarray(x) features=x.T #write From c3252d1150ecea9f8e6b409bd23d568110da91a0 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 10:33:33 -0600 Subject: [PATCH 058/117] update dca --- codesfromJGandYJ/impute/dca_impute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index c0d62ba..a9c16b2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -14,6 +14,7 @@ save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' +# Ref: https://github.com/theislab/dca def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) From 87608115870e5fd63dd069384c59cae818fded74 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:32:48 -0600 Subject: [PATCH 059/117] add saver in impute --- codesfromJGandYJ/impute/SAUCIE_impute.py | 6 +- codesfromJGandYJ/impute/SAVER_impute.py | 81 ++++++++++++----------- codesfromJGandYJ/impute/SCIMPUTE.py | 69 ++++++++++--------- codesfromJGandYJ/impute/dca_impute.py | 9 ++- codesfromJGandYJ/impute/other_saver.sh | 14 ++++ codesfromJGandYJ/impute/other_scimpute.sh | 14 ++++ codesfromJGandYJ/impute/saver.r | 16 +++++ codesfromJGandYJ/impute/scimpute.r | 1 + 8 files changed, 133 insertions(+), 77 deletions(-) create mode 100644 codesfromJGandYJ/impute/other_saver.sh create mode 100644 codesfromJGandYJ/impute/other_scimpute.sh create mode 100644 codesfromJGandYJ/impute/saver.r create mode 100644 codesfromJGandYJ/impute/scimpute.r diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index ae7018d..07d7bdc 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -7,6 +7,11 @@ import pandas as pd import argparse +# 
modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb +# Notes: Have to use very old tensorflow downloaded from conda: +# python==3.6.12 +# tensorflow==1.4.0 +# numpy==1.19.4 parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments @@ -14,7 +19,6 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 5d32405..3425dfc 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -5,51 +5,52 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout +# Ref: +# https://mohuangx.github.io/SAVER/articles/saver-tutorial.html +# Use python to generate input for saver.r, then output -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute SAVER') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saver/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x - - - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): + filename = 
'/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + features=x.T + + #write + dropout_filename = save_path+"saver_input.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + #run the R script + os.system("Rscript saver.r "+save_path+"saver_input.csv "+save_path+"saver_output.csv ") + + filename=save_path+"saver_output.csv" + imputed_values = pd.read_csv(filename,sep="\t") + imputed_values=imputed_values.T + + np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py index 246239d..730bba9 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE.py +++ b/codesfromJGandYJ/impute/SCIMPUTE.py @@ -5,52 +5,51 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout +# Notes in install scimpute: +# Have to add in R: +# Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS=TRUE) +# Ref: https://github.com/Vivianstats/scImpute -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute scImpute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scimpute/{}/'.format(args.data) +def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + 
x=np.asarray(x)
+    x=np.log(x+1)
 
-features=x
+    features=x.T
 
+    #write
+    dropout_filename = save_path+"saver_input.csv"
+    with open(dropout_filename, "w") as f:
+        writer = csv.writer(f)
+        writer.writerows(features)
 
-#write
-dropout_filename = save_path+datasetNameStr+"_dropout.csv"
-with open(dropout_filename, "w") as f:
-    writer = csv.writer(f)
-    writer.writerows(features)
-
+    #run the R script
+    os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv")
+
+    filename=save_path+"scimpute_input.csv"
+    imputed_values = pd.read_csv(filename,sep="\t")
+    imputed_values=imputed_values.T
+
+    np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
+
+datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+seedList = ['1','2','3']
+ratioList = [0.1, 0.3, 0.6, 0.8]
+
+for datasetName in datasetNameList:
+    for seed in seedList:
+        for ratio in ratioList:
+            impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio)
diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py
index a9c16b2..46ecc8a 100644
--- a/codesfromJGandYJ/impute/dca_impute.py
+++ b/codesfromJGandYJ/impute/dca_impute.py
@@ -6,6 +6,14 @@
 import csv
 import argparse
 import sys
 
+# Ref: https://github.com/theislab/dca
+# Notes: As TensorFlow moved to 2.0, a lot of things changed; these are the versions tested on Nov. 26, 2020
+# python==3.7.9
+# tensorflow==1.15.4
+# keras==2.3.1
+# theano==1.0.5
+# scanpy==1.5.1
+
 parser = argparse.ArgumentParser(description='Imputation DCA')
 # In this script, not using arguments
 parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000')
@@ -14,7 +22,6 @@
 args = parser.parse_args()
 
 save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/'
-# Ref: https://github.com/theislab/dca
 def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1):
     filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
diff --git a/codesfromJGandYJ/impute/other_saver.sh b/codesfromJGandYJ/impute/other_saver.sh
new file mode 100644
index 0000000..17aa82b
--- /dev/null
+++ b/codesfromJGandYJ/impute/other_saver.sh
@@ -0,0 +1,14 @@
+#! /bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p BioCompute,Lewis  # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J Saver
+#SBATCH -o results-%j.out  # give the job output a custom name
+#SBATCH -t 2-00:00  # two days time limit
+#SBATCH -N 1  # number of nodes
+#SBATCH -n 12  # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+python3 -W ignore SAVER_impute.py
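The SAVER and scImpute wrappers added in this patch share one pattern: write the matrix to CSV, shell out to an R script, read the result back. Below is a minimal, hedged sketch of that round-trip — the helper name is hypothetical, and the two deviations from the code above (subprocess.run with check=True instead of os.system, and header=None on read) are editor suggestions: check=True surfaces R failures that os.system silently ignores, and header=None matters because saver.r writes its table with col.names = F, so there is no header row to consume.

import csv
import subprocess

import pandas as pd

def run_r_imputation(features, rscript, in_csv, out_csv):
    # features: one row per gene, matching the wrappers above
    with open(in_csv, "w") as f:
        csv.writer(f).writerows(features)
    # check=True raises CalledProcessError if the R script exits non-zero
    subprocess.run(["Rscript", rscript, in_csv, out_csv], check=True)
    # header=None because the R side writes no header row
    return pd.read_csv(out_csv, sep="\t", header=None).to_numpy().T

diff --git a/codesfromJGandYJ/impute/other_scimpute.sh b/codesfromJGandYJ/impute/other_scimpute.sh
new file mode 100644
index 0000000..8dad300
--- /dev/null
+++ b/codesfromJGandYJ/impute/other_scimpute.sh
@@ -0,0 +1,14 @@
+#! 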
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scimpute +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 12 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore SCIMPUTE_impute.py \ No newline at end of file diff --git a/codesfromJGandYJ/impute/saver.r b/codesfromJGandYJ/impute/saver.r new file mode 100644 index 0000000..1b0953b --- /dev/null +++ b/codesfromJGandYJ/impute/saver.r @@ -0,0 +1,16 @@ +# Usage: +# Rscript saver.r input.txt output.txt +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(SAVER) +inputfile = args[1] +outputfile = args[2] +raw.data <- read.csv(inputfile, header = FALSE, sep=',') +expr <- as.matrix(raw.data) +# Use 12 cores in saver +expr.saver <- saver(expr, ncores = 12, estimates.only = TRUE) +write.table(expr.saver, file=outputfile, row.names = F, col.names = F, sep = "\t") \ No newline at end of file diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r new file mode 100644 index 0000000..503fa1d --- /dev/null +++ b/codesfromJGandYJ/impute/scimpute.r @@ -0,0 +1 @@ +#TODO \ No newline at end of file From 64458d4fdb4e154ad3b9c679ef987cecd5e8dfd7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:50:16 -0600 Subject: [PATCH 060/117] add scimpute in imputation --- codesfromJGandYJ/impute/SAVER_impute.py | 1 + codesfromJGandYJ/impute/SCIMPUTE.py | 8 ++++---- codesfromJGandYJ/impute/scimpute.r | 22 +++++++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 3425dfc..05a5b2a 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -7,6 +7,7 @@ import sys # Ref: +# https://github.com/mohuangx/SAVER # https://mohuangx.github.io/SAVER/articles/saver-tutorial.html # Use python to generate input for saver.r, then output diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py index 730bba9..7f44de8 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE.py +++ b/codesfromJGandYJ/impute/SCIMPUTE.py @@ -36,13 +36,13 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): writer.writerows(features) #run the R script - os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv") + os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"/tmpscimpute/ scimpute_output.csv") - filename=save_path+"scimpute_input.csv" - imputed_values = pd.read_csv(filename,sep="\t") + filename=save_path+"/tmpscimpute/scimpute_output.csv" + imputed_values = pd.read_csv(filename,sep=",") imputed_values=imputed_values.T - np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r 
index 503fa1d..d4ed3f3 100644 --- a/codesfromJGandYJ/impute/scimpute.r +++ b/codesfromJGandYJ/impute/scimpute.r @@ -1 +1,21 @@ -#TODO \ No newline at end of file +# Usage: +# Rscript scImpute.r input.txt output.txt +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(scImpute) +inputfile = args[1] +outputDir = args[2] +outputfile = args[3] +scimpute(# full path to raw count matrix + count_path = inputfile, + infile = "csv", # format of input file + outfile = "csv", # format of output file + out_dir = "./", # full path to output directory + labeled = outputDir, # cell type labels not available + drop_thre = 0.5, # threshold set on dropout probability + Kcluster = 2, # 2 cell subpopulations + ncores = 12) # number of cores used in parallel computation From 583ab13e20e9ff9c5f5f77409123c2120c81a582 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:53:08 -0600 Subject: [PATCH 061/117] add scimpute in imputation --- codesfromJGandYJ/impute/{SCIMPUTE.py => SCIMPUTE_impute.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename codesfromJGandYJ/impute/{SCIMPUTE.py => SCIMPUTE_impute.py} (100%) diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py similarity index 100% rename from codesfromJGandYJ/impute/SCIMPUTE.py rename to codesfromJGandYJ/impute/SCIMPUTE_impute.py From 3cf5d3c8db87033cb540ee5b59eee6c8c410e4b8 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:59:06 -0600 Subject: [PATCH 062/117] add scimpute in imputation --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 7f44de8..83519b2 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -30,7 +30,7 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): features=x.T #write - dropout_filename = save_path+"saver_input.csv" + dropout_filename = save_path+"scimpute_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) From 7eb9a207b16e95e5fd78aab9db9518ae5de28b1f Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 16:06:43 -0600 Subject: [PATCH 063/117] add scimpute in imputation --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 83519b2..87826fb 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -27,7 +27,17 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - features=x.T + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + features=features.T #write dropout_filename = save_path+"scimpute_input.csv" From 9f064264db1878e70416db2fc153e4824b855bf6 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 17:36:55 -0600 Subject: 
[PATCH 064/117] add scimpute in imputation debug

---
 codesfromJGandYJ/impute/SCIMPUTE_impute.py | 11 ++++++++---
 codesfromJGandYJ/impute/scimpute.r         |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py
index 87826fb..b19a8da 100644
--- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py
+++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py
@@ -54,9 +54,14 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1):
 
     np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
 
-datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
-seedList = ['1','2','3']
-ratioList = [0.1, 0.3, 0.6, 0.8]
+# datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+# seedList = ['1','2','3']
+# ratioList = [0.1, 0.3, 0.6, 0.8]
+
+# Debug
+datasetNameList = ['12.Klein']
+seedList = ['1']
+ratioList = [0.1]
 
 for datasetName in datasetNameList:
     for seed in seedList:
diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r
index d4ed3f3..d7fbb16 100644
--- a/codesfromJGandYJ/impute/scimpute.r
+++ b/codesfromJGandYJ/impute/scimpute.r
@@ -14,8 +14,8 @@ scimpute(# full path to raw count matrix
     count_path = inputfile,
     infile = "csv",           # format of input file
     outfile = "csv",          # format of output file
-    out_dir = "./",           # full path to output directory
-    labeled = outputDir,      # cell type labels not available
+    out_dir = outputDir,      # full path to output directory
+    labeled = FALSE,          # cell type labels not available
     drop_thre = 0.5,          # threshold set on dropout probability
     Kcluster = 2,             # 2 cell subpopulations
     ncores = 12)              # number of cores used in parallel computation

From df944a433c8a8c985abe750d753a6fb4d39f943c Mon Sep 17 00:00:00 2001
From: Wang
Date: Thu, 26 Nov 2020 18:08:27 -0600
Subject: [PATCH 065/117] move unused scripts in results to a new folder

---
 results/{ => NotForFinalUsage}/calculateROGUE.R                   | 0
 results/{ => NotForFinalUsage}/compare_varID.py                   | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_23dropout.txt          | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_explore.txt            | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_louvain_2.txt          | 0
 results/{ => NotForFinalUsage}/results_ROGUE.py                   | 0
 results/{ => NotForFinalUsage}/results_Reading.py                 | 0
 results/{ => NotForFinalUsage}/results_Reading_23.py              | 0
 results/{ => NotForFinalUsage}/results_Reading_23dropout.py       | 0
 results/{ => NotForFinalUsage}/results_Reading_explore.py         | 0
 results/{ => NotForFinalUsage}/results_Reading_graph.py           | 0
 results/{ => NotForFinalUsage}/results_imputation.sh              | 0
 results/{ => NotForFinalUsage}/results_imputation_0.3.sh          | 0
 results/{ => NotForFinalUsage}/results_imputation_grid.sh         | 0
 results/{ => NotForFinalUsage}/results_impute.py                  | 0
 results/{ => NotForFinalUsage}/results_impute_graph_ROC.py        | 0
 results/{ => NotForFinalUsage}/results_impute_graph_ROC.sh        | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Celltype.sh   | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Impute.sh     | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23.sh  | 0
 .../submitCluster_Result_Impute_23dropout.sh                      | 0
 .../{ => NotForFinalUsage}/submitCluster_Result_Impute_explore.sh | 0
 .../{ => NotForFinalUsage}/submitCluster_Result_Impute_graph.sh   | 0
 results/{ => NotForFinalUsage}/summary.sh                         | 0
 results/{ => NotForFinalUsage}/summary_cmd.py                     | 0
 25 files changed, 0 insertions(+), 0 deletions(-)
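As a companion to the debug run in PATCH 064 above, a hedged sanity check on the single reconstruction it writes: this loads the 12.Klein output and compares it to the masked input matrix the wrapper started from (not the unmasked ground truth, which the project's benchmark code uses). The orientation fix-up and the median-absolute-difference smoke metric are editorial assumptions and simplifications, not repository code.

import numpy as np

# Reconstruction written by the debug configuration in PATCH 064.
recon = np.load('/storage/htc/joshilab/wangjue/scGNN/scimpute/12.Klein_0.1_1_recon.npy')

# Masked input the wrapper read (same preprocessing as the wrappers above).
x = np.load('/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_1/'
            '12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy',
            allow_pickle=True).tolist().todense()
x = np.log(np.asarray(x) + 1)

# The wrappers transpose before saving, so align orientations first.
if recon.shape != x.shape:
    recon = recon.T
print('shapes:', recon.shape, x.shape)
print('median |recon - input|:', np.median(np.abs(recon - x)))

rename results/{ => 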
NotForFinalUsage}/calculateROGUE.R (100%) rename results/{ => NotForFinalUsage}/compare_varID.py (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_23dropout.txt (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_explore.txt (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_louvain_2.txt (100%) rename results/{ => NotForFinalUsage}/results_ROGUE.py (100%) rename results/{ => NotForFinalUsage}/results_Reading.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_23.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_23dropout.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_explore.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_graph.py (100%) rename results/{ => NotForFinalUsage}/results_imputation.sh (100%) rename results/{ => NotForFinalUsage}/results_imputation_0.3.sh (100%) rename results/{ => NotForFinalUsage}/results_imputation_grid.sh (100%) rename results/{ => NotForFinalUsage}/results_impute.py (100%) rename results/{ => NotForFinalUsage}/results_impute_graph_ROC.py (100%) rename results/{ => NotForFinalUsage}/results_impute_graph_ROC.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Celltype.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23dropout.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_explore.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_graph.sh (100%) rename results/{ => NotForFinalUsage}/summary.sh (100%) rename results/{ => NotForFinalUsage}/summary_cmd.py (100%) diff --git a/results/calculateROGUE.R b/results/NotForFinalUsage/calculateROGUE.R similarity index 100% rename from results/calculateROGUE.R rename to results/NotForFinalUsage/calculateROGUE.R diff --git a/results/compare_varID.py b/results/NotForFinalUsage/compare_varID.py similarity index 100% rename from results/compare_varID.py rename to results/NotForFinalUsage/compare_varID.py diff --git a/results/jobinfo_imp_23dropout.txt b/results/NotForFinalUsage/jobinfo_imp_23dropout.txt similarity index 100% rename from results/jobinfo_imp_23dropout.txt rename to results/NotForFinalUsage/jobinfo_imp_23dropout.txt diff --git a/results/jobinfo_imp_explore.txt b/results/NotForFinalUsage/jobinfo_imp_explore.txt similarity index 100% rename from results/jobinfo_imp_explore.txt rename to results/NotForFinalUsage/jobinfo_imp_explore.txt diff --git a/results/jobinfo_imp_louvain_2.txt b/results/NotForFinalUsage/jobinfo_imp_louvain_2.txt similarity index 100% rename from results/jobinfo_imp_louvain_2.txt rename to results/NotForFinalUsage/jobinfo_imp_louvain_2.txt diff --git a/results/results_ROGUE.py b/results/NotForFinalUsage/results_ROGUE.py similarity index 100% rename from results/results_ROGUE.py rename to results/NotForFinalUsage/results_ROGUE.py diff --git a/results/results_Reading.py b/results/NotForFinalUsage/results_Reading.py similarity index 100% rename from results/results_Reading.py rename to results/NotForFinalUsage/results_Reading.py diff --git a/results/results_Reading_23.py b/results/NotForFinalUsage/results_Reading_23.py similarity index 100% rename from results/results_Reading_23.py rename to results/NotForFinalUsage/results_Reading_23.py diff --git a/results/results_Reading_23dropout.py b/results/NotForFinalUsage/results_Reading_23dropout.py similarity index 100% rename 
from results/results_Reading_23dropout.py rename to results/NotForFinalUsage/results_Reading_23dropout.py diff --git a/results/results_Reading_explore.py b/results/NotForFinalUsage/results_Reading_explore.py similarity index 100% rename from results/results_Reading_explore.py rename to results/NotForFinalUsage/results_Reading_explore.py diff --git a/results/results_Reading_graph.py b/results/NotForFinalUsage/results_Reading_graph.py similarity index 100% rename from results/results_Reading_graph.py rename to results/NotForFinalUsage/results_Reading_graph.py diff --git a/results/results_imputation.sh b/results/NotForFinalUsage/results_imputation.sh similarity index 100% rename from results/results_imputation.sh rename to results/NotForFinalUsage/results_imputation.sh diff --git a/results/results_imputation_0.3.sh b/results/NotForFinalUsage/results_imputation_0.3.sh similarity index 100% rename from results/results_imputation_0.3.sh rename to results/NotForFinalUsage/results_imputation_0.3.sh diff --git a/results/results_imputation_grid.sh b/results/NotForFinalUsage/results_imputation_grid.sh similarity index 100% rename from results/results_imputation_grid.sh rename to results/NotForFinalUsage/results_imputation_grid.sh diff --git a/results/results_impute.py b/results/NotForFinalUsage/results_impute.py similarity index 100% rename from results/results_impute.py rename to results/NotForFinalUsage/results_impute.py diff --git a/results/results_impute_graph_ROC.py b/results/NotForFinalUsage/results_impute_graph_ROC.py similarity index 100% rename from results/results_impute_graph_ROC.py rename to results/NotForFinalUsage/results_impute_graph_ROC.py diff --git a/results/results_impute_graph_ROC.sh b/results/NotForFinalUsage/results_impute_graph_ROC.sh similarity index 100% rename from results/results_impute_graph_ROC.sh rename to results/NotForFinalUsage/results_impute_graph_ROC.sh diff --git a/results/submitCluster_Result_Celltype.sh b/results/NotForFinalUsage/submitCluster_Result_Celltype.sh similarity index 100% rename from results/submitCluster_Result_Celltype.sh rename to results/NotForFinalUsage/submitCluster_Result_Celltype.sh diff --git a/results/submitCluster_Result_Impute.sh b/results/NotForFinalUsage/submitCluster_Result_Impute.sh similarity index 100% rename from results/submitCluster_Result_Impute.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute.sh diff --git a/results/submitCluster_Result_Impute_23.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_23.sh similarity index 100% rename from results/submitCluster_Result_Impute_23.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_23.sh diff --git a/results/submitCluster_Result_Impute_23dropout.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh similarity index 100% rename from results/submitCluster_Result_Impute_23dropout.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh diff --git a/results/submitCluster_Result_Impute_explore.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh similarity index 100% rename from results/submitCluster_Result_Impute_explore.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh diff --git a/results/submitCluster_Result_Impute_graph.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh similarity index 100% rename from results/submitCluster_Result_Impute_graph.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh diff --git a/results/summary.sh 
b/results/NotForFinalUsage/summary.sh similarity index 100% rename from results/summary.sh rename to results/NotForFinalUsage/summary.sh diff --git a/results/summary_cmd.py b/results/NotForFinalUsage/summary_cmd.py similarity index 100% rename from results/summary_cmd.py rename to results/NotForFinalUsage/summary_cmd.py From 4db2a4eb107c0f0a2de5d4d0eb85defdcf8aa7ab Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 19:14:45 -0600 Subject: [PATCH 066/117] reorganize old codes --- {otherresults => bak/otherresults}/BAK_MAGIC.py | 0 {otherresults => bak/otherresults}/MAGIC_analysis.sh | 0 .../otherresults}/MAGIC_analysis_usage.sh | 0 {otherresults => bak/otherresults}/MAGIC_impute.py | 0 {otherresults => bak/otherresults}/MAGIC_impute_usage.py | 0 .../otherresults}/Other_Results_Evaluation.sh | 0 .../otherresults}/Other_results_Reading.py | 0 .../otherresults}/Other_results_celltype.py | 0 .../otherresults}/Other_results_impute.py | 0 {otherresults => bak/otherresults}/README.md | 0 {otherresults => bak/otherresults}/SAUCIE_analysis.sh | 0 {otherresults => bak/otherresults}/SAUCIE_celltype.py | 0 {otherresults => bak/otherresults}/SAUCIE_impute.py | 0 {otherresults => bak/otherresults}/SAVER_impute.R | 0 {otherresults => bak/otherresults}/SCIMPUTE_impute.R | 0 {otherresults => bak/otherresults}/dca_impute.py | 0 {otherresults => bak/otherresults}/scVi_impute.py | 0 .../otherresults}/simulation_generator.R | 0 .../NotForFinalUsage => bak/results}/calculateROGUE.R | 0 .../NotForFinalUsage => bak/results}/compare_varID.py | 0 .../results}/jobinfo_imp_23dropout.txt | 0 .../results}/jobinfo_imp_explore.txt | 0 .../results}/jobinfo_imp_louvain_2.txt | 0 .../NotForFinalUsage => bak/results}/results_ROGUE.py | 0 .../NotForFinalUsage => bak/results}/results_Reading.py | 0 .../results}/results_Reading_23.py | 0 .../results}/results_Reading_23dropout.py | 0 .../results}/results_Reading_explore.py | 0 .../results}/results_Reading_graph.py | 0 .../results}/results_imputation.sh | 0 .../results}/results_imputation_0.3.sh | 0 .../results}/results_imputation_grid.sh | 0 .../NotForFinalUsage => bak/results}/results_impute.py | 0 .../results}/results_impute_graph_ROC.py | 0 .../results}/results_impute_graph_ROC.sh | 0 .../results}/submitCluster_Result_Celltype.sh | 0 .../results}/submitCluster_Result_Impute.sh | 0 .../results}/submitCluster_Result_Impute_23.sh | 0 .../results}/submitCluster_Result_Impute_23dropout.sh | 0 .../results}/submitCluster_Result_Impute_explore.sh | 0 .../results}/submitCluster_Result_Impute_graph.sh | 0 {results/NotForFinalUsage => bak/results}/summary.sh | 0 {results/NotForFinalUsage => bak/results}/summary_cmd.py | 0 results/results_Reading_recheck.py | 9 +++++++-- results/submitCluster_Result_Impute_recheck.sh | 4 ++-- 45 files changed, 9 insertions(+), 4 deletions(-) rename {otherresults => bak/otherresults}/BAK_MAGIC.py (100%) rename {otherresults => bak/otherresults}/MAGIC_analysis.sh (100%) rename {otherresults => bak/otherresults}/MAGIC_analysis_usage.sh (100%) rename {otherresults => bak/otherresults}/MAGIC_impute.py (100%) rename {otherresults => bak/otherresults}/MAGIC_impute_usage.py (100%) rename {otherresults => bak/otherresults}/Other_Results_Evaluation.sh (100%) rename {otherresults => bak/otherresults}/Other_results_Reading.py (100%) rename {otherresults => bak/otherresults}/Other_results_celltype.py (100%) rename {otherresults => bak/otherresults}/Other_results_impute.py (100%) rename {otherresults => bak/otherresults}/README.md (100%) rename {otherresults => 
bak/otherresults}/SAUCIE_analysis.sh (100%) rename {otherresults => bak/otherresults}/SAUCIE_celltype.py (100%) rename {otherresults => bak/otherresults}/SAUCIE_impute.py (100%) rename {otherresults => bak/otherresults}/SAVER_impute.R (100%) rename {otherresults => bak/otherresults}/SCIMPUTE_impute.R (100%) rename {otherresults => bak/otherresults}/dca_impute.py (100%) rename {otherresults => bak/otherresults}/scVi_impute.py (100%) rename {otherresults => bak/otherresults}/simulation_generator.R (100%) rename {results/NotForFinalUsage => bak/results}/calculateROGUE.R (100%) rename {results/NotForFinalUsage => bak/results}/compare_varID.py (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_23dropout.txt (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_explore.txt (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_louvain_2.txt (100%) rename {results/NotForFinalUsage => bak/results}/results_ROGUE.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_23.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_23dropout.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_explore.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_graph.py (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation_0.3.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation_grid.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_impute.py (100%) rename {results/NotForFinalUsage => bak/results}/results_impute_graph_ROC.py (100%) rename {results/NotForFinalUsage => bak/results}/results_impute_graph_ROC.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Celltype.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_23.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_23dropout.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_explore.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_graph.sh (100%) rename {results/NotForFinalUsage => bak/results}/summary.sh (100%) rename {results/NotForFinalUsage => bak/results}/summary_cmd.py (100%) diff --git a/otherresults/BAK_MAGIC.py b/bak/otherresults/BAK_MAGIC.py similarity index 100% rename from otherresults/BAK_MAGIC.py rename to bak/otherresults/BAK_MAGIC.py diff --git a/otherresults/MAGIC_analysis.sh b/bak/otherresults/MAGIC_analysis.sh similarity index 100% rename from otherresults/MAGIC_analysis.sh rename to bak/otherresults/MAGIC_analysis.sh diff --git a/otherresults/MAGIC_analysis_usage.sh b/bak/otherresults/MAGIC_analysis_usage.sh similarity index 100% rename from otherresults/MAGIC_analysis_usage.sh rename to bak/otherresults/MAGIC_analysis_usage.sh diff --git a/otherresults/MAGIC_impute.py b/bak/otherresults/MAGIC_impute.py similarity index 100% rename from otherresults/MAGIC_impute.py rename to bak/otherresults/MAGIC_impute.py diff --git a/otherresults/MAGIC_impute_usage.py b/bak/otherresults/MAGIC_impute_usage.py similarity index 100% rename from otherresults/MAGIC_impute_usage.py rename to bak/otherresults/MAGIC_impute_usage.py diff --git 
a/otherresults/Other_Results_Evaluation.sh b/bak/otherresults/Other_Results_Evaluation.sh similarity index 100% rename from otherresults/Other_Results_Evaluation.sh rename to bak/otherresults/Other_Results_Evaluation.sh diff --git a/otherresults/Other_results_Reading.py b/bak/otherresults/Other_results_Reading.py similarity index 100% rename from otherresults/Other_results_Reading.py rename to bak/otherresults/Other_results_Reading.py diff --git a/otherresults/Other_results_celltype.py b/bak/otherresults/Other_results_celltype.py similarity index 100% rename from otherresults/Other_results_celltype.py rename to bak/otherresults/Other_results_celltype.py diff --git a/otherresults/Other_results_impute.py b/bak/otherresults/Other_results_impute.py similarity index 100% rename from otherresults/Other_results_impute.py rename to bak/otherresults/Other_results_impute.py diff --git a/otherresults/README.md b/bak/otherresults/README.md similarity index 100% rename from otherresults/README.md rename to bak/otherresults/README.md diff --git a/otherresults/SAUCIE_analysis.sh b/bak/otherresults/SAUCIE_analysis.sh similarity index 100% rename from otherresults/SAUCIE_analysis.sh rename to bak/otherresults/SAUCIE_analysis.sh diff --git a/otherresults/SAUCIE_celltype.py b/bak/otherresults/SAUCIE_celltype.py similarity index 100% rename from otherresults/SAUCIE_celltype.py rename to bak/otherresults/SAUCIE_celltype.py diff --git a/otherresults/SAUCIE_impute.py b/bak/otherresults/SAUCIE_impute.py similarity index 100% rename from otherresults/SAUCIE_impute.py rename to bak/otherresults/SAUCIE_impute.py diff --git a/otherresults/SAVER_impute.R b/bak/otherresults/SAVER_impute.R similarity index 100% rename from otherresults/SAVER_impute.R rename to bak/otherresults/SAVER_impute.R diff --git a/otherresults/SCIMPUTE_impute.R b/bak/otherresults/SCIMPUTE_impute.R similarity index 100% rename from otherresults/SCIMPUTE_impute.R rename to bak/otherresults/SCIMPUTE_impute.R diff --git a/otherresults/dca_impute.py b/bak/otherresults/dca_impute.py similarity index 100% rename from otherresults/dca_impute.py rename to bak/otherresults/dca_impute.py diff --git a/otherresults/scVi_impute.py b/bak/otherresults/scVi_impute.py similarity index 100% rename from otherresults/scVi_impute.py rename to bak/otherresults/scVi_impute.py diff --git a/otherresults/simulation_generator.R b/bak/otherresults/simulation_generator.R similarity index 100% rename from otherresults/simulation_generator.R rename to bak/otherresults/simulation_generator.R diff --git a/results/NotForFinalUsage/calculateROGUE.R b/bak/results/calculateROGUE.R similarity index 100% rename from results/NotForFinalUsage/calculateROGUE.R rename to bak/results/calculateROGUE.R diff --git a/results/NotForFinalUsage/compare_varID.py b/bak/results/compare_varID.py similarity index 100% rename from results/NotForFinalUsage/compare_varID.py rename to bak/results/compare_varID.py diff --git a/results/NotForFinalUsage/jobinfo_imp_23dropout.txt b/bak/results/jobinfo_imp_23dropout.txt similarity index 100% rename from results/NotForFinalUsage/jobinfo_imp_23dropout.txt rename to bak/results/jobinfo_imp_23dropout.txt diff --git a/results/NotForFinalUsage/jobinfo_imp_explore.txt b/bak/results/jobinfo_imp_explore.txt similarity index 100% rename from results/NotForFinalUsage/jobinfo_imp_explore.txt rename to bak/results/jobinfo_imp_explore.txt diff --git a/results/NotForFinalUsage/jobinfo_imp_louvain_2.txt b/bak/results/jobinfo_imp_louvain_2.txt similarity index 100% rename from 
results/NotForFinalUsage/jobinfo_imp_louvain_2.txt rename to bak/results/jobinfo_imp_louvain_2.txt diff --git a/results/NotForFinalUsage/results_ROGUE.py b/bak/results/results_ROGUE.py similarity index 100% rename from results/NotForFinalUsage/results_ROGUE.py rename to bak/results/results_ROGUE.py diff --git a/results/NotForFinalUsage/results_Reading.py b/bak/results/results_Reading.py similarity index 100% rename from results/NotForFinalUsage/results_Reading.py rename to bak/results/results_Reading.py diff --git a/results/NotForFinalUsage/results_Reading_23.py b/bak/results/results_Reading_23.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_23.py rename to bak/results/results_Reading_23.py diff --git a/results/NotForFinalUsage/results_Reading_23dropout.py b/bak/results/results_Reading_23dropout.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_23dropout.py rename to bak/results/results_Reading_23dropout.py diff --git a/results/NotForFinalUsage/results_Reading_explore.py b/bak/results/results_Reading_explore.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_explore.py rename to bak/results/results_Reading_explore.py diff --git a/results/NotForFinalUsage/results_Reading_graph.py b/bak/results/results_Reading_graph.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_graph.py rename to bak/results/results_Reading_graph.py diff --git a/results/NotForFinalUsage/results_imputation.sh b/bak/results/results_imputation.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation.sh rename to bak/results/results_imputation.sh diff --git a/results/NotForFinalUsage/results_imputation_0.3.sh b/bak/results/results_imputation_0.3.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation_0.3.sh rename to bak/results/results_imputation_0.3.sh diff --git a/results/NotForFinalUsage/results_imputation_grid.sh b/bak/results/results_imputation_grid.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation_grid.sh rename to bak/results/results_imputation_grid.sh diff --git a/results/NotForFinalUsage/results_impute.py b/bak/results/results_impute.py similarity index 100% rename from results/NotForFinalUsage/results_impute.py rename to bak/results/results_impute.py diff --git a/results/NotForFinalUsage/results_impute_graph_ROC.py b/bak/results/results_impute_graph_ROC.py similarity index 100% rename from results/NotForFinalUsage/results_impute_graph_ROC.py rename to bak/results/results_impute_graph_ROC.py diff --git a/results/NotForFinalUsage/results_impute_graph_ROC.sh b/bak/results/results_impute_graph_ROC.sh similarity index 100% rename from results/NotForFinalUsage/results_impute_graph_ROC.sh rename to bak/results/results_impute_graph_ROC.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Celltype.sh b/bak/results/submitCluster_Result_Celltype.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Celltype.sh rename to bak/results/submitCluster_Result_Celltype.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute.sh b/bak/results/submitCluster_Result_Impute.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute.sh rename to bak/results/submitCluster_Result_Impute.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_23.sh b/bak/results/submitCluster_Result_Impute_23.sh similarity index 100% rename from 
results/NotForFinalUsage/submitCluster_Result_Impute_23.sh rename to bak/results/submitCluster_Result_Impute_23.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh b/bak/results/submitCluster_Result_Impute_23dropout.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh rename to bak/results/submitCluster_Result_Impute_23dropout.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh b/bak/results/submitCluster_Result_Impute_explore.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh rename to bak/results/submitCluster_Result_Impute_explore.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh b/bak/results/submitCluster_Result_Impute_graph.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh rename to bak/results/submitCluster_Result_Impute_graph.sh diff --git a/results/NotForFinalUsage/summary.sh b/bak/results/summary.sh similarity index 100% rename from results/NotForFinalUsage/summary.sh rename to bak/results/summary.sh diff --git a/results/NotForFinalUsage/summary_cmd.py b/bak/results/summary_cmd.py similarity index 100% rename from results/NotForFinalUsage/summary_cmd.py rename to bak/results/summary_cmd.py diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py index 124743c..e64b509 100644 --- a/results/results_Reading_recheck.py +++ b/results/results_Reading_recheck.py @@ -12,7 +12,12 @@ help="method used: 1-13") args = parser.parse_args() -# Note: +# New notes: +# We used this in the paper revision; it generates many .sh files. +# This file is called by submitCluster_Result_Impute_recheck.sh, and only checks .out files.
+# The results can be gathered with cat *.out + +# Old Note: # Generate results in python other than in shell for better organization # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel @@ -21,7 +26,7 @@ if args.splitMode: #The split of batch, more batches, more parallel - if args.batchStr == 8: + if args.batchStr == 9: datasetList = [ '9.Chung', # '9.Chung --discreteTag' diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index ba68356..f0d494e 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,13 +1,13 @@ # for i in {0..59} # do -# for j in {8,11,12,13} +# for j in {9,11,12,13} # do # python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh # done # done # submit -for j in {8,11,12,13} +for j in {9,11,12,13} do for i in {0..59} do From 5e4cedf078034a99c42cd4dd507596f713f98fe2 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 20:07:55 -0600 Subject: [PATCH 067/117] debug scimpute --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 8 ++++---- codesfromJGandYJ/impute/scimpute.r | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index b19a8da..8e2480d 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -46,11 +46,11 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): writer.writerows(features) #run the R script - os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"/tmpscimpute/ scimpute_output.csv") + os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"tmpscimpute/") - filename=save_path+"/tmpscimpute/scimpute_output.csv" - imputed_values = pd.read_csv(filename,sep=",") - imputed_values=imputed_values.T + filename=save_path+"tmpscimpute/scimpute_count.csv" + imputed_values = pd.read_csv(filename,sep=",",index_col=0) + imputed_values = imputed_values.to_numpy() np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r index d7fbb16..ec91006 100644 --- a/codesfromJGandYJ/impute/scimpute.r +++ b/codesfromJGandYJ/impute/scimpute.r @@ -1,5 +1,5 @@ # Usage: -# Rscript scImpute.r input.txt output.txt +# Rscript scImpute.r input.txt outputdir # test if there is one argument: if not, return an error args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -9,7 +9,6 @@ if (length(args)==0) { library(scImpute) inputfile = args[1] outputDir = args[2] -outputfile = args[3] scimpute(# full path to raw count matrix count_path = inputfile, infile = "csv", # format of input file From ab1d2c80ea2a7b0275b1a72f172a425dcebbd603 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 20:53:39 -0600 Subject: [PATCH 068/117] fix saver issue --- codesfromJGandYJ/impute/SAVER_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 05a5b2a..eca2323 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -39,7 +39,7 @@ def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): os.system("Rscript 
saver.r "+save_path+"saver_input.csv "+save_path+"saver_output.csv ") filename=save_path+"saver_output.csv" - imputed_values = pd.read_csv(filename,sep="\t") + imputed_values = pd.read_csv(filename,sep="\t",header=None) imputed_values=imputed_values.T np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) From dec349ffcd6a36fba65e4c05f37f4d22c33869f2 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 21:44:53 -0600 Subject: [PATCH 069/117] scimpute for all possible scenarios --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 8e2480d..9d8649f 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -54,14 +54,9 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) -# datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] -# seedList = ['1','2','3'] -# ratioList = [0.1, 0.3, 0.6, 0.8] - -# Debug -datasetNameList = ['12.Klein'] -seedList = ['1'] -ratioList = [0.1] +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] for datasetName in datasetNameList: for seed in seedList: From 9c78428ec3573852d9f2ed75537e4314403988d4 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:27:48 -0600 Subject: [PATCH 070/117] imputation on other results --- benchmark_util.py | 4 +- results/results_impute_others_all.py | 59 ++++++++++++++++++++++++++++ results/submit_Impute_others.sh | 14 +++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 results/results_impute_others_all.py create mode 100644 results/submit_Impute_others.sh diff --git a/benchmark_util.py b/benchmark_util.py index f85f128..6088885 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -530,6 +530,7 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - y) + rmse = ((x - y)**2/len(result))**0.5 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -538,8 +539,9 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - yuse) + rmse = ((x - yuse)**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) # IMPUTATION METRICS diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py new file mode 100644 index 0000000..4bd8f6f --- /dev/null +++ b/results/results_impute_others_all.py @@ -0,0 +1,59 @@ +import os +import numpy as np +import pandas as pd +import argparse +import scipy.sparse +import sys +sys.path.append('../') +from util_function import * +from benchmark_util import * +from R_util import generateLouvainCluster +from sklearn.cluster import KMeans +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +args = parser.parse_args() + +# Notes: +# Call by submit_Impute_others.sh + + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + 
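A side note on the rmse introduced in benchmark_util.py above: as written, ((x - y)**2/len(result))**0.5 divides each squared error by n before taking the square root, so np.mean(rmse) works out to the mean absolute error divided by sqrt(n), not the conventional root-mean-square error. If the standard definition is the intent, a minimal sketch (not part of the patch):

import numpy as np

def rmse(x, y):
    # Conventional RMSE: average the squared errors first,
    # then take a single square root.
    return np.sqrt(np.mean((x - y) ** 2))

The distinction matters when comparing methods, because absolute and squared errors weight outliers differently.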
+oridirStr = '../npyImputeG2E' +medirStr = '../' + +seedList = ['1','2','3'] +ratioList = [0.1,0.3,0.6,0.8] +methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] + +def outResults(datasetName,seed,ratio,method): + featuresOriginal = load_data(datasetName, discreteTag=False) + + features = None + dropi = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropi.npy') + dropj = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropj.npy') + dropix = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropix.npy') + + # scGNN results + # featuresImpute = np.load(npyDir+datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + + if method=='dca' or method=='deepimpute': + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + else: + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse)) + + +for method in methodList: + for datasetName in datasetList: + for seed in seedList: + for ratio in ratioList: + outResults(datasetName=datasetName, seed=seed, ratio=ratio, method=method) \ No newline at end of file diff --git a/results/submit_Impute_others.sh b/results/submit_Impute_others.sh new file mode 100644 index 0000000..38d0534 --- /dev/null +++ b/results/submit_Impute_others.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J OthersResults +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W results_impute_others_all.py \ No newline at end of file From 1e30f6ff127648580d8d72fe56aef99ce02f0f06 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:32:41 -0600 Subject: [PATCH 071/117] imputation on other results --- results/submit_Impute_others.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/submit_Impute_others.sh b/results/submit_Impute_others.sh index 38d0534..55e89f4 100644 --- a/results/submit_Impute_others.sh +++ b/results/submit_Impute_others.sh @@ -11,4 +11,4 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W results_impute_others_all.py \ No newline at end of file +python3 -W ignore results_impute_others_all.py \ No newline at end of file From 9dcf7062452c81b292cce23b0c67ddda2eb6c913 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:37:54 -0600 Subject: [PATCH 072/117] imputation on other results --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 4bd8f6f..067e0aa 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -28,7 +28,7 @@ medirStr = '../' seedList = ['1','2','3'] -ratioList = [0.1,0.3,0.6,0.8] +ratioList = ['0.1','0.3','0.6','0.8'] methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] def outResults(datasetName,seed,ratio,method): From d170da2d7d0fbed399da1b9d41bc492899a45ea9 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 27 Nov 2020 07:03:53 -0600 Subject: [PATCH 073/117] fix a log error in imputation of scGNN, rerun --- benchmark_util.py | 2 +- results/results_impute_graph.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index 6088885..d2fc1ba 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -577,7 +577,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): # return np.median(np.abs(x - yuse)) return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) -# cosine similarity +# cosine similarity with log def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): """ X_mean: imputed dataset diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index b964534..3f33f3c 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -74,7 +74,7 @@ # featuresImpute = featuresImpute.to_numpy() l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse), end='') def imputeResult(inputData): 
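Two clarifications on the fixes in patches 071 and 073 above. First, python3 -W with nothing valid after it cannot run the script: -W consumes the next token as its warning-filter argument, so in the original submit script the filename itself was swallowed as the filter; -W ignore runs the script with all warnings suppressed, as intended. Second, imputation_cosine_log is cosine similarity computed on log-transformed expression values; a minimal sketch, assuming log1p is the transform (the function body is not shown in these diffs):

import numpy as np

def cosine_log(imputed, original):
    # Cosine similarity of the log-transformed vectors: <u,v> / (||u|| * ||v||)
    u, v = np.log1p(imputed), np.log1p(original)
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))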
From 5fa98224abc05058b3a2f4b2063ff6c5761f55b3 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 27 Nov 2020 07:18:41 -0600 Subject: [PATCH 074/117] update sbatch info --- results/submitCluster_Result_Impute_recheck.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index f0d494e..157350a 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,10 +1,10 @@ -# for i in {0..59} -# do -# for j in {9,11,12,13} -# do -# python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh -# done -# done +for i in {0..59} +do +for j in {9,11,12,13} +do +python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +done +done # submit for j in {9,11,12,13} From 58808fe9f9058f00275904a63cca765f47857384 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 07:30:22 -0600 Subject: [PATCH 075/117] Partially add netNMF-sc and scIGANs --- results/results_impute_others_all.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 067e0aa..fe54037 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -29,7 +29,7 @@ seedList = ['1','2','3'] ratioList = ['0.1','0.3','0.6','0.8'] -methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) @@ -41,11 +41,22 @@ def outResults(datasetName,seed,ratio,method): # scGNN results # featuresImpute = np.load(npyDir+datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') - featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + if method == 'scvinorm': + featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') + # not using now + elif method == 'scIGANs': + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + elif method == 'netNMFsc': + featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') + featuresImpute = featuresImpute.T + else: + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + # No log if method=='dca' or method=='deepimpute': l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + # log else: From 49787b6d96d2f6533ca99bf3456cfa0d2af846e7 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 12:44:06 -0600 Subject: [PATCH 076/117] add a new version of main_benchmark with timer and memory info --- main_benchmark_timer.py | 738 ++++++++++++++++++++++++++++ 1
file changed, 738 insertions(+) create mode 100644 main_benchmark_timer.py diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py new file mode 100644 index 0000000..6de83ef --- /dev/null +++ b/main_benchmark_timer.py @@ -0,0 +1,738 @@ +import time +import resource +import datetime +import argparse +import sys +import numpy as np +import pickle as pkl +import networkx as nx +import scipy.sparse as sp +import torch +from torch.utils.data import Dataset, DataLoader +from torch import nn, optim +from torch.nn import functional as F +from sklearn.decomposition import PCA +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.cluster import KMeans,SpectralClustering,AffinityPropagation,AgglomerativeClustering,Birch,DBSCAN,FeatureAgglomeration,MeanShift,OPTICS +from model import AE, VAE, VAE2d +from util_function import * +from graph_function import * +from benchmark_util import * +from gae_embedding import GAEembedding,measure_clustering_results,test_clustering_benchmark_results +from LTMG_R import * +import pandas as pd + +# Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--batch-size', type=int, default=12800, metavar='N', + help='input batch size for training (default: 12800)') +parser.add_argument('--epochs', type=int, default=500, metavar='N', + help='number of epochs to train in Regulatory Autoencoder (default: 500)') +parser.add_argument('--EM-epochs', type=int, default=200, metavar='N', + help='number of epochs to train in iteration EM (default: 200)') +parser.add_argument('--EM-iteration', type=int, default=10, metavar='N', + help='number of epochs in EM iteration (default: 10)') +parser.add_argument('--EMtype', type=str, default='EM', + help='EM process type (default: celltypeEM) or EM') +parser.add_argument('--alpha', type=float, default=0.5, + help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') +parser.add_argument('--converge-type', type=str, default='celltype', + help='type of converge: celltype/graph/both/either (default: celltype) ') +parser.add_argument('--converge-graphratio', type=float, default=0.01, + help='ratio of cell type change in EM iteration (default: 0.01), 0-1') +parser.add_argument('--converge-celltyperatio', type=float, default=0.95, + help='ratio of cell type change in EM iteration (default: 0.99), 0-1') +parser.add_argument('--cluster-epochs', type=int, default=200, metavar='N', + help='number of epochs in cluster autoencoder training (default: 200)') +parser.add_argument('--no-cuda', action='store_true', default=True, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +parser.add_argument('--regulized-type', type=str, default='LTMG', + help='regulized type (default: LTMG) in EM, otherwise: noregu/LTMG/LTMG01') +parser.add_argument('--reduction', type=str, default='sum', + help='reduction type: mean/sum, default(sum)') +parser.add_argument('--model', type=str, default='AE', + help='VAE/AE (default: AE)') +parser.add_argument('--gammaPara', type=float, default=0.1, + help='regulized parameter (default: 
0.1)') +parser.add_argument('--alphaRegularizePara', type=float, default=0.9, + help='regulized parameter (default: 0.9)') + +# imputation related +parser.add_argument('--EMregulized-type', type=str, default='Celltype', + help='regulized type (default: noregu) in EM, otherwise: noregu/Graph/GraphR/Celltype/CelltypeR') +# parser.add_argument('--adjtype', type=str, default='unweighted', +# help='adjtype (default: weighted) otherwise: unweighted') +# parser.add_argument('--aePara', type=str, default='start', +# help='whether use parameter of first feature autoencoder: start/end/cont') +parser.add_argument('--gammaImputePara', type=float, default=0.0, + help='regulized parameter (default: 0.0)') +parser.add_argument('--graphImputePara', type=float, default=0.3, + help='graph parameter (default: 0.3)') +parser.add_argument('--celltypeImputePara', type=float, default=0.1, + help='celltype parameter (default: 0.1)') +parser.add_argument('--L1Para', type=float, default=1.0, + help='L1 regulized parameter (default: 0.001)') +parser.add_argument('--L2Para', type=float, default=0.0, + help='L2 regulized parameter (default: 0.001)') +parser.add_argument('--EMreguTag', action='store_true', default=False, + help='whether regu in EM process') +parser.add_argument('--discreteTag', action='store_true', default=False, + help='whether input is raw or 0/1 (default: False)') +#Build cell graph +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') +parser.add_argument('--zerofillFlag', action='store_true', default=False, + help='fill zero or not before EM process (default: False)') + +#Debug related +parser.add_argument('--precisionModel', type=str, default='Float', + help='Single Precision/Double precision: Float/Double (default:Float)') +parser.add_argument('--coresUsage', type=str, default='1', + help='how many cores used: all/1/... 
(default:1)') +parser.add_argument('--npyDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--log-interval', type=int, default=100, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--saveinternal', action='store_true', default=False, + help='whether save internal interation results or not') +parser.add_argument('--debuginfo', action='store_true', default=False, + help='whether output debuginfo in cpu time and memory info') + +#LTMG related +parser.add_argument('--inferLTMGTag', action='store_true', default=False, + help='Whether infer LTMG') +parser.add_argument('--LTMGDir', type=str, default='/home/jwang/data/scData/', + help='directory of LTMGDir, default:(/home/wangjue/biodata/scData/allBench/)') +parser.add_argument('--expressionFile', type=str, default='Biase_expression.csv', + help='expression File in csv') +parser.add_argument('--ltmgFile', type=str, default='ltmg.csv', + help='expression File in csv') + +#Clustering related +parser.add_argument('--useGAEembedding', action='store_true', default=False, + help='whether use GAE embedding for clustering(default: False)') +parser.add_argument('--useBothembedding', action='store_true', default=False, + help='whether use both embedding and Graph embedding for clustering(default: False)') +parser.add_argument('--n-clusters', default=20, type=int, help='number of clusters if predifined for KMeans/Birch ') +parser.add_argument('--clustering-method', type=str, default='LouvainK', + help='Clustering method: Louvain/KMeans/SpectralClustering/AffinityPropagation/AgglomerativeClustering/AgglomerativeClusteringK/Birch/BirchN/MeanShift/OPTICS/LouvainK/LouvainB') +parser.add_argument('--maxClusterNumber', type=int, default=30, + help='max cluster for celltypeEM without setting number of clusters (default: 30)') +parser.add_argument('--minMemberinCluster', type=int, default=5, + help='max cluster for celltypeEM without setting number of clusters (default: 100)') +parser.add_argument('--resolution', type=str, default='auto', + help='the number of resolution on Louvain (default: auto/0.5/0.8)') + + +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') + +#Aggrelated +parser.add_argument('--linkage', type=str, default='ward', + help='linkage should be: ward, average, complete, single') + +#GAE related +parser.add_argument('--GAEmodel', type=str, default='gcn_vae', help="models used") +parser.add_argument('--GAEepochs', type=int, default=200, help='Number of epochs to train.') +parser.add_argument('--GAEhidden1', type=int, default=32, help='Number of units in hidden layer 1.') +parser.add_argument('--GAEhidden2', type=int, default=16, help='Number of units in hidden layer 2.') +parser.add_argument('--GAElr', type=float, default=0.01, help='Initial learning rate.') +parser.add_argument('--GAEdropout', type=float, default=0., help='Dropout rate (1 - keep probability).') +parser.add_argument('--GAElr_dw', type=float, default=0.001, help='Initial learning rate for regularization.') + +#Start Impute or not, only used for evaluating Impute +parser.add_argument('--imputeMode', default=False, action='store_true', + help='impute or not (default: False). 
Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--dropoutRatio', type=float, default=0.1, + help='dropout ratio for impute (default: 0.1)') + +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +#TODO +#As we have lots of parameters, should check args +checkargs(args) + +torch.manual_seed(args.seed) +device = torch.device("cuda" if args.cuda else "cpu") + +if not args.coresUsage == 'all': + torch.set_num_threads(int(args.coresUsage)) + +kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} +print(args) +start_time = time.time() +print ('---0:00:00---scRNA starts loading.') + +if not args.imputeMode: + # if args.discreteTag: + # scData = scBenchDataset(args.datasetName, args.discreteTag) + # else: + # scData = scBenchDataset(args.datasetName, args.discreteTag, transform=logtransform) + scData = scBenchDataset(args.datasetName, args.discreteTag) +else: + # if args.discreteTag: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio) + # else: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio, transform=logtransform) + scData = scDatasetDropout(datasetName=args.datasetName, discreteTag=args.discreteTag, ratio=args.dropoutRatio, seed=args.seed) +train_loader = DataLoader(scData, batch_size=args.batch_size, shuffle=False, **kwargs) + +if args.inferLTMGTag: + #run LTMG in R + runLTMG(args.LTMGDir+'test/'+args.expressionFile,args.LTMGDir+'test/') + ltmgFile = args.ltmgFile +else: + ltmgFile = args.datasetName+'/T2000_UsingOriginalMatrix/T2000_LTMG.txt' + +regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile) +regulationMatrix = torch.from_numpy(regulationMatrix) + +# Original +if args.model == 'VAE': + # model = VAE(dim=scData.features.shape[1]).to(device) + model = VAE2d(dim=scData.features.shape[1]).to(device) +elif args.model == 'AE': + model = AE(dim=scData.features.shape[1]).to(device) +if args.precisionModel == 'Double': + model=model.double() +optimizer = optim.Adam(model.parameters(), lr=1e-3) + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +#t1=pd.read_csv('/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +#whether to output debuginfo in running time and memory consumption +def debuginfoStr(info): + if args.debuginfo: + print ('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+'---'+info) + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Init Mem consumption: '+str(mem)) + +debuginfoStr('scRNA has been successfully loaded') + +#TODO: have to improve save npy +def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): + ''' + EMFlag indicates whether in EM processes. 
+ If in EM, use regulized-type parsed from program entrance, + Otherwise, noregu + taskType: celltype or imputation + ''' + model.train() + train_loss = 0 + # for batch_idx, (data, _) in enumerate(train_loader): + # for batch_idx, data in enumerate(train_loader): + for batch_idx, (data, dataindex) in enumerate(train_loader): + if args.precisionModel == 'Double': + data = data.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + data = data.type(torch.FloatTensor) + data = data.to(device) + regulationMatrixBatch = regulationMatrix[dataindex,:] + optimizer.zero_grad() + if args.model == 'VAE': + recon_batch, mu, logvar, z = model(data) + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + elif args.model == 'AE': + recon_batch, z = model(data) + mu_dummy = '' + logvar_dummy = '' + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, 
recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + # L1 and L2 regularization in imputation + # 0.0 for no regularization + if taskType == 'imputation': + l1 = 0.0 + l2 = 0.0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + l2 = l2 + p.pow(2).sum() + loss = loss + args.L1Para * l1 + args.L2Para * l2 + + loss.backward() + train_loss += loss.item() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), + loss.item() / len(data))) + + # for batch + if batch_idx == 0: + recon_batch_all=recon_batch + data_all = data + z_all = z + else: + recon_batch_all=torch.cat((recon_batch_all, recon_batch), 0) + data_all = torch.cat((data_all, data), 0) + z_all = torch.cat((z_all,z),0) + + print('====> Epoch: {} Average loss: {:.4f}'.format( + epoch, train_loss / len(train_loader.dataset))) + + return recon_batch_all, data_all, z_all + +if __name__ == "__main__": + outParaTag = str(args.k)+'-'+str(args.gammaPara)+'-'+str(args.alphaRegularizePara)+'-'+str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + # outParaTag = str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + ptfileStart = args.npyDir+args.datasetName+'_'+outParaTag+'_EMtrainingStart.pt' + stateStart = { + # 'epoch': epoch, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + } + ptfile = args.npyDir+args.datasetName+'_EMtraining.pt' + + # Step 1. celltype clustering + # store parameter + torch.save(stateStart,ptfileStart) + + # Save results only when impute + discreteStr = '' + if args.discreteTag: + discreteStr = 'D' + + if args.imputeMode: + # Does not need now + # save_sparse_matrix(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + # sp.save_npz(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_features.npy',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropi.npy',scData.i) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropj.npy',scData.j) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropix.npy',scData.ix) + + debuginfoStr('Start feature autoencoder training') + + for epoch in range(1, args.epochs + 1): + recon, original, z = train(epoch, EMFlag=False) + + debuginfoStr('Feature autoencoder training finished') + + zOut = z.detach().cpu().numpy() + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + # Store reconOri for imputation + reconOri = recon.clone() + reconOri = reconOri.detach().cpu().numpy() + + # Step 1. 
Inferring celltype + #Define resolution + #Default: auto, otherwise use user defined resolution + if args.resolution == 'auto': + if zOut.shape[0]< 2000: + resolution = 0.8 + else: + resolution = 0.5 + else: + resolution = float(args.resolution) + + debuginfoStr('Start construct cell grpah') + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding)) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr('Cell Graph constructed and pruned') + + # if args.saveinternal: + # reconOut = recon.detach().cpu().numpy() + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z.npy',zOut) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z.npy',zOut) + + # Whether use GAE embedding + debuginfoStr('Start Graph Autoencoder training') + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + debuginfoStr('Graph Autoencoder training finished') + + # For iteration studies + G0 = nx.Graph() + G0.add_weighted_edges_from(edgeList) + nlG0=nx.normalized_laplacian_matrix(G0) + # set iteration criteria for converge + adjOld = nlG0 + # set celltype criteria for converge + listResultOld = [1 for i in range(zOut.shape[0])] + + #Fill the zeros before EM iteration + # TODO: better implementation later, now we don't filling zeros for now + if args.zerofillFlag: + for nz_index in range(len(scData.nz_i)): + # tmp = scipy.sparse.lil_matrix.todense(scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]]) + # tmp = np.asarray(tmp).reshape(-1)[0] + tmp = scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]] + reconOut[scData.nz_i[nz_index], scData.nz_j[nz_index]] = tmp + recon = reconOut + + debuginfoStr('EM Iteration started') + for bigepoch in range(0, args.EM_iteration): + iteration_time = time.time() + + # Now for both methods, we need do clustering, using clustering results to check converge + # TODO May reimplement later + # Clustering: Get cluster + clustering_time = time.time() + if args.clustering_method=='Louvain': + # Louvain: the only function has R dependent + # Seperate here for platforms without R support + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + elif args.clustering_method=='LouvainK': + from R_util import generateLouvainCluster + listResult,size = 
generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = KMeans(n_clusters=k, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='LouvainB': + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = Birch(n_clusters=k).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='KMeans': + clustering = KMeans(n_clusters=args.n_clusters, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='SpectralClustering': + clustering = SpectralClustering(n_clusters=args.n_clusters, assign_labels="discretize", random_state=0).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AffinityPropagation': + clustering = AffinityPropagation().fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='AgglomerativeClustering': + clustering = AgglomerativeClustering(linkage=args.linkage).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AgglomerativeClusteringK': + clustering = AgglomerativeClustering(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='Birch': + clustering = Birch(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='BirchN': + clustering = Birch(n_clusters=None).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='MeanShift': + clustering = MeanShift().fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='OPTICS': + clustering = OPTICS(min_samples=int(args.k/2), min_cluster_size=args.minMemberinCluster).fit(zOut) + listResult = clustering.labels_.tolist() + else: + print("Error: Clustering method not appropriate") + # print("---Clustering takes %s seconds ---" % (time.time() - clustering_time)) + + # If clusters more than maxclusters, then have to stop + if len(set(listResult))>args.maxClusterNumber or len(set(listResult))<=1: + print("Stopping: Number of clusters is " + str(len(set(listResult))) + ".") + # Exit + # return None + # Else: dealing with the number + listResult = trimClustering(listResult,minMemberinCluster=args.minMemberinCluster,maxClusterNumber=args.maxClusterNumber) + + #Calculate silhouette + measure_clustering_results(zOut, listResult) + print('Total Cluster Number: '+str(len(set(listResult)))) + + debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training started') + #Graph regulizated EM AE with celltype AE, do the additional AE + if args.EMtype == 'celltypeEM': + # Each cluster has a autoencoder, and organize them back in iteraization + clusterIndexList = [] + for i in range(len(set(listResult))): + clusterIndexList.append([]) + for i in range(len(listResult)): + clusterIndexList[listResult[i]].append(i) + + reconNew = np.zeros((scData.features.shape[0],scData.features.shape[1])) + + # Convert to Tensor + reconNew = torch.from_numpy(reconNew) + if args.precisionModel == 'Double': + reconNew = reconNew.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + reconNew = reconNew.type(torch.FloatTensor) + reconNew = reconNew.to(device) + + # 
model.load_state_dict(torch.load(ptfile)) + model.load_state_dict(ptstatus) + + for clusterIndex in clusterIndexList: + reconUsage = recon[clusterIndex] + scDataInter = scDatasetInter(reconUsage) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + for epoch in range(1, args.cluster_epochs + 1): + reconCluster, originalCluster, zCluster = train(epoch, EMFlag=True) + count = 0 + for i in clusterIndex: + reconNew[i] = reconCluster[count,:] + count +=1 + # Update + recon = reconNew + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training succeed') + + # Use new dataloader + scDataInter = scDatasetInter(recon) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + debuginfoStr(str(bigepoch)+' th iter: Start construct cell grpah') + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True) + + zOut = z.detach().cpu().numpy() + + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding or (bigepoch == int(args.EM_iteration)-1))) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr(str(bigepoch)+' th iter: Cell Graph constructed and pruned') + + debuginfoStr(str(bigepoch)+' th iter: Start Graph Autoencoder training') + # Whether use GAE embedding + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + + debuginfoStr(str(bigepoch)+' th iter: Graph Autoencoder training finished') + + if args.saveinternal: + reconOut = recon.detach().cpu().numpy() + if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + + # print("---One iteration in EM process, proceeded %s seconds ---" % (time.time() - iteration_time)) + + #Iteration usage + Gc = nx.Graph() + Gc.add_weighted_edges_from(edgeList) + adjGc = nx.adjacency_matrix(Gc) + + # Update new adj + adjNew = args.alpha*nlG0 + (1-args.alpha) * adjGc/np.sum(adjGc,axis=0) + + #debug + graphChange = np.mean(abs(adjNew-adjOld)) + graphChangeThreshold = args.converge_graphratio * np.mean(abs(nlG0)) + print('adjNew:{} adjOld:{} G0:{}'.format(adjNew, adjOld, nlG0)) + print('mean:{} threshold:{}'.format(graphChange, 
graphChangeThreshold)) + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(listResultOld, listResult) + print(listResultOld) + print(listResult) + print('celltype similarity:'+str(ari)) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultarray=[] + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + print('All Results: ') + print(resultstr) + + if args.saveinternal: + if args.imputeMode: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + else: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + + # graph criteria + if args.converge_type == 'graph': + if graphChange < graphChangeThreshold: + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # celltype criteria + elif args.converge_type == 'celltype': + if ari>args.converge_celltyperatio: + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if both criteria are meets + elif args.converge_type == 'both': + if graphChange < graphChangeThreshold and ari > args.converge_celltyperatio: + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if either criteria are meets + elif args.converge_type == 'either': + if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + + # Update + adjOld = adjNew + listResultOld = listResult + # torch.cuda.empty_cache() + debuginfoStr(str(bigepoch)+' th iter: Iteration finished') + + + # Output celltype related results + if args.imputeMode: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_final_edgeList.npy',edgeList) + else: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_final_edgeList.npy',edgeList) + + # np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_embedding.csv',zOut, delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_graph.csv',edgeList,fmt='%d,%d,%2.1f') + 
np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt',listResult,fmt='%d') + + resultarray=[] + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_benchmark.txt',resultarray,fmt='%s') + + # save internal results for imputation + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy',listResult) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy',listResult) + + # Step 2. Imputation with best results of graph and celltype + + # if args.imputeMode: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy') + # else: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy') + + # Use new dataloader + scDataInter = scDatasetInter(reconOri) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + stateStart = torch.load(ptfileStart) + model.load_state_dict(stateStart['state_dict']) + optimizer.load_state_dict(stateStart['optimizer']) + # if args.aePara == 'start': + # model.load_state_dict(torch.load(ptfileStart)) + # elif args.aePara == 'end': + # model.load_state_dict(torch.load(ptfileEnd)) + + # generate graph regularizer from graph + # adj = adj.tolist() # Used for read/load + # adjdense = sp.csr_matrix.todense(adj) + + # generate adj from edgeList + adjdense = sp.csr_matrix.todense(adj) + adjsample = torch.from_numpy(adjdense) + if args.precisionModel == 'Float': + adjsample = adjsample.float() + elif args.precisionModel == 'Double': + adjsample = adjsample.type(torch.DoubleTensor) + + # generate celltype regularizer from celltype + celltypesample = generateCelltypeRegu(listResult) + + celltypesample = torch.from_numpy(celltypesample) + if args.precisionModel == 'Float': + celltypesample = celltypesample.float() + elif args.precisionModel == 'Double': + celltypesample = celltypesample.type(torch.DoubleTensor) + + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True, taskType='imputation') + + reconOut = recon.detach().cpu().numpy() + + # out imputation Results + if args.imputeMode: + 
np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + else: + np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + + debuginfoStr(str(bigepoch)+'scGNN finished') From 169864c08dbd377a5a027b88aa188d88e8117485 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 16:44:37 -0600 Subject: [PATCH 077/117] ratio 0.0 --- generating_Impute_0.0.py | 78 +++++++++++++++++++++++++++++++++ submitCluster_imputation_0.0.sh | 14 ++++++ 2 files changed, 92 insertions(+) create mode 100644 generating_Impute_0.0.py create mode 100644 submitCluster_imputation_0.0.sh diff --git a/generating_Impute_0.0.py b/generating_Impute_0.0.py new file mode 100644 index 0000000..dd50d28 --- /dev/null +++ b/generating_Impute_0.0.py @@ -0,0 +1,78 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running imputation of original scGNN ') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), +] + +dropoutList = ['0.0',] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = "npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark 
/home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/submitCluster_imputation_0.0.sh b/submitCluster_imputation_0.0.sh new file mode 100644 index 0000000..dec5bd9 --- /dev/null +++ b/submitCluster_imputation_0.0.sh @@ -0,0 +1,14 @@ +for i in {1} +do +for j in {0.0} +do +sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh + +done +done \ No newline at end of file From 9a27b23df948af3da2a2b94b44f84577820c52c5 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 16:50:16 -0600 Subject: [PATCH 078/117] ratio 0.0 --- submitCluster_imputation_0.0.sh | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/submitCluster_imputation_0.0.sh b/submitCluster_imputation_0.0.sh index dec5bd9..5dcd876 100644 --- a/submitCluster_imputation_0.0.sh +++ b/submitCluster_imputation_0.0.sh @@ -1,14 +1,4 @@ -for i in {1} -do -for j in {0.0} -do -sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh - -done -done \ No newline at end of file +sbatch run_experimentImpute_2_g_e_1_9_0.0.sh +sbatch run_experimentImpute_2_g_e_1_11_0.0.sh +sbatch run_experimentImpute_2_g_e_1_12_0.0.sh +sbatch run_experimentImpute_2_g_e_1_13_0.0.sh From 3dddc7f23f56c0a3ead97ecb4a83f9710db25181 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 20:26:39 -0600 Subject: [PATCH 079/117] for ratio 0.0 --- codesfromJGandYJ/impute/MAGIC_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/SAUCIE_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/SAVER_impute.py | 20 +++++++++----------- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 17 +++++++++-------- codesfromJGandYJ/impute/dca_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/deepimpute_impute.py | 15 +++++++++------ codesfromJGandYJ/impute/other_dca.sh | 4 +++- codesfromJGandYJ/impute/other_deepimpute.sh | 3 ++- codesfromJGandYJ/impute/other_magic.sh | 3 ++- codesfromJGandYJ/impute/other_saucie.sh | 6 ++---- codesfromJGandYJ/impute/other_saver.sh | 3 ++- codesfromJGandYJ/impute/other_scimpute.sh | 3 ++- codesfromJGandYJ/impute/other_scvi.sh | 3 ++- codesfromJGandYJ/impute/scVi_impute.py | 17 +++++++++-------- 14 files changed, 78 insertions(+), 64 deletions(-) 
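Every wrapper patched below receives the same mechanical change: the unused per-script `--datasetName`/`--ratio` arguments give way to a single `--origin` switch, and the module-level driver either sweeps the full seed-by-dropout-ratio benchmark grid or, under `--origin`, imputes only the original (ratio 0.0) matrix once per dataset with seed 1. A minimal sketch of that shared pattern, with the dataset, seed, and ratio values copied from the diffs; `impute_fn` is a placeholder standing in for each wrapper's `impute_*` function, not a name from the repository:

def run_benchmark(impute_fn, origin=False):
    # Benchmark grid shared by all of the wrappers below.
    datasetNameList = ['9.Chung', '11.Kolodziejczyk', '12.Klein', '13.Zeisel']
    seedList = ['1', '2', '3']
    ratioList = [0.1, 0.3, 0.6, 0.8]
    if origin:
        # --origin: impute the undropped (ratio 0.0) data once per dataset.
        for datasetName in datasetNameList:
            impute_fn(seed='1', datasetName=datasetName, ratio='0.0')
    else:
        # Default: sweep the full seed x dropout-ratio grid.
        for datasetName in datasetNameList:
            for seed in seedList:
                for ratio in ratioList:
                    impute_fn(seed=seed, datasetName=datasetName, ratio=ratio)

Each *_impute.py below inlines exactly this control flow rather than calling a shared helper.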
diff --git a/codesfromJGandYJ/impute/MAGIC_impute.py b/codesfromJGandYJ/impute/MAGIC_impute.py index da7b573..95fe325 100644 --- a/codesfromJGandYJ/impute/MAGIC_impute.py +++ b/codesfromJGandYJ/impute/MAGIC_impute.py @@ -8,9 +8,7 @@ #from benchmark_util import impute_dropout parser = argparse.ArgumentParser(description='MAGIC Impute') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() @@ -35,10 +33,14 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_Magic(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) # From scVI # # Load single-cell RNA-seq data diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 07d7bdc..874c7c1 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -14,9 +14,7 @@ # numpy==1.19.4 parser = argparse.ArgumentParser(description='Impute use SAUCIE') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): @@ -43,7 +41,11 @@ def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_saucie(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index eca2323..f0f7381 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -12,9 +12,7 @@ # Use python to generate input for saver.r, then output parser = argparse.ArgumentParser(description='Impute SAVER') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -48,11 +46,11 @@ def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for 
datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) - - - - +if args.origin: + for datasetName in datasetNameList: + impute_saver(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 9d8649f..879a6ca 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -12,9 +12,7 @@ # Ref: https://github.com/Vivianstats/scImpute parser = argparse.ArgumentParser(description='Impute scImpute') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -58,8 +56,11 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio) - +if args.origin: + for datasetName in datasetNameList: + impute_scimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 46ecc8a..ffa4504 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -15,9 +15,7 @@ # scanpy==1.5.1 parser = argparse.ArgumentParser(description='Imputation DCA') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -48,7 +46,11 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_dca(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index bcf1a3a..9943321 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -10,8 +10,7 @@ parser = argparse.ArgumentParser(description='Impute Deepimpute') # In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') 
-parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() # Ref: @@ -43,7 +42,11 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_deepimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 02f64ca..f41c874 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -12,4 +12,6 @@ module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_dca -python3 -W ignore dca_impute.py +# grid +# python3 -W ignore dca_impute.py +python3 -W ignore dca_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_deepimpute.sh b/codesfromJGandYJ/impute/other_deepimpute.sh index b55d6c6..23d18c9 100644 --- a/codesfromJGandYJ/impute/other_deepimpute.sh +++ b/codesfromJGandYJ/impute/other_deepimpute.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore deepimpute_impute.py +# python3 -W ignore deepimpute_impute.py +python3 -W ignore deepimpute_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_magic.sh b/codesfromJGandYJ/impute/other_magic.sh index fd9f5e4..6d85905 100644 --- a/codesfromJGandYJ/impute/other_magic.sh +++ b/codesfromJGandYJ/impute/other_magic.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore MAGIC_impute.py +# python3 -W ignore MAGIC_impute.py +python3 -W ignore MAGIC_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index f517112..31c8ce1 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -9,9 +9,7 @@ #SBATCH -n 1 # number of cores (AKA tasks) #SBATCH --mem=128G ################################################################# - module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_saucie -# source activate /storage/htc/joshilab/wangjue/conda_R_gpu -# module load cuda/cuda-10.1.243 -python3 -W ignore SAUCIE_impute.py \ No newline at end of file +# python3 -W ignore SAUCIE_impute.py +python3 -W ignore SAUCIE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_saver.sh b/codesfromJGandYJ/impute/other_saver.sh index 17aa82b..2a29663 100644 --- a/codesfromJGandYJ/impute/other_saver.sh +++ b/codesfromJGandYJ/impute/other_saver.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore SAVER_impute.py +# python3 -W ignore SAVER_impute.py +python3 -W ignore SAVER_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_scimpute.sh b/codesfromJGandYJ/impute/other_scimpute.sh index 8dad300..5da0040 100644 --- a/codesfromJGandYJ/impute/other_scimpute.sh +++ 
b/codesfromJGandYJ/impute/other_scimpute.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore SCIMPUTE_impute.py \ No newline at end of file +# python3 -W ignore SCIMPUTE_impute.py +python3 -W ignore SCIMPUTE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_scvi.sh b/codesfromJGandYJ/impute/other_scvi.sh index 888d89b..7b258fa 100644 --- a/codesfromJGandYJ/impute/other_scvi.sh +++ b/codesfromJGandYJ/impute/other_scvi.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore scVi_impute.py +# python3 -W ignore scVi_impute.py +python3 -W ignore scVi_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py index 5710e36..643204b 100644 --- a/codesfromJGandYJ/impute/scVi_impute.py +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -12,9 +12,7 @@ # pip install scvi==0.6.3 parser = argparse.ArgumentParser(description='scVi imputation') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() # Ref: @@ -77,15 +75,18 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1): np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon_normalized.npy'.format(datasetName,ratio,seed),normalized_values) - datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_scvi(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio) # celltype: #np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) From 24c88e682e202f042f6b9aeeeccf962243fb7bfe Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 23:32:39 -0600 Subject: [PATCH 080/117] add figure 3 interactions --- results/Klein_correlation.py | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 results/Klein_correlation.py diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py new file mode 100644 index 0000000..1644ff1 --- /dev/null +++ b/results/Klein_correlation.py @@ -0,0 +1,70 @@ +import numpy as np +from scipy import stats +import csv + +# Get correlation from gene interactions from Klein datasets +# Ref: Klein, Allon M., et al. "Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells." Cell 161.5 (2015): 1187-1201. 
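+# The two lists below are parallel over the retained genes only: as the
+# inline '#' comments indicate, geneNumList stores the row index of each
+# geneList entry in the preprocessed Klein expression matrix, and names
+# tagged 'not in the range' were filtered out and have no index, so
+# geneList holds 21 names while geneNumList holds 14 indices.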
+ +geneList=[ + 'Krt8', #4 + 'S100a6', #19 + 'Id2', #895 + 'Id1', #602 + 'ld3', #1559 + 'Ccnd1',# not in the range + 'Ccnb1',# not in the range + 'Ccnd2',# not in the range + 'Ccna1',# not in the range + 'Sox17',# not in the range + 'Col4a1', #226 + 'Pou5f1', #150 + 'Ccnd3', #255 + 'Ccna2',# not in the range + 'Nanog', #1449 + 'Klf4',# not in the range + 'Sox2', # 601 + 'Zfp42', #527 + 'Trim28', #136 + 'Esrrb', #849 + 'Tdh', #206 +] + +geneNumList=[ + 4, + 19, + 895, + 602, + 1559, + 226, + 150, + 255, + 1449, + 601, + 527, + 136, + 849, + 206, +] + +savedir = './fig3/' +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] + +def corCal(method='magic'): + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + x = np.load(filename,allow_pickle=True) + x = x.T + + corr = np.zeros(len(geneNumList),len(geneNumList)) + for i in range(len(geneNumList)): + for j in range(len(geneNumList)): + corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) + + out_filename = savedir+method+".csv" + with open(out_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(corr) + + +for method in methodList: + corCal(method=method) \ No newline at end of file From 3992609e4f34a2efd793d1b5babcc96a0af8ffa9 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 08:01:04 -0600 Subject: [PATCH 081/117] update print format --- main_benchmark_timer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index 6de83ef..a72fe98 100644 --- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -490,7 +490,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): measure_clustering_results(zOut, listResult) print('Total Cluster Number: '+str(len(set(listResult)))) - debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training started') + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training started') #Graph regulizated EM AE with celltype AE, do the additional AE if args.EMtype == 'celltypeEM': # Each cluster has a autoencoder, and organize them back in iteraization @@ -528,13 +528,13 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # torch.save(model.state_dict(),ptfile) ptstatus = model.state_dict() - debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training succeed') + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training succeed') # Use new dataloader scDataInter = scDatasetInter(recon) train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) - debuginfoStr(str(bigepoch)+' th iter: Start construct cell grpah') + debuginfoStr(str(bigepoch)+'th iter: Start construct cell grpah') for epoch in range(1, args.EM_epochs + 1): recon, original, z = train(epoch, EMFlag=True) @@ -549,9 +549,9 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # elif args.adjtype == 'weighted': # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) # adjdense = adj.toarray() - debuginfoStr(str(bigepoch)+' th iter: Cell Graph constructed and pruned') + debuginfoStr(str(bigepoch)+'th iter: Cell Graph constructed and pruned') - debuginfoStr(str(bigepoch)+' th iter: Start Graph Autoencoder training') + 
debuginfoStr(str(bigepoch)+'th iter: Start Graph Autoencoder training') # Whether use GAE embedding if args.useGAEembedding or args.useBothembedding: zDiscret = zOut>np.mean(zOut,axis=0) @@ -562,7 +562,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): zEmbedding=GAEembedding(zDiscret, adj, args) zOut=np.concatenate((zOut,zEmbedding),axis=1) - debuginfoStr(str(bigepoch)+' th iter: Graph Autoencoder training finished') + debuginfoStr(str(bigepoch)+'th iter: Graph Autoencoder training finished') if args.saveinternal: reconOut = recon.detach().cpu().numpy() @@ -647,7 +647,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): adjOld = adjNew listResultOld = listResult # torch.cuda.empty_cache() - debuginfoStr(str(bigepoch)+' th iter: Iteration finished') + debuginfoStr(str(bigepoch)+'th iter: Iteration finished') # Output celltype related results @@ -735,4 +735,4 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.npy',reconOut) np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') - debuginfoStr(str(bigepoch)+'scGNN finished') + debuginfoStr('scGNN finished') From 5130c6470bb26d47824fb94d09b64a2261409f5b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:22:11 -0600 Subject: [PATCH 082/117] update print format in interaction --- results/Klein_correlation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 1644ff1..8586113 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -51,7 +51,10 @@ methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] def corCal(method='magic'): - filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + if method == 'scvinorm': + filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + else: + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) x = np.load(filename,allow_pickle=True) x = x.T From 85550ff013b06ac3aa008f50ccc40dd148e6f5a0 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:25:19 -0600 Subject: [PATCH 083/117] output results in interaction --- results/Klein_correlation.py | 2 +- results/Klein_correlation.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 results/Klein_correlation.sh diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 8586113..2841b8a 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -2,7 +2,7 @@ from scipy import stats import csv -# Get correlation from gene interactions from Klein datasets +# Get correlation from gene interactions from Klein datasets in Figure 3 of scGNN paper # Ref: Klein, Allon M., et al. "Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells." Cell 161.5 (2015): 1187-1201. geneList=[ diff --git a/results/Klein_correlation.sh b/results/Klein_correlation.sh new file mode 100644 index 0000000..12ef235 --- /dev/null +++ b/results/Klein_correlation.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J IE2geK +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore Klein_correlation.py \ No newline at end of file From 1939008b5fca2cf3aa766a1273c8f8b059dac59b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:28:15 -0600 Subject: [PATCH 084/117] output results in interaction --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 2841b8a..4a346e5 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -58,7 +58,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T - corr = np.zeros(len(geneNumList),len(geneNumList)) + corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): for j in range(len(geneNumList)): corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) From 08ef27eebb55e294d8964f61b5a0a6bae481a1aa Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:46:26 -0600 Subject: [PATCH 085/117] output results in interaction --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 4a346e5..34a80d8 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -61,7 +61,7 @@ def corCal(method='magic'): corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): for j in range(len(geneNumList)): - corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) + corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:])[0] out_filename = savedir+method+".csv" with open(out_filename, "w") as f: From afdd7fa819500a251b8e0c4d7d0bc77f955884ce Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 20:07:03 -0600 Subject: [PATCH 086/117] add final mem in scGNN.py --- main_benchmark_timer.py | 2 +- scGNN.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index a72fe98..f74be32 100644 --- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -217,7 +217,7 @@ def debuginfoStr(info): if args.debuginfo: print ('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+'---'+info) mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print('Init Mem consumption: '+str(mem)) + print('Mem consumption: '+str(mem)) debuginfoStr('scRNA has been successfully loaded') diff --git a/scGNN.py b/scGNN.py index 6f685a3..b4d5e7a 100644 --- a/scGNN.py +++ b/scGNN.py @@ -755,4 +755,6 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype', s results_df = pd.DataFrame(listResult,index=celllist,columns=["Celltype"]) results_df.to_csv(args.outputDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.alphaRegularizePara)+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt') + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Mem consumption: '+str(mem)) print('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+"---scGNN finished") From 
012a693a956c10dbad9e6957d16fd173e125c4b6 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 08:23:59 -0600 Subject: [PATCH 087/117] update package dependence --- requirements.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index fd0d666..9b91edd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -numpy==1.18.1 -torch==1.4.0 -networkx==2.4 -pandas==0.25.3 -rpy2==3.2.4 -matplotlib==3.1.2 -seaborn==0.9.0 -umap-learn==0.3.10 -munkres==1.1.2 +numpy +torch>=1.4.0 +networkx>=2.4 +pandas>=0.25.3 +rpy2>=3.2.4 +matplotlib>=3.1.2 +seaborn>=0.9.0 +umap-learn +munkres>=1.1.2 community -tqdm==4.48.0 \ No newline at end of file +tqdm>=4.48.0 \ No newline at end of file From 92f43c66fef110546d844a79c46cfddb0cd48ecb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 10:14:02 -0600 Subject: [PATCH 088/117] add scIGANs and netNMFsc imputation evaluation --- results/results_impute_others_all.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index fe54037..04dddba 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -14,8 +14,7 @@ args = parser.parse_args() # Notes: -# Call by submit_Impute_others.sh - +# In HPC, call by sbatch submit_Impute_others.sh datasetList = [ '9.Chung', @@ -29,7 +28,12 @@ seedList = ['1','2','3'] ratioList = ['0.1','0.3','0.6','0.8'] -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] + +# sophisticated, not using +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] + +# We should use only log(x+1) if the method permitted +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) @@ -45,9 +49,11 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') # not using now elif method == 'scIGANs': - featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + tmp = df.to_numpy() + featuresImpute = tmp.T elif method == 'netNMFsc': - featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') + featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/'+ratio+'/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') featruesImpute = featruesImpute.T else: featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') From 86c9a726e19789f6acae970d33b5b95cb0d72cab Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 10:49:22 -0600 Subject: [PATCH 089/117] add figure3, all methods --- results/Klein_correlation.py | 22 +++++++++++++++------- results/Klein_correlation.sh | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/results/Klein_correlation.py 
b/results/Klein_correlation.py index 34a80d8..63f2ed0 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -47,16 +47,24 @@ ] savedir = './fig3/' -# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def corCal(method='magic'): - if method == 'scvinorm': - filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + if method == 'scIGANs': + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/'+datasetName+'/scIGANs_npyImputeG2E_1_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + x = df.to_numpy() else: - filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) - x = np.load(filename,allow_pickle=True) - x = x.T + if method == 'scvinorm': + filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + x = np.load(filename,allow_pickle=True) + x = x.T + elif method == 'netNMFsc': + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy') + x = np.load(filename,allow_pickle=True) + else: + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + x = np.load(filename,allow_pickle=True) + x = x.T corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): diff --git a/results/Klein_correlation.sh b/results/Klein_correlation.sh index 12ef235..01eb788 100644 --- a/results/Klein_correlation.sh +++ b/results/Klein_correlation.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J IE2geK +#SBATCH -J Fig3 #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes From cd58ea64563b6bb31ac94689c2281787c6a5d82a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:07:08 -0600 Subject: [PATCH 090/117] add scIGANs and netNMFsc imputation evaluation --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 04dddba..402ca68 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -49,7 +49,7 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') # not using now elif method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) tmp = df.to_numpy() featuresImpute = tmp.T elif method == 
'netNMFsc': From decbddd740a26aa92ff2efe9ca6b60652a0154ee Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:08:21 -0600 Subject: [PATCH 091/117] add figure3, all methods --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 63f2ed0..9b3e476 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -59,7 +59,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T elif method == 'netNMFsc': - filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy') + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy' x = np.load(filename,allow_pickle=True) else: filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) From cb2cec181cef3cfd3bde5e654d5954f457cc838a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:14:36 -0600 Subject: [PATCH 092/117] add figure3, all methods --- results/Klein_correlation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 9b3e476..c581b46 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -1,5 +1,6 @@ import numpy as np from scipy import stats +import pandas as pd import csv # Get correlation from gene interactions from Klein datasets in Figure 3 of scGNN paper From eaa31f2225780ae282db343eb538027f29344b1a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:32:20 -0600 Subject: [PATCH 093/117] add figure3, all methods --- results/Klein_correlation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index c581b46..9b2fe51 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/'+datasetName+'/scIGANs_npyImputeG2E_1_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': @@ -60,7 +60,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T elif method == 'netNMFsc': - filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy' + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/12.Klein/npyImputeG2E_1_log_imputation.npy' x = np.load(filename,allow_pickle=True) else: filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) From 0c30fc775bc3519159da3d44b05dfee6afcda03f Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:42:27 -0600 Subject: [PATCH 094/117] add figure3, all methods --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 9b2fe51..1dbae48 100644 --- a/results/Klein_correlation.py +++ 
b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': From 879d156760f4757d1c6807776f68a6b26507f2c2 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 12:01:13 -0600 Subject: [PATCH 095/117] fix a typo --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 402ca68..c2b6d9f 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -54,7 +54,7 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = tmp.T elif method == 'netNMFsc': featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/'+ratio+'/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') - featruesImpute = featruesImpute.T + featuresImpute = featuresImpute.T else: featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') From 142e91a9aa6ef23706ceb42b35deba3b42c32612 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 12:04:48 -0600 Subject: [PATCH 096/117] fix a typo --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 1dbae48..05b86e3 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_12.Klein_LTMG_0.0_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': From cf553ba7cf738bdfa6993c83fbb9d0d5d4a8f4fa Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 4 Dec 2020 11:40:51 -0600 Subject: [PATCH 097/117] only focus on distribution --- submitCluster_distribution.sh | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index f631ff7..e36b7ec 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -8,25 +8,25 @@ sbatch plot_G2E_$i\_12.sh sbatch plot_G2E_$i\_13.sh done -for i in {0.1,0.3,0.6,0.8} -do -sbatch plot_G2EL_$i\_9.sh -sbatch plot_G1E_$i\_9.sh -sbatch plot_G2F_$i\_9.sh -sbatch plot_N2E_$i\_9.sh +# for i in {0.1,0.3,0.6,0.8} +# do +# sbatch plot_G2EL_$i\_9.sh +# sbatch plot_G1E_$i\_9.sh +# sbatch plot_G2F_$i\_9.sh +# sbatch plot_N2E_$i\_9.sh -sbatch plot_G2EL_$i\_11.sh -sbatch plot_G1E_$i\_11.sh -sbatch plot_G2F_$i\_11.sh -sbatch plot_N2E_$i\_11.sh +# sbatch plot_G2EL_$i\_11.sh +# sbatch plot_G1E_$i\_11.sh +# sbatch plot_G2F_$i\_11.sh +# sbatch plot_N2E_$i\_11.sh 
-sbatch plot_G2EL_$i\_12.sh -sbatch plot_G1E_$i\_12.sh -sbatch plot_G2F_$i\_12.sh -sbatch plot_N2E_$i\_12.sh +# sbatch plot_G2EL_$i\_12.sh +# sbatch plot_G1E_$i\_12.sh +# sbatch plot_G2F_$i\_12.sh +# sbatch plot_N2E_$i\_12.sh -sbatch plot_G2EL_$i\_13.sh -sbatch plot_G1E_$i\_13.sh -sbatch plot_G2F_$i\_13.sh -sbatch plot_N2E_$i\_13.sh -done \ No newline at end of file +# sbatch plot_G2EL_$i\_13.sh +# sbatch plot_G1E_$i\_13.sh +# sbatch plot_G2F_$i\_13.sh +# sbatch plot_N2E_$i\_13.sh +# done \ No newline at end of file From 6eb4472dcfe898e49c604a5bb6bd04607105f9be Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 7 Dec 2020 18:24:23 -0600 Subject: [PATCH 098/117] add npy2csv --- bak/npy2csv_script.py | 50 +++++++++++++++++++++++++++++++++++++++++++ util_function.py | 14 ++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 bak/npy2csv_script.py diff --git a/bak/npy2csv_script.py b/bak/npy2csv_script.py new file mode 100644 index 0000000..cb35774 --- /dev/null +++ b/bak/npy2csv_script.py @@ -0,0 +1,50 @@ +import numpy as np +import pandas as pd + +def convert(method='dca'): + t=np.load(method+'\\9.Chung_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_9.csv',header=None,index=False) + + t=np.load(method+'\\11.Kolodziejczyk_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_11.csv',header=None,index=False) + + t=np.load(method+'\\12.Klein_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_12.csv',header=None,index=False) + + t=np.load(method+'\\13.Zeisel_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_13.csv',header=None,index=False) + +convert('dca') +convert('deepimpute') +convert('magic') +convert('netNMFsc') +convert('saucie') +convert('saver') +convert('scimpute') +convert('scvi') + + +def convertCSV(method='scIGANs'): + df = pd.read_csv(method+'\\9.Chung_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_9.csv',header=None,index=False) + + df = pd.read_csv(method+'\\11.Kolodziejczyk_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_11.csv',header=None,index=False) + + df = pd.read_csv(method+'\\12.Klein_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_12.csv',header=None,index=False) + + df = pd.read_csv(method+'\\13.Zeisel_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_13.csv',header=None,index=False) + +convertCSV('scIGANs') + + diff --git a/util_function.py b/util_function.py index a997186..f159d2a 100644 --- a/util_function.py +++ b/util_function.py @@ -65,13 +65,23 @@ def load_data(datasetName, discreteTag): names = ['x', 'tx', 'allx'] objects = [] for i in range(len(names)): - with open(dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]), 'rb') as f: + #windows + if os.name=='nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.{}".format(datasetName, datasetName, names[i]) + else: + filename = dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]) + with open(filename, 'rb') as f: if sys.version_info > (3, 0): objects.append(pkl.load(f, encoding='latin1')) else: objects.append(pkl.load(f)) x, tx, allx = tuple(objects) - test_idx_reorder = parse_index_file(dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName)) + #windows + if os.name == 'nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.test.index".format(datasetName, datasetName) + else: + filename = dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName) + 
test_idx_reorder = parse_index_file(filename) test_idx_range = np.sort(test_idx_reorder) if datasetName == 'citeseer': From d2cdfce9edbbd2f575b2d5a603621108cb5293fd Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 7 Dec 2020 23:41:35 -0600 Subject: [PATCH 099/117] Significant! Now provides GPU! One known bug: exclude r-ltmgscgnn --- do_timer_test.sh | 9 +++++++++ main_benchmark_timer.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 do_timer_test.sh diff --git a/do_timer_test.sh b/do_timer_test.sh new file mode 100644 index 0000000..6efc029 --- /dev/null +++ b/do_timer_test.sh @@ -0,0 +1,9 @@ +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13.txt + +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13.txt \ No newline at end of file diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index f74be32..77d1a9d 100644 
--- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -19,7 +19,7 @@ from graph_function import * from benchmark_util import * from gae_embedding import GAEembedding,measure_clustering_results,test_clustering_benchmark_results -from LTMG_R import * +# from LTMG_R import * import pandas as pd # Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. @@ -46,7 +46,7 @@ help='ratio of cell type change in EM iteration (default: 0.99), 0-1') parser.add_argument('--cluster-epochs', type=int, default=200, metavar='N', help='number of epochs in cluster autoencoder training (default: 200)') -parser.add_argument('--no-cuda', action='store_true', default=True, +parser.add_argument('--no-cuda', action='store_true', default=False, help='enables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') @@ -196,6 +196,10 @@ regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile) regulationMatrix = torch.from_numpy(regulationMatrix) +if args.precisionModel == 'Double': + regulationMatrix = regulationMatrix.type(torch.DoubleTensor) +elif args.precisionModel == 'Float': + regulationMatrix = regulationMatrix.type(torch.FloatTensor) # Original if args.model == 'VAE': @@ -240,6 +244,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): data = data.type(torch.FloatTensor) data = data.to(device) regulationMatrixBatch = regulationMatrix[dataindex,:] + regulationMatrixBatch = regulationMatrixBatch.to(device) optimizer.zero_grad() if args.model == 'VAE': recon_batch, mu, logvar, z = model(data) @@ -712,6 +717,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): adjsample = adjsample.float() elif args.precisionModel == 'Double': adjsample = adjsample.type(torch.DoubleTensor) + adjsample = adjsample.to(device) # generate celltype regularizer from celltype celltypesample = generateCelltypeRegu(listResult) @@ -721,6 +727,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): celltypesample = celltypesample.float() elif args.precisionModel == 'Double': celltypesample = celltypesample.type(torch.DoubleTensor) + celltypesample = celltypesample.to(device) for epoch in range(1, args.EM_epochs + 1): recon, original, z = train(epoch, EMFlag=True, taskType='imputation') From 36f4ccbea5ef3a2f15996dc8bf1ef8c04e6028ac Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 8 Dec 2020 14:28:22 -0600 Subject: [PATCH 100/117] add time test for both cpu and gpu --- do_timer_test.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/do_timer_test.sh b/do_timer_test.sh index 6efc029..43c06e8 100644 --- a/do_timer_test.sh +++ b/do_timer_test.sh @@ -1,9 +1,9 @@ -python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9.txt -python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11.txt -python3 -W ignore main_benchmark_timer.py 
--datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12.txt -python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13.txt +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13gpu.txt -python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9.txt -python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11.txt -python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12.txt -python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13.txt \ No newline at end of file +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark 
/home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13cpu.txt From 3b0b1e8475c6467a0f9a04787e0fe15a1d347185 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:21:41 -0600 Subject: [PATCH 101/117] add louvain --- results/louvain.py | 39 +++++++++++++++++++++++++++++++++++++++ results/louvain_magic.sh | 17 +++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 results/louvain.py create mode 100644 results/louvain_magic.sh diff --git a/results/louvain.py b/results/louvain.py new file mode 100644 index 0000000..7306341 --- /dev/null +++ b/results/louvain.py @@ -0,0 +1,39 @@ +import os, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +import numpy as np +from util_function import * +from graph_function import * +from R_util import generateLouvainCluster +import argparse + +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') +parser.add_argument('--input', type=str, default='filename', + help='input filename') +parser.add_argument('--output', type=str, default='filename', + help='input filename') +args = parser.parse_args() + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +zOut = np.load(args.input,allow_pickle=True) +zOut = pcaFunc(zOut, n_components=10) +adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) +listResult,size = generateLouvainCluster(edgeList) +silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) +ari, ami, nmi, cs, fms, 
vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) +resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) +print(resultstr) + +with open(args.output) as fw: + fw.writelines("%s\n" % strr for strr in listResult) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh new file mode 100644 index 0000000..f752cb9 --- /dev/null +++ b/results/louvain_magic.sh @@ -0,0 +1,17 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Louvain_magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt +python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt +python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt +python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file From d535095f45d6f8505a93633db5d7dab4768ab170 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:32:16 -0600 Subject: [PATCH 102/117] fix a bug --- results/louvain.py | 2 +- results/louvain_magic.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/results/louvain.py b/results/louvain.py index 7306341..b8bf3d4 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -27,7 +27,7 @@ bench_celltype=bench_pd.iloc[:,0].to_numpy() zOut = np.load(args.input,allow_pickle=True) -zOut = pcaFunc(zOut, n_components=10) +zOut,re = pcaFunc(zOut, n_components=10) adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) listResult,size = generateLouvainCluster(edgeList) silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index f752cb9..ba200aa 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J Louvain_magic +#SBATCH -J L_magic #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes @@ -12,6 +12,6 @@ module load miniconda3 source activate conda_R python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt -python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt -python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt -python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file +# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt +# python -W ignore louvain.py --input 
othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file From 07d9af1b785fe3a27e1ba51c059d2339be6ad089 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:54:27 -0600 Subject: [PATCH 103/117] add benchmark --- results/louvain_magic.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index ba200aa..a3c0d89 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -11,7 +11,7 @@ ################################################################# module load miniconda3 source activate conda_R -python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt -# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt -# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 4f050f3cfb5732b2a69a4a33b22ebd44c9c80f80 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:00:44 -0600 Subject: [PATCH 104/117] add benchmark fw --- results/louvain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/louvain.py b/results/louvain.py index b8bf3d4..05f3ec8 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -35,5 +35,5 @@ resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) print(resultstr) -with open(args.output) as fw: +with open(args.output,'w') as fw: fw.writelines("%s\n" % strr for strr in listResult) From 35eadb91c3890d215521dbdd4df0a3fc0d5299c2 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:16:48 -0600 Subject: [PATCH 105/117] =?UTF-8?q?=E2=80=98update=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- graph_function.py | 25 +++++++++++++++++++++++++ results/louvain.py | 2 +- results/louvain_magic.sh | 7 ++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/graph_function.py b/graph_function.py index f1c65b2..9e30b9a 100644 --- a/graph_function.py +++ b/graph_function.py @@ -71,6 +71,12 @@ def generateAdj(featureMatrix, graphType='KNNgraph', para = None, parallelLimit distanceType = parawords[0] k = int(parawords[1]) edgeList = calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType=distanceType, k=k) + elif 
graphType == 'KNNgraphStatsSingleThreadNoPrune': + if para != None: + parawords = para.split(':') + distanceType = parawords[0] + k = int(parawords[1]) + edgeList = calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType=distanceType, k=k) else: print('Should give graphtype') @@ -330,6 +336,25 @@ def calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType return edgeList +#para: measuareName:k:threshold no prune only +def calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType='euclidean', k=10, param=None): + r""" + Thresholdgraph: KNN Graph with stats one-std based methods, SingleThread version, no boundary, + """ + + edgeList=[] + for i in np.arange(featureMatrix.shape[0]): + tmp=featureMatrix[i,:].reshape(1,-1) + distMat = distance.cdist(tmp,featureMatrix, distanceType) + res = distMat.argsort()[:k+1] + for j in np.arange(1,k+1): + # TODO: check, only exclude large outliners + # if (distMat[0,res[0][j]]<=mean+std) and (distMat[0,res[0][j]]>=mean-std): + weight = 1.0 + edgeList.append((i,res[0][j],weight)) + + return edgeList + # kernelDistance def kernelDistance(distance,delta=1.0): ''' diff --git a/results/louvain.py b/results/louvain.py index 05f3ec8..967b2d6 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -11,7 +11,7 @@ help='parameter k in KNN graph (default: 10)') parser.add_argument('--knn-distance', type=str, default='euclidean', help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') -parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThreadNoPrune', help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') #Benchmark related parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index a3c0d89..5669824 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -14,4 +14,9 @@ source activate conda_R python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv # python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv # python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input 
othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 696fcce085aaa937ad9eae746c355400a120c969 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:23:26 -0600 Subject: [PATCH 106/117] add all methods --- results/louvain.sh | 57 ++++++++++++++++++++++++++++++++++++++++ results/louvain_magic.sh | 22 ---------------- 2 files changed, 57 insertions(+), 22 deletions(-) create mode 100644 results/louvain.sh delete mode 100644 results/louvain_magic.sh diff --git a/results/louvain.sh b/results/louvain.sh new file mode 100644 index 0000000..99af7cc --- /dev/null +++ b/results/louvain.sh @@ -0,0 +1,57 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J L_magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore 
louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input 
othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh deleted file mode 100644 index 5669824..0000000 --- a/results/louvain_magic.sh +++ /dev/null @@ -1,22 +0,0 @@ -#! /bin/bash -######################### Batch Headers ######################### -#SBATCH -A xulab -#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J L_magic -#SBATCH -o results-%j.out # give the job output a custom name -#SBATCH -t 2-00:00 # two days time limit -#SBATCH -N 1 # number of nodes -#SBATCH -n 1 # number of cores (AKA tasks) -#SBATCH --mem=128G -################################################################# -module load miniconda3 -source activate conda_R -python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 
64e9b37622de65abea036a7afd4fe416d7b09a01 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:26:07 -0600 Subject: [PATCH 107/117] add all methods --- results/louvain.sh | 78 +++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/results/louvain.sh b/results/louvain.sh index 99af7cc..4f0e9e0 100644 --- a/results/louvain.sh +++ b/results/louvain.sh @@ -16,42 +16,42 @@ python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_re python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark 
/home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv +python -W ignore louvain.py 
--input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt 
--benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv From 534ed1b33b19f864d1a8dd51f20c65ad46e6abb7 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 20:13:27 -0600 Subject: [PATCH 108/117] fix a bug in dca --- codesfromJGandYJ/impute/dca_impute.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index ffa4504..6b7b2a2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -26,20 +26,17 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = x.tolist() x=x.todense() x=np.asarray(x) + x=x.astype(int) features=x.T - #write dropout_filename = save_path+"dca_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+"tmpdca") - filename=save_path+"tmpdca/mean.tsv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T - 
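The x=x.astype(int) added above is the substance of this fix: dca is documented to require unnormalized integer count matrices, so the float-valued dense matrix recovered from the saved sparse features has to be cast before being written out for dca (treat the exact dca input check as an assumption here; the diff itself does not show it). A self-contained sketch of the densify-and-cast step on a stand-in matrix:

    import numpy as np
    from scipy.sparse import csr_matrix

    sp = csr_matrix(np.array([[0., 2., 0.], [1., 0., 3.]]))  # stand-in for the saved sparse features
    x = np.asarray(sp.todense()).astype(int)                 # dense integer counts, as dca expects
    assert x.dtype.kind == 'i' and x.sum() == 6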
np.save('/storage/htc/joshilab/wangjue/scGNN/dca/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] From 68f2e6d2924a7d1b99afc6378b5a8fd98e8136ab Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 21:15:05 -0600 Subject: [PATCH 109/117] update name --- results/louvain.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/louvain.sh b/results/louvain.sh index 4f0e9e0..3f51bea 100644 --- a/results/louvain.sh +++ b/results/louvain.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J L_magic +#SBATCH -J Louvain #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes From 137b5c61027d8ccdc17915737c76322f562ad435 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 08:04:03 -0600 Subject: [PATCH 110/117] recheck dca --- results/results_impute_others_all.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index c2b6d9f..da15c51 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -33,7 +33,10 @@ # methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] # We should use only log(x+1) if the method permitted -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] + +# Temp: just test dca +methodList = ['dca'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) From 98649a84abe51f73cccb0cda846eddc24020b7c4 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 09:14:43 -0600 Subject: [PATCH 111/117] only use 12/13 for dca --- results/results_impute_others_all.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index da15c51..d828554 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -17,8 +17,8 @@ # In HPC, call by sbatch submit_Impute_others.sh datasetList = [ - '9.Chung', - '11.Kolodziejczyk', + # '9.Chung', + # '11.Kolodziejczyk', '12.Klein', '13.Zeisel', ] From f3fbc5ff51282734787a43afb9a4524e525c5c3a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 09:47:02 -0600 Subject: [PATCH 112/117] back to full methods --- results/results_impute_others_all.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index d828554..c2b6d9f 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -17,8 +17,8 @@ # In HPC, call by sbatch submit_Impute_others.sh datasetList = [ - # '9.Chung', - # '11.Kolodziejczyk', + '9.Chung', + '11.Kolodziejczyk', '12.Klein', '13.Zeisel', ] @@ -33,10 +33,7 @@ # methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] # We should use only log(x+1) if the method permitted -# methodList = 
['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] - -# Temp: just test dca -methodList = ['dca'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) From 5e5853abcc32a44301ba3a92bea8c4277f12ca3a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 17:37:53 -0600 Subject: [PATCH 113/117] add zero percentage calculation --- results/zeroPercentage.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 results/zeroPercentage.py diff --git a/results/zeroPercentage.py b/results/zeroPercentage.py new file mode 100644 index 0000000..cef85e8 --- /dev/null +++ b/results/zeroPercentage.py @@ -0,0 +1,24 @@ +#Calculate Zero percentage in each of the datasets +import numpy as np + +def calcu(dataset='9.Chung',ratio=0.0): + t=np.load('npyImputeG2E_1/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(dataset,ratio),allow_pickle=True) + t=t.tolist() + t=t.todense() + zeroNum = np.where(t==0)[0].shape[0] + allNum = t.shape[0]*t.shape[1] + percent = zeroNum/allNum + print('{} {} {}'.format(zeroNum,allNum,percent)) + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + +ratioList = ['0.0','0.1','0.3','0.6','0.8'] + +for dataset in datasetList: + for ratio in ratioList: + calcu(dataset, ratio) \ No newline at end of file From 4cf1e7df2b8c9e389e05617b539341aaf8224774 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 12 Dec 2020 14:45:21 -0600 Subject: [PATCH 114/117] add tmp results of celltype --- results/results.sh | 25 +++++++++++++++++++++++++ results/results_tmp.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 results/results.sh create mode 100644 results/results_tmp.py diff --git a/results/results.sh b/results/results.sh new file mode 100644 index 0000000..e1f5d8e --- /dev/null +++ b/results/results.sh @@ -0,0 +1,25 @@ +#! 
From 4cf1e7df2b8c9e389e05617b539341aaf8224774 Mon Sep 17 00:00:00 2001
From: Wang
Date: Sat, 12 Dec 2020 14:45:21 -0600
Subject: [PATCH 114/117] add tmp results of celltype

---
 results/results.sh     | 25 +++++++++++++++++++++++++
 results/results_tmp.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 results/results.sh
 create mode 100644 results/results_tmp.py

diff --git a/results/results.sh b/results/results.sh
new file mode 100644
index 0000000..e1f5d8e
--- /dev/null
+++ b/results/results.sh
@@ -0,0 +1,25 @@
+#! /bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J Louvain
+#SBATCH -o results-%j.out # give the job output a custom name
+#SBATCH -t 2-00:00 # two days time limit
+#SBATCH -N 1 # number of nodes
+#SBATCH -n 1 # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+
+python -W ignore results_tmp.py --inputOri othermethods/saucie/12.Klein_0.0_1_recon.npy --input otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/saucie/13.Zeisel_0.0_1_recon.npy --input otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/scvi/12.Klein_0.0_1_recon.npy --input otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/scvi/13.Zeisel_0.0_1_recon.npy --input otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --input otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --input otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/scIGANs/12.Klein_0.0_1_recon.npy --input otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --input otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv

diff --git a/results/results_tmp.py b/results/results_tmp.py
new file mode 100644
index 0000000..97aab4d
--- /dev/null
+++ b/results/results_tmp.py
@@ -0,0 +1,31 @@
+import os, sys
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+# sys.path.append('../')
+import numpy as np
+from util_function import *
+from graph_function import *
+import argparse
+
+parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem')
+#Benchmark related
+parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',
+                    help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)')
+parser.add_argument('--input', type=str, default='filename',
+                    help='predicted cluster labels, one label per line')
+parser.add_argument('--inputOri', type=str, default='filename',
+                    help='embedding/reconstruction .npy used for the label-free metrics')
+args = parser.parse_args()
+
+#Benchmark
+bench_pd=pd.read_csv(args.benchmark,index_col=0)
+bench_celltype=bench_pd.iloc[:,0].to_numpy()
+
+
+#'saucie/13.txt'
+z_pd = pd.read_csv(args.input,header=None)
+listResult = z_pd.iloc[:,0].to_numpy()
+zOut = np.load(args.inputOri,allow_pickle=True)
+silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult)
+ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult)
+resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs)
+print(resultstr)
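
measureClusteringNoLabel and measureClusteringTrueLabel come from scGNN's
util_function. As an independent sanity check, the headline metrics can be
recomputed directly with scikit-learn; a sketch using one of the file pairs from
results.sh above (paths are illustrative):

import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_score

true_labels = pd.read_csv('Klein_cell_label.csv', index_col=0).iloc[:, 0].to_numpy()
pred_labels = pd.read_csv('12.txt', header=None).iloc[:, 0].to_numpy()
embedding = np.load('12.Klein_0.0_1_recon.npy', allow_pickle=True)

print(adjusted_rand_score(true_labels, pred_labels))          # ARI against the benchmark
print(normalized_mutual_info_score(true_labels, pred_labels)) # NMI against the benchmark
print(silhouette_score(embedding, pred_labels))               # label-free, on the embedding
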
From b1317c522d64fae796d050a492c696cfd463951a Mon Sep 17 00:00:00 2001
From: Wang
Date: Sat, 12 Dec 2020 16:38:01 -0600
Subject: [PATCH 115/117] add saucie plot

---
 codesfromJGandYJ/impute/SAUCIE_impute.py | 25 +++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py
index 874c7c1..a4b0b14 100644
--- a/codesfromJGandYJ/impute/SAUCIE_impute.py
+++ b/codesfromJGandYJ/impute/SAUCIE_impute.py
@@ -33,10 +33,31 @@
     model = SAUCIE.SAUCIE(x.shape[1])
     # train the model!
    model.train(loader_train, steps=2000)
+    #imputation
     reconstruction = model.get_reconstruction(loader_eval)
     reconstruction=np.transpose(reconstruction)
     np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction)
 
+def plot_saucie(seed=1, datasetName='9.Chung', ratio=0.1):
+    filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
+    x = np.load(filename,allow_pickle=True)
+    x = x.tolist()
+    x=x.todense()
+    x=np.asarray(x)
+    x=np.log(x+1)
+    loader_eval = SAUCIE.Loader(x, shuffle=False)
+    # clear the computational graph
+    # plot the 2-D embedding, colored by the SAUCIE clusters
+    tf.reset_default_graph()
+    model = SAUCIE.SAUCIE(x.shape[1])
+    model.train(loader_eval, steps=2000)
+    embedding = model.get_embedding(loader_eval)
+    num_clusters, clusters = model.get_clusters(loader_eval)
+    fig = plt.figure()
+    ax = fig.add_subplot(1, 1, 1)
+    ax.scatter(embedding[:, 0], embedding[:, 1], c=clusters)
+    fig.savefig('saucie_'+datasetName+'.png')
+
 datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
 seedList = ['1','2','3']
 ratioList = [0.1, 0.3, 0.6, 0.8]
@@ -48,4 +69,6 @@
 for datasetName in datasetNameList:
     for seed in seedList:
         for ratio in ratioList:
-            impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio)
\ No newline at end of file
+            impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio)
+
+# plot_saucie(seed='1', datasetName=datasetName, ratio='0.0')
\ No newline at end of file
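
The diff above relies on tf.reset_default_graph() and model.get_embedding(),
which follow SAUCIE's TensorFlow-1.x-era API, and it assumes the file header
(outside this hunk) already imports tensorflow as tf and pyplot as plt. On a
display-less cluster node, matplotlib also needs a non-interactive backend
before pyplot is imported; a minimal guard:

import matplotlib
matplotlib.use('Agg')            # render to files only; no X display needed
import matplotlib.pyplot as plt
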
From 7d2c74a3148f92f6844787484f567b9b7951d2ff Mon Sep 17 00:00:00 2001
From: Wang
Date: Sun, 27 Dec 2020 10:37:11 -0600
Subject: [PATCH 116/117] add netNMF and scIGAN

---
 .../codeForCellcluster/Run_netNMF_celltype.py | 71 +++++++++++++++
 .../impute/Run_netNMF_imputation.py           | 87 +++++++++++++++++++
 .../impute/run_scIGANS_imputation.py          | 51 +++++++++++
 3 files changed, 209 insertions(+)
 create mode 100644 codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
 create mode 100644 codesfromJGandYJ/impute/Run_netNMF_imputation.py
 create mode 100644 codesfromJGandYJ/impute/run_scIGANS_imputation.py

diff --git a/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
new file mode 100644
index 0000000..19e5b1f
--- /dev/null
+++ b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
@@ -0,0 +1,71 @@
+# This code has not been cleaned yet
+# run netNMF-sc from command line and save outputs to specified directory
+from __future__ import print_function
+import numpy as np
+from warnings import warn
+from joblib import Parallel, delayed
+import copy,argparse,os,math,random,time
+from scipy import sparse, io,linalg
+from scipy.sparse import csr_matrix
+import warnings,os
+from netNMFsc import plot
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
+def main(args):
+    if args.method == 'GD':
+        from netNMFsc import netNMFGD
+        operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1)
+    elif args.method == 'MU':
+        from netNMFsc import netNMFMU
+        operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1)
+
+
+    chung = pd.read_csv(args.filename, header=0,
+                        index_col=0, sep=',')
+    X = chung.values
+    genes = []
+    for gen in chung.index.values:
+        if '.' in gen:
+            genes.append(gen.upper().split('.')[0])
+        else:
+            genes.append(gen.upper())
+    #print(genes)
+    operator.X = X
+    operator.genes = np.asarray(genes)
+    #operator.load_10X(direc=args.tenXdir,genome='mm10')
+    operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity)
+    dictW = operator.fit_transform()
+    W, H = dictW['W'], dictW['H']
+    k,clusters = plot.select_clusters(H,max_clusters=20)
+    plot.tSNE(H,clusters,fname=args.direc + '/netNMFsc_tsne')
+    os.system('mkdir -p %s'%(args.direc))
+    np.save(os.path.join(args.direc,'W.npy'),W)
+    np.save(os.path.join(args.direc,'H.npy'),H)
+    np.save(os.path.join(args.direc, 'cluster.npy'), clusters)
+    return
+#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m","--method",help="either 'GD' for gradient descent or 'MU' for multiplicative update",type=str,default='GD')
+    parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx')
+    parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv')
+    parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='')
+    parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='')
+    parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human')
+    parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble')
+    parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez')
+    parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0)
+    parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99)
+    parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500)
+    parser.add_argument("-t","--tol", help="tolerance for netNMF-sc",type=float,default=1e-2)
+    parser.add_argument("-d","--direc", help="directory to save files",default='')
+    parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10)
+    parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0)
+    parser.add_argument("-x","--tenXdir", help="data is from 10X. Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '')
+    args = parser.parse_args()
+    main(args)
+
+
+#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv'
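
For reference, a hypothetical invocation of Run_netNMF_celltype.py (the flags
are the ones defined in its argparse block; every path below is a placeholder):

import subprocess

subprocess.run([
    'python', 'Run_netNMF_celltype.py',
    '-m', 'GD',                                  # gradient-descent variant
    '-f', '/path/to/Use_expression.csv',         # expression matrix, genes x cells
    '-net', '/path/to/network.npy',              # gene-gene network
    '-netgenes', '/path/to/network_genes.npy',   # gene names for the network
    '-d', 'netNMF_out',                          # output directory
], check=True)
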
diff --git a/codesfromJGandYJ/impute/Run_netNMF_imputation.py b/codesfromJGandYJ/impute/Run_netNMF_imputation.py
new file mode 100644
index 0000000..8c74b72
--- /dev/null
+++ b/codesfromJGandYJ/impute/Run_netNMF_imputation.py
@@ -0,0 +1,87 @@
+# This code has not been cleaned yet
+# run netNMF-sc from command line and save outputs to specified directory
+from __future__ import print_function
+import numpy as np
+from warnings import warn
+from joblib import Parallel, delayed
+import copy,argparse,os,math,random,time
+from scipy import sparse, io,linalg
+from scipy.sparse import csr_matrix
+import warnings,os
+from netNMFsc import plot
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
+def main(args):
+    if args.method == 'GD':
+        from netNMFsc import netNMFGD
+        operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4)
+    elif args.method == 'MU':
+        from netNMFsc import netNMFMU
+        operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4)
+
+    filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(
+        args.Randomdata, args.datasetName,args.dropratio)
+    x = np.load(filename, allow_pickle=True)
+    x = x.tolist()
+    x = x.todense()
+    x = np.asarray(x)
+    if args.process == 'log':
+        x = np.log(x + 1)
+
+    # transpose and add names for rows and cols
+    features = np.transpose(x)
+
+    chung = pd.read_csv(args.filename, header=0,
+                        index_col=0, sep=',')
+    X = features
+    genes = []
+    for gen in chung.index.values:
+        if '.' in gen:
+            genes.append(gen.upper().split('.')[0])
+        else:
+            genes.append(gen.upper())
+    #print(genes)
+    operator.genes = np.asarray(genes)
+    operator.X = X
+    #operator.load_10X(direc=args.tenXdir,genome='mm10')
+    operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity)
+    dictW = operator.fit_transform()
+    W, H = dictW['W'], dictW['H']
+    # k,clusters = plot.select_clusters(H,max_clusters=20)
+    # plot.tSNE(H,clusters,fname=args.direc+ '/netNMFsc_tsne_imputation_' +args.process +'_'+args.Randomdata)
+    # os.system('mkdir -p %s'%(args.direc))
+    np.save(os.path.join(args.direc,args.Randomdata+'_'+args.process+'_imputation.npy'),np.dot(W,H))
+    #np.save(os.path.join(args.direc,'H.npy'),H)
+    #np.save(os.path.join(args.direc, 'cluster.npy'), H)
+    return
+#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m","--method",help="either 'GD' for gradient descent or 'MU' for multiplicative update",type=str,default='GD')
+    parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx')
+    parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv')
+    parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='')
+    parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='')
+    parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human')
+    parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble')
+    parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez')
+    parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0)
+    parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99)
+    parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500)
+    parser.add_argument("-t","--tol", help="tolerance for netNMF-sc",type=float,default=1e-2)
+    parser.add_argument("-d","--direc", help="directory to save files",default='')
+    parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10)
+    parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0)
+    parser.add_argument("-x","--tenXdir", help="data is from 10X. Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '')
+    parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1', help='npyImputeG2E_1,2,3')
+    parser.add_argument('--datasetName', type=str, default='12.Klein', help='12.Klein,13.Zeisel')
+    parser.add_argument('--process', type=str, default='null', help='log/null to process data')
+    parser.add_argument("-Hasdot","--Hasdot",type = bool, help="whether data gene names contain a dot",default = True)
+    parser.add_argument('--dropratio', type=str, default='0.1', help='0.1,0.3,0.6,0.8')
+    args = parser.parse_args()
+    main(args)
+
+
+#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv'

diff --git a/codesfromJGandYJ/impute/run_scIGANS_imputation.py b/codesfromJGandYJ/impute/run_scIGANS_imputation.py
new file mode 100644
index 0000000..20faf7d
--- /dev/null
+++ b/codesfromJGandYJ/impute/run_scIGANS_imputation.py
@@ -0,0 +1,51 @@
+# This code has not been cleaned yet
+import sys,os
+import numpy as np
+import pandas as pd
+import argparse
+sys.path.append('../')
+sys.path.append('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/scIGANs/')
+
+parser = argparse.ArgumentParser(description='')
+parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1',help='npyImputeG2E_1,2,3')
+parser.add_argument('--datasetName', type=str, default='12.Klein',help='12.Klein,13.Zeisel')
+parser.add_argument('--process', type=str, default='null',help='log/null to process data')
+parser.add_argument('--exec', type=str, default='scIGANs',help='scIGANs executable to run')
+parser.add_argument('--dropratio', type=str, default='0.1',help='0.1,0.3,0.6,0.8')
+parser.add_argument('--csvsavepath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='directory to save the intermediate csv')
+parser.add_argument('--labelpath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='directory of the cell label files')
+parser.add_argument('--outpath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200/',help='output directory')
+parser.add_argument('--Epotch', type=str, default='200',help='number of training epochs')
+args = parser.parse_args()

+# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0)
+
+filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(args.Randomdata,args.datasetName,args.dropratio)
+x = np.load(filename,allow_pickle=True)
+x = x.tolist()
+x=x.todense()
+x=np.asarray(x)
+if args.process=='log':
+    x=np.log(x+1)
+    saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features_log.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata,
+        args.datasetName,args.dropratio)
+elif args.process=='null':
+    saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata,
+        args.datasetName,args.dropratio)
+#transpose and add names for rows and cols
+features=np.transpose(x)
+
+pd.DataFrame(features).to_csv(saveintedir,sep='\t')
+
+label = '{}{}/{}_only_label.csv'.format(args.labelpath,args.datasetName,args.datasetName.split('.')[-1])
+#/storage/htc/joshilab/jghhd/singlecellTest/Data/12.Klein/Klein_only_label.csv
+
+cmd = '{} {} -l {} -e {} -o {}{}'.format(args.exec,saveintedir,label,args.Epotch,args.outpath,args.datasetName)
+print(cmd)
+os.system(cmd)
+#usage: scIGANs <expression.csv> -l <label.csv> -e 50
+
+# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix)
+# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='')
+
+#np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction)
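
A hypothetical driver for the scIGANs wrapper above, sweeping the same seeds
and dropout ratios used throughout this series (flag values follow the argparse
defaults; adjust paths to the local cluster):

import subprocess

for randomdata in ['npyImputeG2E_1', 'npyImputeG2E_2', 'npyImputeG2E_3']:
    for dataset in ['12.Klein', '13.Zeisel']:
        for ratio in ['0.1', '0.3', '0.6', '0.8']:
            subprocess.run([
                'python', 'run_scIGANS_imputation.py',
                '--Randomdata', randomdata,
                '--datasetName', dataset,
                '--dropratio', ratio,
                '--process', 'log',    # run scIGANs on log(x+1) input
            ], check=True)
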
From 89af13cfc0fe53cac38a88344bc5b07d65f59c3e Mon Sep 17 00:00:00 2001
From: juexinwang
Date: Thu, 18 Feb 2021 22:41:21 -0600
Subject: [PATCH 117/117] Create choose_louvain.py

Add how and why Louvain works with python-igraph, but python-louvain does not work
---
 scripts/choose_louvain.py | 105 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 scripts/choose_louvain.py

diff --git a/scripts/choose_louvain.py b/scripts/choose_louvain.py
new file mode 100644
index 0000000..42cbe0c
--- /dev/null
+++ b/scripts/choose_louvain.py
@@ -0,0 +1,105 @@
+# Script to test the behavior of the available Louvain implementations
+
+# Option 1: Original version, use the R version of louvain; it takes time to link R and needs rpy2 installed.
+# Not used anymore.
+# Clustering is different between Case one and two.
+import pandas as pd
+import rpy2.robjects as ro
+from rpy2.robjects.packages import importr
+from rpy2.robjects import r, pandas2ri
+pandas2ri.activate()
+
+# case one:
+edgeList = []
+edgeList.append((0,2,1.0))
+edgeList.append((1,2,1.0))
+edgeList.append((2,3,1.0))
+edgeList.append((3,4,1.0))
+edgeList.append((4,5,1.0))
+edgeList.append((4,6,1.0))
+
+# case two: reset the list first, so only one case is active per run
+edgeList = []
+edgeList.append((0,2,1.0))
+edgeList.append((1,2,1.0))
+edgeList.append((2,3,0.1))
+edgeList.append((3,4,1.0))
+edgeList.append((4,5,1.0))
+edgeList.append((4,6,1.0))
+
+fromVec = []
+toVec = []
+weightVec = []
+for edge in edgeList:
+    fromVec.append(edge[0])
+    toVec.append(edge[1])
+    weightVec.append(edge[2])
+
+igraph = importr('igraph')
+base = importr('base')
+fromV = ro.FloatVector(fromVec)
+toV = ro.FloatVector(toVec)
+# weightV= ro.FloatVector([0.1,1.0,1.0,0.1,1.0])
+weightV= ro.FloatVector(weightVec)
+links = ro.DataFrame({'from':fromV,'to':toV,'weight':weightV})
+g = igraph.graph_from_data_frame(links,directed = False)
+cl = igraph.cluster_louvain(g)
+
+def as_dict(vector):
+    """Convert an RPy2 ListVector to a Python dict"""
+    result = {}
+    for i, name in enumerate(vector.names):
+        if isinstance(vector[i], ro.ListVector):
+            result[name] = as_dict(vector[i])
+        elif len(vector[i]) == 1:
+            result[name] = vector[i][0]
+        else:
+            result[name] = vector[i]
+    return result
+
+cl_dict = as_dict(cl)
+df = pd.DataFrame()
+# df['Cluster']=cl_dict['membership']
+size = float(len(set(cl_dict['membership'])))
+
+listResult=[]
+count = 0
+for i in range(len(cl_dict['membership'])):
+    listResult.append(int(cl_dict['membership'][i])-1)
+    count += 1
+
+# Option 2: use package python-louvain, but it does not work for us:
+# clustering is identical between Case one and two, so we cannot use it.
+import networkx as nx
+import community as community_louvain
+G = nx.Graph()
+G.add_weighted_edges_from(edgeList)
+partition = community_louvain.best_partition(G,weight='weight')
+
+
+# Option 3: use igraph, pure python, and looks right:
+# clustering differs between Case one and two, so this is the one we adopt.
+import numpy as np
+from igraph import *
+#Case 1:
+W=np.zeros((7,7))
+W[0,2]=1.0
+W[1,2]=1.0
+W[2,3]=1.0
+W[3,4]=1.0
+W[4,5]=1.0
+W[4,6]=1.0
+
+#Case 2:
+W=np.zeros((7,7))
+W[0,2]=1.0
+W[1,2]=1.0
+W[2,3]=0.1
+W[3,4]=1.0
+W[4,5]=1.0
+W[4,6]=1.0
+
+graph = Graph.Weighted_Adjacency(W.tolist(), mode=ADJ_UNDIRECTED, attr="weight", loops=False)
+louvain_partition = graph.community_multilevel(weights=graph.es['weight'], return_levels=False)
+print(louvain_partition)
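
To make the comparison above reproducible in one run, a compact version of the
Option 3 check that builds both cases and prints the memberships side by side (a
sketch; it assumes python-igraph is installed and reuses the same 7-node toy
graph):

import numpy as np
from igraph import Graph, ADJ_UNDIRECTED

def louvain_membership(bridge_weight):
    # same toy graph as above; only the 2-3 bridge weight differs between cases
    W = np.zeros((7, 7))
    for i, j, w in [(0,2,1.0),(1,2,1.0),(2,3,bridge_weight),(3,4,1.0),(4,5,1.0),(4,6,1.0)]:
        W[i, j] = w
    g = Graph.Weighted_Adjacency(W.tolist(), mode=ADJ_UNDIRECTED, attr='weight', loops=False)
    return g.community_multilevel(weights=g.es['weight']).membership

print(louvain_membership(1.0))  # case one: strong bridge
print(louvain_membership(0.1))  # case two: weak bridge; the communities should split apart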