From e871a267e35a1cfb9226fc53ff0acdd1562393c0 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Thu, 12 Nov 2020 23:06:05 -0600 Subject: [PATCH 001/117] add ablation --- generating_Impute_0.1-0.8-ablation.py | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 generating_Impute_0.1-0.8-ablation.py diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py new file mode 100644 index 0000000..6aad7b4 --- /dev/null +++ b/generating_Impute_0.1-0.8-ablation.py @@ -0,0 +1,80 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_L_1 2geL1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 1 --npyDir','npyG2EL_1/'), + ('run_experiment_1_g_e_1 1ge1','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG1E_1/'), + ('run_experiment_2_g_f_1 2gf1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 1 --npyDir','npyG2F_1/'), + ('run_experiment_2_n_e_LK_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), + + ('run_experiment_2_g_e_L_2 2geL2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 2 --npyDir','npyG2EL_2/'), + ('run_experiment_1_g_e_2 1ge2','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG1E_2/'), + ('run_experiment_2_g_f_2 2gf2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 2 --npyDir','npyG2F_2/'), + ('run_experiment_2_n_e_LK_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), + ('run_experiment_2_g_e_2 2ge2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG2E_2/'), + + ('run_experiment_2_g_e_L_3 2geL3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 3 --npyDir','npyG2EL_3/'), + ('run_experiment_1_g_e_3 1ge3','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 3 
--npyDir','npyG1E_3/'), + ('run_experiment_2_g_f_3 2gf3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 3 --npyDir','npyG2F_3/'), + ('run_experiment_2_n_e_LK_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), + ('run_experiment_2_g_e_3 2ge3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG2E_3/'), +] + +dropoutList = ['0.1','0.3','0.6','0.8'] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = "npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() From 0ce6b2ed6850cf55c1ca026d78b5e12215483cdc Mon Sep 17 00:00:00 2001 From: juexinwang Date: Thu, 12 Nov 2020 23:31:39 -0600 Subject: [PATCH 002/117] add ablation tests on imputation --- generating_Impute_0.1-0.8-ablation.py | 6 ++-- submitCluster_imputation_0.1-0.8-ablation.sh | 35 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 submitCluster_imputation_0.1-0.8-ablation.sh diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 6aad7b4..86773f7 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -31,19 +31,19 @@ ('run_experiment_2_g_e_L_1 2geL1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 1 --npyDir','npyG2EL_1/'), ('run_experiment_1_g_e_1 1ge1','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG1E_1/'), ('run_experiment_2_g_f_1 2gf1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 1 --npyDir','npyG2F_1/'), - ('run_experiment_2_n_e_LK_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), + ('run_experiment_2_n_e_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), ('run_experiment_2_g_e_L_2 2geL2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 2 
--npyDir','npyG2EL_2/'), ('run_experiment_1_g_e_2 1ge2','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG1E_2/'), ('run_experiment_2_g_f_2 2gf2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 2 --npyDir','npyG2F_2/'), - ('run_experiment_2_n_e_LK_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), + ('run_experiment_2_n_e_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), ('run_experiment_2_g_e_2 2ge2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG2E_2/'), ('run_experiment_2_g_e_L_3 2geL3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 3 --npyDir','npyG2EL_3/'), ('run_experiment_1_g_e_3 1ge3','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG1E_3/'), ('run_experiment_2_g_f_3 2gf3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 3 --npyDir','npyG2F_3/'), - ('run_experiment_2_n_e_LK_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), + ('run_experiment_2_n_e_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), ('run_experiment_2_g_e_3 2ge3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG2E_3/'), ] diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh new file mode 100644 index 0000000..d558529 --- /dev/null +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -0,0 +1,35 @@ +mkdir npyImputeG2E_1 +mkdir npyImputeG2EL_1 +mkdir npyImputeG2F_1 +mkdir npyImputeN2E_1 +mkdir npyImputeG1E_1 + +mkdir npyImputeG2E_2 +mkdir npyImputeG2EL_2 +mkdir npyImputeG2F_2 +mkdir npyImputeN2E_2 +mkdir npyImputeG1E_2 + +mkdir npyImputeG2E_3 +mkdir npyImputeG2EL_3 +mkdir npyImputeG2F_3 +mkdir npyImputeN2E_3 +mkdir npyImputeG1E_3 + +for i in {1..3} +do +for j in {0.1,0.3,0.6,0.8} +do +sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh + +sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +done +done \ No newline at end of file From 157a7730bd6746e026086cae2b550466cd3eec9a Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 16:48:56 -0600 Subject: [PATCH 003/117] add plot --- main_benchmark.py | 3 +-- results/plot_distribution.py | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 results/plot_distribution.py diff --git a/main_benchmark.py b/main_benchmark.py index 101de9f..31f0942 100644 --- a/main_benchmark.py +++ b/main_benchmark.py @@ -23,8 +23,7 @@ # Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. 
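# For orientation, one concrete invocation of this script, assembled from the
# commandLine template in generating_Impute_0.1-0.8-ablation.py above (the
# 2ge1 ablation case); a reconstruction for readability, not a verified command:
#   python3 -W ignore main_benchmark.py --datasetName 12.Klein \
#       --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv \
#       --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK \
#       --useGAEembedding --seed 1 --npyDir npyImputeG2E_1/ --imputeMode --dropoutRatio 0.1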
parser = argparse.ArgumentParser(description='Graph EM AutoEncoder for scRNA') parser.add_argument('--datasetName', type=str, default='1.Biase', - help='TGFb/sci-CAR/sci-CAR_LTMG/MMPbasal/MMPbasal_all/MMPbasal_allgene/MMPbasal_allcell/MMPepo/MMPbasal_LTMG/MMPbasal_all_LTMG/MMPbasal_2000') -# Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') parser.add_argument('--batch-size', type=int, default=12800, metavar='N', help='input batch size for training (default: 12800)') parser.add_argument('--epochs', type=int, default=500, metavar='N', diff --git a/results/plot_distribution.py b/results/plot_distribution.py new file mode 100644 index 0000000..6248850 --- /dev/null +++ b/results/plot_distribution.py @@ -0,0 +1,40 @@ +import numpy as np +import matplotlib.pyplot as plt +import argparse + +parser = argparse.ArgumentParser(description='Infer Spatial from Expression in single cells') + +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--para', type=str, default='LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1', + help='save npy results in directory') +parser.add_argument('--inDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--outDir', type=str, default='DistNpy/', + help='save npy results in directory') +args = parser.parse_args() + + +ix=np.load(args.datasetName+'_'+args.para+'_dropix.npy') +i =np.load(args.datasetName+'_'+args.para+'_dropi.npy') +j =np.load(args.datasetName+'_'+args.para+'_dropj.npy') +recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) +features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) +features=features.tolist() + +_ = plt.hist(features.ravel()) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +plt.close() + +features_log = np.log(features+1) +_ = plt.hist(features_log.ravel(),bin=100) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +plt.close() + +_ = plt.hist(recon.ravel(),bin=100) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +plt.close() + +recon_exp = np.exp(recon)-1 +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +plt.close() \ No newline at end of file From 751360aed9f6d09463454de48dafacb8f8206311 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 17:30:15 -0600 Subject: [PATCH 004/117] update dist --- results/plot_distribution.py | 1 + 1 file changed, 1 insertion(+) diff --git a/results/plot_distribution.py b/results/plot_distribution.py index 6248850..b4d4115 100644 --- a/results/plot_distribution.py +++ b/results/plot_distribution.py @@ -21,6 +21,7 @@ recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() +features=features.todense() _ = plt.hist(features.ravel()) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') From 0534832ac513a2c874f1d2f47712513f61c18787 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 18:38:40 -0600 Subject: [PATCH 
005/117] reconstruct --- results/plot_distribution.py => plot_distribution.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename results/plot_distribution.py => plot_distribution.py (100%) diff --git a/results/plot_distribution.py b/plot_distribution.py similarity index 100% rename from results/plot_distribution.py rename to plot_distribution.py From 37c1a8b042c797f3b9cb50ca5993bc06b5724929 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 18:39:25 -0600 Subject: [PATCH 006/117] reconstruct --- plot_distribution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index b4d4115..f55f1ff 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -15,11 +15,11 @@ args = parser.parse_args() -ix=np.load(args.datasetName+'_'+args.para+'_dropix.npy') -i =np.load(args.datasetName+'_'+args.para+'_dropi.npy') -j =np.load(args.datasetName+'_'+args.para+'_dropj.npy') -recon =np.load(args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) -features=np.load(args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) +ix=np.load(args.inDir+args.datasetName+'_'+args.para+'_dropix.npy') +i =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropi.npy') +j =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropj.npy') +recon =np.load(args.inDir+args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) +features=np.load(args.inDir+args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() features=features.todense() From 5f1dfc99096461e52cb4c5c08575586e9859332f Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 23:00:04 -0600 Subject: [PATCH 007/117] change numpy hist --- plot_distribution.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index f55f1ff..c0471d0 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -23,19 +23,47 @@ features=features.tolist() features=features.todense() -_ = plt.hist(features.ravel()) +# Directly use plt histogram +# _ = plt.hist(features.ravel()) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +# plt.close() + +# features_log = np.log(features+1) +# _ = plt.hist(features_log.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +# plt.close() + +# _ = plt.hist(recon.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +# plt.close() + +# recon_exp = np.exp(recon)-1 +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +# plt.close() + +# Use numpy histogram +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -_ = plt.hist(features_log.ravel(),bin=100) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -_ = plt.hist(recon.ravel(),bin=100) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) 
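# A tiny self-contained illustration of the np.histogram pattern this patch
# switches to (toy data, not part of the diff): counts come back per bin, and
# bin_edges is always one element longer than hist.
#   import numpy as np
#   hist, bin_edges = np.histogram(np.arange(6), bins=np.arange(0, 7, 2))
#   # hist -> array([2, 2, 2]); bin_edges -> array([0, 2, 4, 6])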
plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +plt.bar(bin_edges[:-1], hist, width = 1) +plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From bc3fc5a8cbc28287158d9703789a74bb5d28bdab Mon Sep 17 00:00:00 2001 From: juexinwang Date: Sun, 15 Nov 2020 23:48:59 -0600 Subject: [PATCH 008/117] change numpy hist --- plot_distribution.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plot_distribution.py b/plot_distribution.py index c0471d0..1b103f7 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -24,6 +24,8 @@ features=features.todense() # Directly use plt histogram +# Careful! plt.hist does not work for huge datasets + # _ = plt.hist(features.ravel()) # plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') # plt.close() @@ -43,6 +45,7 @@ # Use numpy histogram hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') @@ -50,12 +53,14 @@ features_log = np.log(features+1) hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') @@ -63,6 +68,7 @@ recon_exp = np.exp(recon)-1 hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +print(hist) plt.bar(bin_edges[:-1], hist, width = 1) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') From 6b0c31344630d6b5ab2f7f726e5c7de4f50fd6ec Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 00:30:56 -0600 Subject: [PATCH 009/117] change numpy hist --- plot_distribution.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index 1b103f7..7d7764a 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -44,32 +44,33 @@ # plt.close() # Use numpy histogram -hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),100)) +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),10)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +# plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.1)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.01)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) 
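# Aside on the transform above (editorial, not part of the diff):
# np.log(features+1) is equivalent to np.log1p(features); log1p is the
# numerically safer spelling for values near zero, although the two agree
# for typical count data.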
plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.1)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.01)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),0.1)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),10)) print(hist) -plt.bar(bin_edges[:-1], hist, width = 1) +plt.bar(bin_edges[:-1], hist) plt.xlim(min(bin_edges), max(bin_edges)) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From fd5c4ae4cf852a7e06ae2fe1fd7fcd4a55821a65 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 10:10:52 -0600 Subject: [PATCH 010/117] change numpy hist --- plot_distribution.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/plot_distribution.py b/plot_distribution.py index 7d7764a..06196bc 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -43,34 +43,45 @@ # plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') # plt.close() +# Something wrong, have to change to here: +# plt.bar(bin_edges[:-1], hist) +# plt.xlim(min(bin_edges), max(bin_edges)) + # Use numpy histogram -hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features),10)) +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features)+10,10)) print(hist) -# plt.bar(bin_edges[:-1], hist, width = 1) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features),0.01)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.01,0.01)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon),0.01)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.01,0.01)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features),10)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features)+10,10)) print(hist) -plt.bar(bin_edges[:-1], hist) -plt.xlim(min(bin_edges), max(bin_edges)) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) 
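# Design note (editorial, not part of the diff): plotting against the integer
# positions x_pos and then relabeling the ticks with bin_edges keeps the bars
# evenly spaced regardless of bin width; x_pos = [i for i, _ in enumerate(hist)]
# is simply list(range(len(hist))).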
+plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') plt.close() \ No newline at end of file From 4398da535285e6c2b997eb18d9492d2d358b72f1 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 16 Nov 2020 10:22:39 -0600 Subject: [PATCH 011/117] change fig --- plot_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plot_distribution.py b/plot_distribution.py index 06196bc..3aad4be 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -77,7 +77,7 @@ plt.close() recon_exp = np.exp(recon)-1 -hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(features)+10,10)) +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(recon_exp)+10,10)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) From 8677a704b2d23e2654dc05705f8fdf7bd414dab5 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 09:22:43 -0600 Subject: [PATCH 012/117] add r support --- plot_distribution.py | 27 ++++++++++++++-- plot_distribution.r | 77 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 plot_distribution.r diff --git a/plot_distribution.py b/plot_distribution.py index 3aad4be..aa09b2b 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -1,6 +1,8 @@ import numpy as np import matplotlib.pyplot as plt import argparse +from scipy.stats import chi2_contingency +from scipy.stats import nbinom parser = argparse.ArgumentParser(description='Infer Spatial from Expression in single cells') @@ -18,6 +20,8 @@ ix=np.load(args.inDir+args.datasetName+'_'+args.para+'_dropix.npy') i =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropi.npy') j =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropj.npy') +# recon =np.load('12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_recon.npy',allow_pickle=True) +# features=np.load('/Users/juexinwang/Downloads/temp/12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy',allow_pickle=True) recon =np.load(args.inDir+args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) features=np.load(args.inDir+args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) features=features.tolist() @@ -58,7 +62,7 @@ plt.close() features_log = np.log(features+1) -hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.01,0.01)) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.1,0.1)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) @@ -67,7 +71,7 @@ plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') plt.close() -hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.01,0.01)) +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.1,0.1)) print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) @@ -84,4 +88,21 @@ plt.xticks(x_pos, bin_edges[:-1]) plt.xticks(rotation=90) plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') -plt.close() \ No newline at end of file +plt.close() + +#test +# find x,y in 2D matrix +# numpy.unravel_index(a.argmax(), a.shape) +# data = [[207, 282, 241], [282, 240, 234, 3]] +# chi2_contingency(data) +np.savetxt(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.txt', features, fmt='%d') + +# https://stats.stackexchange.com/questions/260580/negative-binomial-distribution-with-python-scipy-stats +# 
https://en.wikipedia.org/wiki/Negative_binomial_distribution#Alternative_formulations +# mean = np.mean(features) +# var = np.var(features) +# p = (var-mean)/var +# r = mean**2/(var-mean) +# x = np.arange(nbinom.ppf(0.01, p, r),nbinom.ppf(0.99, p, r)) +# ax.plot(x, nbinom.pmf(x, p, r), 'bo', ms=8, label='nbinom pmf') + diff --git a/plot_distribution.r b/plot_distribution.r new file mode 100644 index 0000000..d2ab09b --- /dev/null +++ b/plot_distribution.r @@ -0,0 +1,77 @@ +# R +# Running after plot_distribution.py + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf +# https://arxiv.org/pdf/1810.02618.pdf +# https://rdrr.io/cran/gamlss.dist/man/ZANBI.html + +#install in conda: +# https://anaconda.org/conda-forge/r-fitdistrplus +# https://anaconda.org/conda-forge/r-gamlss +# install.packages("fitdistrplus") +# install.packages("gamlss") +library(fitdistrplus) +library(gamlss) + +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least four argument must be supplied (input file).n", call.=FALSE) +} + +datasetName=args[1] +para=args[2] +indir=args[3] +outdir=args[4] + +features = read.table(paste(indir,"/",datasetName,"_",para,"_features.txt",sep=''), header = FALSE, sep = " ") +features = data.matrix(features) +features = as.vector(features) +features = as.numeric(features) + +mu_ = mean(features) +sigma_ = (sd(features)-mean(features))/mean(features)**2 +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 +fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) +tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) +plot(fit_zinb) +dev.off() + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 +nu_ = 1-length(which(features!=0))/(length(features)) +fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) +gofstat(fit_zinb) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff",sep='')) +plot(fit_zinb) +dev.off() + +fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) +gofstat(fit_zinb_) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff_",sep='')) +plot(fit_zinb_) +dev.off() + + +# NBI: +# Goodness-of-fit statistics +# 1-mle-NBI +# Kolmogorov-Smirnov statistic 3.671374e-01 +# Cramer-von Mises statistic 1.016737e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-NBI +# Akaike's Information Criterion 25429885 +# Bayesian Information Criterion 25429912 + + +# ZINB +# Goodness-of-fit statistics +# 1-mle-ZINBI +# Kolmogorov-Smirnov statistic 4.532250e-01 +# Cramer-von Mises statistic 1.873046e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-ZINBI +# Akaike's Information Criterion 25969108 +# Bayesian Information Criterion 25969135 \ No newline at end of file From a2b27d120b46497a0b4231736930d05e170c449f Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 11:13:50 -0600 Subject: [PATCH 013/117] add generating distribution --- generating_distribution.py | 85 ++++++++++++++++++++++++++++++++++++++ plot_distribution.r | 1 + 2 files changed, 86 insertions(+) create mode 100644 generating_distribution.py diff --git a/generating_distribution.py b/generating_distribution.py new file mode 100644 index 0000000..625d4ef --- /dev/null +++ b/generating_distribution.py @@ -0,0 +1,85 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = 
argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('plot_G2E_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G2EL_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G1E_0.1 G1E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.3 G1E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.6 G1E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.8 G1E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + + ('plot_G2F_0.1 G2F1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.3 G2F3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.6 G2F6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.8 G2F8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + + ('plot_N2E_0.1 N2E1','noregu_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.3 N2E3','noregu_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.6 N2E6','noregu_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.8 N2E8','noregu_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + +] + +seedList = ['_1/','_2/','_3/'] + +# generate sbatch files: +for item in methodsList: + batchInfo,param,dirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore main_benchmark.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13.sh",'w') as fw: + fw.write(outStr) + 
fw.close() + diff --git a/plot_distribution.r b/plot_distribution.r index d2ab09b..a679862 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -32,6 +32,7 @@ mu_ = mean(features) sigma_ = (sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) +gofstat(fit_nbi) tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) plot(fit_zinb) dev.off() From be583819ed9a227e23327bc516e9e9d002643e91 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 11:35:23 -0600 Subject: [PATCH 014/117] add distribution sbatch file --- submitCluster_distribution.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 submitCluster_distribution.sh diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh new file mode 100644 index 0000000..d4e9bae --- /dev/null +++ b/submitCluster_distribution.sh @@ -0,0 +1,16 @@ +#submit plotting + +for i in {0.1,0.3,0.6,0.8} +do +sbatch plot_G2E_$i\_12.sh +sbatch plot_G2EL_$i\_12.sh +sbatch plot_G1E_$i\_12.sh +sbatch plot_G2F_$i\_12.sh +sbatch plot_N2E_$i\_12.sh + +sbatch plot_G2E_$i\_13.sh +sbatch plot_G2EL_$i\_13.sh +sbatch plot_G1E_$i\_13.sh +sbatch plot_G2F_$i\_13.sh +sbatch plot_N2E_$i\_13.sh +done \ No newline at end of file From 28451f50673f719158d3ba06709265dcd19f8788 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 12:20:16 -0600 Subject: [PATCH 015/117] update fig --- plot_distribution.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plot_distribution.r b/plot_distribution.r index a679862..9559896 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -34,7 +34,7 @@ sigma_ = (sd(features)-mean(features))/mean(features)**2 fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) gofstat(fit_nbi) tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) -plot(fit_zinb) +plot(fit_nbi) dev.off() # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 From d805f4bf113d052fe7357656f059688cbbd4f093 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 15:01:24 -0600 Subject: [PATCH 016/117] debug --- generating_distribution.py | 4 ++-- plot_distribution.r | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generating_distribution.py b/generating_distribution.py index 625d4ef..30cd87e 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -67,7 +67,7 @@ commandLine = '' for seed in seedList: - commandLine += "python3 -W ignore main_benchmark.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "python3 -W ignore plot_distribution.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_12.sh",'w') as fw: @@ -76,7 +76,7 @@ commandLine = '' for seed in seedList: - commandLine += "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "python3 -W ignore plot_distribution.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" 
"+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_13.sh",'w') as fw: diff --git a/plot_distribution.r b/plot_distribution.r index 9559896..cb9872c 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -28,6 +28,7 @@ features = data.matrix(features) features = as.vector(features) features = as.numeric(features) +print(paste("\n",indir,"/",datasetName,"_",para,"_features.txt") mu_ = mean(features) sigma_ = (sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 From e823a2a231228139dfffe80949d4a673231cf46a Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 15:20:47 -0600 Subject: [PATCH 017/117] update --- generating_distribution.py | 4 ++-- plot_distribution.py | 6 +++--- plot_distribution.r | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generating_distribution.py b/generating_distribution.py index 30cd87e..7b42241 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -68,7 +68,7 @@ commandLine = '' for seed in seedList: commandLine += "python3 -W ignore plot_distribution.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" - commandLine += "R CMD BATCH plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_12.sh",'w') as fw: fw.write(outStr) @@ -77,7 +77,7 @@ commandLine = '' for seed in seedList: commandLine += "python3 -W ignore plot_distribution.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" - commandLine += "R CMD BATCH plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" with open(outputFilename+"_13.sh",'w') as fw: fw.write(outStr) diff --git a/plot_distribution.py b/plot_distribution.py index aa09b2b..081f6ad 100644 --- a/plot_distribution.py +++ b/plot_distribution.py @@ -53,7 +53,7 @@ # Use numpy histogram hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features)+10,10)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) @@ -63,7 +63,7 @@ features_log = np.log(features+1) hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.1,0.1)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) @@ -72,7 +72,7 @@ plt.close() hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.1,0.1)) -print(hist) +# print(hist) x_pos = [i for i, _ in enumerate(hist)] plt.bar(x_pos, hist) plt.xticks(x_pos, bin_edges[:-1]) diff --git a/plot_distribution.r b/plot_distribution.r index cb9872c..473b3cf 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -28,7 +28,7 @@ features = data.matrix(features) features = as.vector(features) features = as.numeric(features) -print(paste("\n",indir,"/",datasetName,"_",para,"_features.txt") +print(paste(indir,"/",datasetName,"_",para,"_features.txt",sep='')) mu_ = mean(features) sigma_ = 
(sd(features)-mean(features))/mean(features)**2 # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 From d2c8eb91c5ff670ade0929d2614a1f65907c1362 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:32:34 -0600 Subject: [PATCH 018/117] change orders --- plot_distribution.r | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/plot_distribution.r b/plot_distribution.r index 473b3cf..409f9b6 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -12,6 +12,7 @@ # install.packages("gamlss") library(fitdistrplus) library(gamlss) +suppressWarnings() args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -39,6 +40,13 @@ plot(fit_nbi) dev.off() # http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 +fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) +gofstat(fit_zinb_) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI_.tiff",sep='')) +plot(fit_zinb_) +dev.off() + + nu_ = 1-length(which(features!=0))/(length(features)) fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) gofstat(fit_zinb) @@ -46,11 +54,7 @@ tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff",sep='')) plot(fit_zinb) dev.off() -fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) -gofstat(fit_zinb_) -tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff_",sep='')) -plot(fit_zinb_) -dev.off() + # NBI: From c404291ad3f164d9a35346cf82b3e5c1c10e5a40 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:35:21 -0600 Subject: [PATCH 019/117] change orders --- submitCluster_distribution.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index d4e9bae..d526005 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -3,12 +3,17 @@ for i in {0.1,0.3,0.6,0.8} do sbatch plot_G2E_$i\_12.sh + +sbatch plot_G2E_$i\_13.sh +done + +for i in {0.1,0.3,0.6,0.8} +do sbatch plot_G2EL_$i\_12.sh sbatch plot_G1E_$i\_12.sh sbatch plot_G2F_$i\_12.sh sbatch plot_N2E_$i\_12.sh -sbatch plot_G2E_$i\_13.sh sbatch plot_G2EL_$i\_13.sh sbatch plot_G1E_$i\_13.sh sbatch plot_G2F_$i\_13.sh From 6cb5bd68ae3e6749a36b5647495054ef3e08ea7e Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 17:44:25 -0600 Subject: [PATCH 020/117] change orders --- generating_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generating_distribution.py b/generating_distribution.py index 7b42241..0ab95f7 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -10,7 +10,7 @@ templateStr1 = "#! 
/bin/bash\n"\ "######################### Batch Headers #########################\n"\ "#SBATCH -A xulab\n"\ -"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute\n"\ "#SBATCH -J " templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ From 4b201320673d90d2f9ef68ae19a3698430b33395 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 18:54:55 -0600 Subject: [PATCH 021/117] change orders --- benchmark_util.py | 4 +++- plot_distribution.r | 1 - results/results_Reading.py | 1 + results/results_impute.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index d85d409..63d6eb4 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -562,6 +562,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - np.log(y+1)) + resultL2 = (x - np.log(y+1))**2 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -570,8 +571,9 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - np.log(yuse+1)) + resultL2 = (x - np.log(yuse+1))**2 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result),np.mean(resultL2), np.median(resultL2), np.min(resultL2), np.max(resultL2) # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): diff --git a/plot_distribution.r b/plot_distribution.r index 409f9b6..4fe8b23 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -12,7 +12,6 @@ # install.packages("gamlss") library(fitdistrplus) library(gamlss) -suppressWarnings() args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { diff --git a/results/results_Reading.py b/results/results_Reading.py index 88f34aa..50ebc2b 100644 --- a/results/results_Reading.py +++ b/results/results_Reading.py @@ -13,6 +13,7 @@ args = parser.parse_args() # Note: +# Main Check results # Generate results in python other than in shell for better organization # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel diff --git a/results/results_impute.py b/results/results_impute.py index 61796dc..6c001ad 100644 --- a/results/results_impute.py +++ b/results/results_impute.py @@ -56,8 +56,8 @@ dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy') -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax), end='') def imputeResult(inputData): 
''' From b72a43bfdc804a018874b0f1c3ed72c79fb91fe0 Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 17 Nov 2020 21:49:41 -0600 Subject: [PATCH 022/117] Add RMSE --- benchmark_util.py | 6 +++--- plot_distribution.r | 5 ++++- results/results_impute.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index 63d6eb4..a72418b 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -562,7 +562,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - np.log(y+1)) - resultL2 = (x - np.log(y+1))**2 + rmse = ((x - np.log(y+1))**2/len(result))**0.5 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -571,9 +571,9 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - np.log(yuse+1)) - resultL2 = (x - np.log(yuse+1))**2 + rmse = ((x - np.log(yuse+1))**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result),np.mean(resultL2), np.median(resultL2), np.min(resultL2), np.max(resultL2) + return np.mean(result), np.median(result), np.min(result), np.max(result), rmse # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): diff --git a/plot_distribution.r b/plot_distribution.r index 4fe8b23..fdc5a17 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -79,4 +79,7 @@ dev.off() # Goodness-of-fit criteria # 1-mle-ZINBI # Akaike's Information Criterion 25969108 -# Bayesian Information Criterion 25969135 \ No newline at end of file +# Bayesian Information Criterion 25969135 + +# Can learn from * +# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html \ No newline at end of file diff --git a/results/results_impute.py b/results/results_impute.py index 6c001ad..f265477 100644 --- a/results/results_impute.py +++ b/results/results_impute.py @@ -56,8 +56,8 @@ dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy') -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, l2ErrorMean, l2ErrorMedian, l2ErrorMin, l2ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') def imputeResult(inputData): ''' From 5fedb49729c99e960a006e030176a848634c9bb0 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 10:34:29 -0600 Subject: [PATCH 023/117] add recheck --- results/results_Reading_recheck.py | 307 ++++++++++++++++++ .../submitCluster_Result_Impute_recheck.sh | 17 + 2 files changed, 324 insertions(+) create mode 100644 results/results_Reading_recheck.py create mode 100644 results/submitCluster_Result_Impute_recheck.sh diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py new file mode 100644 index 0000000..c6584b2 --- /dev/null +++ 
b/results/results_Reading_recheck.py @@ -0,0 +1,307 @@ +import os +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +parser.add_argument('--methodName', type=int, default=0, + help="method used: 0-62") +parser.add_argument('--imputeMode', default=True, action='store_true', + help='impute or not (default: False). Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--runMode',action='store_true', default=False, help="Run or prepare cluster script") +parser.add_argument('--splitMode', default=False, action='store_true', + help='whether split, used for long queue') +parser.add_argument('--batchStr', type=int, default=0, + help="method used: 1-13") +args = parser.parse_args() + +# Note: +# Generate results in python other than in shell for better organization +# We are not use runpy.run_path('main_result.py') for it is hard to pass arguments +# We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel +# So we use os.system('') here + +if args.splitMode: + #The split of batch, more batches, more parallel + + if args.batchStr == 8: + datasetList = [ + '9.Chung', + # '9.Chung --discreteTag' + ] + elif args.batchStr == 11: + datasetList = [ + '11.Kolodziejczyk', + # '11.Kolodziejczyk --discreteTag' + ] + elif args.batchStr == 12: + datasetList = [ + '12.Klein', + # '12.Klein --discreteTag' + ] + elif args.batchStr == 13: + datasetList = [ + '13.Zeisel', + # '13.Zeisel --discreteTag' + ] +else: + datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', + ] + +if args.imputeMode: + pyStr = 'results_impute.py' + + npyList = [ + '../npyImputeG2E_1/ --ratio 0.1', #1 + '../npyImputeG2E_1/ --ratio 0.3', #2 + '../npyImputeG2E_1/ --ratio 0.6', #3 + '../npyImputeG2E_1/ --ratio 0.8', #4 + '../npyImputeG2EL_1/ --ratio 0.1', #5 + '../npyImputeG2EL_1/ --ratio 0.3', #6 + '../npyImputeG2EL_1/ --ratio 0.6', #7 + '../npyImputeG2EL_1/ --ratio 0.8', #8 + '../npyImputeG1E_1/ --ratio 0.1', #9 + '../npyImputeG1E_1/ --ratio 0.3', #10 + '../npyImputeG1E_1/ --ratio 0.6', #11 + '../npyImputeG1E_1/ --ratio 0.8', #12 + '../npyImputeG2F_1/ --ratio 0.1', #13 + '../npyImputeG2F_1/ --ratio 0.3', #14 + '../npyImputeG2F_1/ --ratio 0.6', #15 + '../npyImputeG2F_1/ --ratio 0.8', #16 + '../npyImputeN2E_1/ --ratio 0.1', #17 + '../npyImputeN2E_1/ --ratio 0.3', #18 + '../npyImputeN2E_1/ --ratio 0.6', #19 + '../npyImputeN2E_1/ --ratio 0.8', #20 + + '../npyImputeG2E_2/ --ratio 0.1', #21 + '../npyImputeG2E_2/ --ratio 0.3', #22 + '../npyImputeG2E_2/ --ratio 0.6', #23 + '../npyImputeG2E_2/ --ratio 0.8', #24 + '../npyImputeG2EL_2/ --ratio 0.1', #25 + '../npyImputeG2EL_2/ --ratio 0.3', #26 + '../npyImputeG2EL_2/ --ratio 0.6', #27 + '../npyImputeG2EL_2/ --ratio 0.8', #28 + '../npyImputeG1E_2/ --ratio 0.1', #29 + '../npyImputeG1E_2/ --ratio 0.3', #30 + '../npyImputeG1E_2/ --ratio 0.6', #31 + '../npyImputeG1E_2/ --ratio 0.8', #32 + '../npyImputeG2F_2/ --ratio 0.1', #33 + '../npyImputeG2F_2/ --ratio 0.3', #34 + '../npyImputeG2F_2/ --ratio 0.6', #35 + '../npyImputeG2F_2/ --ratio 0.8', #36 + '../npyImputeN2E_2/ --ratio 0.1', #37 + '../npyImputeN2E_2/ --ratio 0.3', #38 + '../npyImputeN2E_2/ --ratio 0.6', #39 + '../npyImputeN2E_2/ --ratio 0.8', #40 + + '../npyImputeG2E_3/ --ratio 0.1', #41 + '../npyImputeG2E_3/ --ratio 0.3', #42 + '../npyImputeG2E_3/ --ratio 0.6', #43 + '../npyImputeG2E_3/ --ratio 0.8', #44 + '../npyImputeG2EL_3/ --ratio 0.1', #45 + '../npyImputeG2EL_3/ --ratio 0.3', #46 + '../npyImputeG2EL_3/ 
--ratio 0.6', #47 + '../npyImputeG2EL_3/ --ratio 0.8', #48 + '../npyImputeG1E_3/ --ratio 0.1', #49 + '../npyImputeG1E_3/ --ratio 0.3', #50 + '../npyImputeG1E_3/ --ratio 0.6', #51 + '../npyImputeG1E_3/ --ratio 0.8', #52 + '../npyImputeG2F_3/ --ratio 0.1', #53 + '../npyImputeG2F_3/ --ratio 0.3', #54 + '../npyImputeG2F_3/ --ratio 0.6', #55 + '../npyImputeG2F_3/ --ratio 0.8', #56 + '../npyImputeN2E_3/ --ratio 0.1', #57 + '../npyImputeN2E_3/ --ratio 0.3', #58 + '../npyImputeN2E_3/ --ratio 0.6', #59 + '../npyImputeN2E_3/ --ratio 0.8', #60 + + ] + +else: + pyStr = 'results_celltype.py' + + npyList = [ + '../npyG1B/', #0 + '../npyG1E/', #1 + '../npyG1F/', #2 + '../npyR1B/', #3 + '../npyR1E/', #4 + '../npyR1F/', #5 + '../npyN1B/', #6 + '../npyN1E/', #7 + '../npyN1F/', #8 + '../npyG2B/', #9 + '../npyG2E/', #10 + '../npyG2F/', #11 + '../npyR2B/', #12 + '../npyR2E/', #13 + '../npyR2F/', #14 + '../npyN2B/', #15 + '../npyN2E/', #16 + '../npyN2F/', #17 + + '../npyG1B_LK/', #18 + '../npyG1E_LK/', #19 + '../npyG1F_LK/', #20 + '../npyR1B_LK/', #21 + '../npyR1E_LK/', #22 + '../npyR1F_LK/', #23 + '../npyN1B_LK/', #24 + '../npyN1E_LK/', #25 + '../npyN1F_LK/', #26 + '../npyG2B_LK/', #27 + '../npyG2E_LK/', #28 + '../npyG2F_LK/', #29 + '../npyR2B_LK/', #30 + '../npyR2E_LK/', #31 + '../npyR2F_LK/', #32 + '../npyN2B_LK/', #33 + '../npyN2E_LK/', #34 + '../npyN2F_LK/', #35 + + '../npyG1B_LB/', #36 + '../npyG1E_LB/', #37 + '../npyG1F_LB/', #38 + '../npyR1B_LB/', #39 + '../npyR1E_LB/', #40 + '../npyR1F_LB/', #41 + '../npyN1B_LB/', #42 + '../npyN1E_LB/', #43 + '../npyN1F_LB/', #44 + '../npyG2B_LB/', #45 + '../npyG2E_LB/', #46 + '../npyG2F_LB/', #47 + '../npyR2B_LB/', #48 + '../npyR2E_LB/', #49 + '../npyR2F_LB/', #50 + '../npyN2B_LB/', #51 + '../npyN2E_LB/', #52 + '../npyN2F_LB/', #53 + ] + +reguDict={} + +for i in range(0,16): + reguDict[i]='LTMG' +for i in range(16,20): + reguDict[i]='noregu' +for i in range(20,36): + reguDict[i]='LTMG' +for i in range(36,40): + reguDict[i]='noregu' +for i in range(40,56): + reguDict[i]='LTMG' +for i in range(56,60): + reguDict[i]='noregu' + +reguStr='' +if args.methodName in reguDict: + reguStr=' --regulized-type ' + reguDict[args.methodName] + ' ' + +npyStr = npyList[args.methodName] + +benchmarkStr = '' + +if args.runMode: + labelFileDir = '/home/wangjue/biodata/scData/allBench/' +else: + labelFileDir = '/home/jwang/data/scData/' + +def getBenchmarkStr(count): + benchmarkStr = '' + if args.batchStr == 0: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '1.Biase/Biase_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 1: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '2.Li/Li_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 2: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '3.Treutlein/Treutlein_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 3: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '4.Yan/Yan_cell_label.csv '\ + '--n-clusters 7 ' + elif args.batchStr == 4: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '5.Goolam/Goolam_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 5: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '6.Guo/Guo_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 6: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '7.Deng/Deng_cell_label.csv '\ + '--n-clusters 10 ' + elif args.batchStr == 7: + benchmarkStr = ' --benchmark '\ + 
'--labelFilename ' + labelFileDir + '8.Pollen/Pollen_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 8: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '9.Chung/Chung_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 9: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '10.Usoskin/Usoskin_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 10: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '11.Kolodziejczyk/Kolodziejczyk_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 11: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '12.Klein/Klein_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 12: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '13.Zeisel/Zeisel_cell_label.csv '\ + '--n-clusters 7 ' + + return benchmarkStr + + +if not args.runMode: + if args.imputeMode: + imputeStr = 'I' + else: + imputeStr = 'C' + splitStr = '' + if args.splitMode: + splitStr = '_'+str(args.batchStr) + templateStr = "#! /bin/bash\n"\ + "######################### Batch Headers #########################\n"\ + "#SBATCH -A xulab\n"\ + "#SBATCH -p Lewis,BioCompute # use the BioCompute partition\n"\ + "#SBATCH -J R" + imputeStr + '_' + str(args.methodName) + splitStr + " \n"\ + "#SBATCH -o results-%j.out # give the job output a custom name\n"\ + "#SBATCH -t 2-00:00 # two days time limit\n"\ + "#SBATCH -N 1 # number of nodes\n"\ + "#SBATCH -n 1 # number of cores (AKA tasks)\n"\ + "#SBATCH --mem=128G\n"\ + "#################################################################\n"\ + "module load miniconda3\n"\ + "source activate conda_R\n" + print(templateStr) + +count = 0 +for datasetStr in datasetList: + commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --npyDir ' + npyStr + if args.runMode: + os.system(commandStr) + else: + print(commandStr) + for i in range(10): + commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr + if args.runMode: + os.system(commandStr) + else: + print(commandStr) + count += 1 + + diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh new file mode 100644 index 0000000..7cbc427 --- /dev/null +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -0,0 +1,17 @@ +for i in {0..59} +do +for j in {8,11,12,13} +do +python results_Reading_23dropout.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +done +done + +# submit +for i in {0..59} +do +for j in {8,11,12,13} +do +sbatch run_Results_Impute_$i-$j.sh +sleep 1 +done +done \ No newline at end of file From 00129a4eae66402a8ce233ae0c1875d9410801b5 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 10:43:21 -0600 Subject: [PATCH 024/117] fix a bug --- results/submitCluster_Result_Impute_recheck.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index 7cbc427..1277b79 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -2,16 +2,16 @@ for i in {0..59} do for j in {8,11,12,13} do -python results_Reading_23dropout.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +python results_Reading_recheck.py --methodName $i 
--splitMode --batchStr $j > run_Results_Impute_$i-$j.sh done done # submit -for i in {0..59} -do -for j in {8,11,12,13} -do -sbatch run_Results_Impute_$i-$j.sh -sleep 1 -done -done \ No newline at end of file +# for i in {0..59} +# do +# for j in {8,11,12,13} +# do +# sbatch run_Results_Impute_$i-$j.sh +# sleep 1 +# done +# done \ No newline at end of file From f0e7bdfc8da54e4a73823c8c67255cc0edf3f8de Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 11:28:43 -0600 Subject: [PATCH 025/117] change to new format --- results/results_Reading_recheck.py | 40 +++++++++++++++--------------- results/results_impute_graph.py | 12 ++++----- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py index c6584b2..124743c 100644 --- a/results/results_Reading_recheck.py +++ b/results/results_Reading_recheck.py @@ -50,7 +50,7 @@ ] if args.imputeMode: - pyStr = 'results_impute.py' + pyStr = 'results_impute_graph.py' npyList = [ '../npyImputeG2E_1/ --ratio 0.1', #1 @@ -210,55 +210,55 @@ def getBenchmarkStr(count): benchmarkStr = '' - if args.batchStr == 0: + if args.batchStr == 1: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '1.Biase/Biase_cell_label.csv '\ '--n-clusters 3 ' - elif args.batchStr == 1: + elif args.batchStr == 2: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '2.Li/Li_cell_label.csv '\ '--n-clusters 9 ' - elif args.batchStr == 2: + elif args.batchStr == 3: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '3.Treutlein/Treutlein_cell_label.csv '\ '--n-clusters 5 ' - elif args.batchStr == 3: + elif args.batchStr == 4: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '4.Yan/Yan_cell_label.csv '\ '--n-clusters 7 ' - elif args.batchStr == 4: + elif args.batchStr == 5: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '5.Goolam/Goolam_cell_label.csv '\ '--n-clusters 5 ' - elif args.batchStr == 5: + elif args.batchStr == 6: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '6.Guo/Guo_cell_label.csv '\ '--n-clusters 9 ' - elif args.batchStr == 6: + elif args.batchStr == 7: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '7.Deng/Deng_cell_label.csv '\ '--n-clusters 10 ' - elif args.batchStr == 7: + elif args.batchStr == 8: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '8.Pollen/Pollen_cell_label.csv '\ '--n-clusters 11 ' - elif args.batchStr == 8: + elif args.batchStr == 9: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '9.Chung/Chung_cell_label.csv '\ '--n-clusters 4 ' - elif args.batchStr == 9: + elif args.batchStr == 10: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '10.Usoskin/Usoskin_cell_label.csv '\ '--n-clusters 11 ' - elif args.batchStr == 10: + elif args.batchStr == 11: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '11.Kolodziejczyk/Kolodziejczyk_cell_label.csv '\ '--n-clusters 3 ' - elif args.batchStr == 11: + elif args.batchStr == 12: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '12.Klein/Klein_cell_label.csv '\ '--n-clusters 4 ' - elif args.batchStr == 12: + elif args.batchStr == 13: benchmarkStr = ' --benchmark '\ '--labelFilename ' + labelFileDir + '13.Zeisel/Zeisel_cell_label.csv '\ '--n-clusters 7 ' @@ -296,12 +296,12 @@ def getBenchmarkStr(count): os.system(commandStr) else: print(commandStr) - for i in range(10): - commandStr = 'python -W ignore ' + 
pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr - if args.runMode: - os.system(commandStr) - else: - print(commandStr) + # for i in range(10): + # commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr + # if args.runMode: + # os.system(commandStr) + # else: + # print(commandStr) count += 1 diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index 4145dd1..9f7101d 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -63,20 +63,20 @@ # dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropi.npy') # dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropj.npy') # dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropix.npy') -dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropi.npy') -dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropj.npy') -dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropix.npy') +dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropi.npy') +dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropj.npy') +dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropix.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_'+args.regupara+'_recon'+args.reconstr+'.npy') -featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_recon'+args.reconstr+'.npy') +featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_recon'+args.reconstr+'.npy') # featuresImpute = pd.read_csv(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.regupara+'_0.0_0.0_recon'+args.reconstr+'.csv') # featuresImpute = featuresImpute.to_numpy() -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') def imputeResult(inputData): ''' From 92912a1ba8e58507fba07076916848601932fe35 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 15:14:07 -0600 Subject: [PATCH 026/117] fix a bug --- benchmark_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/benchmark_util.py b/benchmark_util.py index a72418b..f85f128 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -573,7 +573,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): result = np.abs(x - np.log(yuse+1)) rmse = ((x - np.log(yuse+1))**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result), rmse + return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) # cosine similarity def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): From 59d9cc1722817a6d2701e89c069941904d46e04d Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 15:54:08 -0600 Subject: [PATCH 027/117] add 9 and 11 data --- generating_Impute_0.1-0.8-ablation.py | 26 ++++++++++++---- submitCluster_imputation_0.1-0.8-ablation.sh | 32 ++++++++++++++------ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 86773f7..05e3135 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -63,18 +63,32 @@ imputeStr = ' --imputeMode ' outDirStr = "npyImpute"+outDirStr[3:] outputFilename = args.outputDir + tmpstr1 - abbrStr = tmpstr2 + abbrStr = tmpstr2 for dropoutPara in dropoutList: - commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: fw.write(outStr) fw.close() for dropoutPara in dropoutList: - commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: fw.write(outStr) - fw.close() + fw.close() + + # for dropoutPara in dropoutList: + # commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + # with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + # fw.write(outStr) + # fw.close() + + # for dropoutPara in dropoutList: + # commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + # with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + # fw.write(outStr) + # 
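# Aside on the benchmark_util.py fix in PATCH 026 above: as computed there,
# rmse is an element-wise vector (each entry is |x - y|/sqrt(n)), so the
# patched np.mean(rmse) returns mean(|x - y|)/sqrt(n) rather than a
# conventional root-mean-square error. A minimal sketch of the conventional
# definition, assuming x holds imputed log-scale values and yuse the raw
# original counts at the dropped positions, as in that hunk:
import numpy as np

def rmse_log(x, yuse):
    # square root of the mean squared log-scale difference over dropped entries
    return np.sqrt(np.mean((x - np.log(yuse + 1)) ** 2))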
fw.close() diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh index d558529..b6e3ea5 100644 --- a/submitCluster_imputation_0.1-0.8-ablation.sh +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -20,16 +20,28 @@ for i in {1..3} do for j in {0.1,0.3,0.6,0.8} do -sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh -sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh -sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_9_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_9_$j\.sh -sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh -sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh -sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_11_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_11_$j\.sh + +# sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +# sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh + +# sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +# sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh done done \ No newline at end of file From b04bbb4c99a156267a2e53d3a1d5a0e5c4be73aa Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 21:17:24 -0600 Subject: [PATCH 028/117] add 9 and 11 for recheck --- generating_Impute_0.1-0.8-ablation.py | 24 +++++++++---------- generating_distribution.py | 20 +++++++++++++++- plot_distribution.r | 1 - results/results_impute_graph.py | 2 -- .../submitCluster_Result_Impute_recheck.sh | 18 +++++++------- submitCluster_distribution.sh | 13 +++++++++- submitCluster_imputation_0.1-0.8-ablation.sh | 20 ++++++++-------- 7 files changed, 62 insertions(+), 36 deletions(-) diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py index 05e3135..ce1d245 100644 --- a/generating_Impute_0.1-0.8-ablation.py +++ b/generating_Impute_0.1-0.8-ablation.py @@ -79,16 +79,16 @@ fw.write(outStr) fw.close() - # for dropoutPara in dropoutList: - # commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" - # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - # with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: - # fw.write(outStr) - # fw.close() + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() - # for dropoutPara 
in dropoutList: - # commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" - # outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - # with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: - # fw.write(outStr) - # fw.close() + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/generating_distribution.py b/generating_distribution.py index 0ab95f7..724bf79 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -63,7 +63,25 @@ tmpstr2=tmp[1] imputeStr = '' outputFilename = args.outputDir + tmpstr1 - abbrStr = tmpstr2 + abbrStr = tmpstr2 + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 9.Chung --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 9.Chung "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 11.Kolodziejczyk --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 11.Kolodziejczyk "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12.sh",'w') as fw: + fw.write(outStr) + fw.close() commandLine = '' for seed in seedList: diff --git a/plot_distribution.r b/plot_distribution.r index fdc5a17..c09f27b 100644 --- a/plot_distribution.r +++ b/plot_distribution.r @@ -45,7 +45,6 @@ tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI_.tiff",sep='')) plot(fit_zinb_) dev.off() - nu_ = 1-length(which(features!=0))/(length(features)) fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) gofstat(fit_zinb) diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index 9f7101d..a0a11fc 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -67,8 +67,6 @@ dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropj.npy') dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropix.npy') - - # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_'+args.regupara+'_recon'+args.reconstr+'.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_recon'+args.reconstr+'.npy') diff --git a/results/submitCluster_Result_Impute_recheck.sh 
b/results/submitCluster_Result_Impute_recheck.sh index 1277b79..93349eb 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -6,12 +6,12 @@ python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > ru done done -# submit -# for i in {0..59} -# do -# for j in {8,11,12,13} -# do -# sbatch run_Results_Impute_$i-$j.sh -# sleep 1 -# done -# done \ No newline at end of file +submit +for i in {0..59} +do +for j in {8,11,12,13} +do +sbatch run_Results_Impute_$i-$j.sh +sleep 1 +done +done \ No newline at end of file diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index d526005..f631ff7 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -2,13 +2,24 @@ for i in {0.1,0.3,0.6,0.8} do +sbatch plot_G2E_$i\_9.sh +sbatch plot_G2E_$i\_11.sh sbatch plot_G2E_$i\_12.sh - sbatch plot_G2E_$i\_13.sh done for i in {0.1,0.3,0.6,0.8} do +sbatch plot_G2EL_$i\_9.sh +sbatch plot_G1E_$i\_9.sh +sbatch plot_G2F_$i\_9.sh +sbatch plot_N2E_$i\_9.sh + +sbatch plot_G2EL_$i\_11.sh +sbatch plot_G1E_$i\_11.sh +sbatch plot_G2F_$i\_11.sh +sbatch plot_N2E_$i\_11.sh + sbatch plot_G2EL_$i\_12.sh sbatch plot_G1E_$i\_12.sh sbatch plot_G2F_$i\_12.sh diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh index b6e3ea5..a822d20 100644 --- a/submitCluster_imputation_0.1-0.8-ablation.sh +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -32,16 +32,16 @@ sbatch run_experimentImpute_2_g_e_L_$i\_11_$j\.sh sbatch run_experimentImpute_2_g_f_$i\_11_$j\.sh sbatch run_experimentImpute_2_n_e_$i\_11_$j\.sh -# sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh -# sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh -# sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh -# sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh done done \ No newline at end of file From d0f43edd1b1a396337c346d95eddb2bbdbaaecf7 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 22 Nov 2020 22:23:48 -0600 Subject: [PATCH 029/117] fix a bug --- generating_distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generating_distribution.py b/generating_distribution.py index 724bf79..a69efbb 100644 --- a/generating_distribution.py +++ b/generating_distribution.py @@ -79,7 +79,7 @@ commandLine += "python3 -W ignore plot_distribution.py --datasetName 11.Kolodziejczyk --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" commandLine += "Rscript plot_distribution.r 11.Kolodziejczyk "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" - with open(outputFilename+"_12.sh",'w') as fw: + with open(outputFilename+"_11.sh",'w') as 
fw: fw.write(outStr) fw.close() From bfdcd91a1c2e6a6b2bab0f5630cff8bda8c739d8 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 07:51:42 -0600 Subject: [PATCH 030/117] fix a typo --- results/submitCluster_Result_Impute_recheck.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index 93349eb..e6d5272 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -6,7 +6,7 @@ python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > ru done done -submit +# submit for i in {0..59} do for j in {8,11,12,13} From f505af38e5bfb796441dc247359b3008461516fc Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 09:26:58 -0600 Subject: [PATCH 031/117] update converge type --- main_benchmark.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/main_benchmark.py b/main_benchmark.py index 31f0942..0a295fc 100644 --- a/main_benchmark.py +++ b/main_benchmark.py @@ -36,7 +36,7 @@ help='EM process type (default: celltypeEM) or EM') parser.add_argument('--alpha', type=float, default=0.5, help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') -parser.add_argument('--converge-type', type=str, default='either', +parser.add_argument('--converge-type', type=str, default='celltype', help='type of converge: celltype/graph/both/either (default: celltype) ') parser.add_argument('--converge-graphratio', type=float, default=0.01, help='ratio of cell type change in EM iteration (default: 0.01), 0-1') @@ -587,22 +587,34 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # graph criteria if args.converge_type == 'graph': if graphChange < graphChangeThreshold: - print('Converge now!') + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # celltype criteria elif args.converge_type == 'celltype': if ari>args.converge_celltyperatio: - print('Converge now!') + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if both criteria are meets elif args.converge_type == 'both': if graphChange < graphChangeThreshold and ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if either criteria are meets elif args.converge_type == 'either': if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # Update From 9f7ae1e66e944e1210973cfd9ba6d1230e89435a Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 15:40:53 -0600 Subject: [PATCH 032/117] change ranking --- results/submitCluster_Result_Impute_recheck.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index e6d5272..ba68356 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,16 +1,16 @@ -for i in {0..59} -do -for j in {8,11,12,13} -do -python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh -done -done +# for i in {0..59} +# do +# for 
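# Aside on the converge criteria added in PATCH 031 above: the four
# converge-type branches repeat the same update-and-break bookkeeping. A
# minimal refactoring sketch, assuming only the names visible in that hunk
# (graphChange, graphChangeThreshold, ari, args.converge_type,
# args.converge_celltyperatio); a suggestion, not code from the repo:
def is_converged(convergeType, graphChange, graphChangeThreshold, ari, celltypeRatio):
    graphOk = graphChange < graphChangeThreshold
    celltypeOk = ari > celltypeRatio
    if convergeType == 'graph':
        return graphOk
    elif convergeType == 'celltype':
        return celltypeOk
    elif convergeType == 'both':
        return graphOk and celltypeOk
    return graphOk or celltypeOk  # 'either'

# The loop body would then collapse to:
# if is_converged(args.converge_type, graphChange, graphChangeThreshold,
#                 ari, args.converge_celltyperatio):
#     adjOld = adjNew
#     listResultOld = listResult
#     break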
j in {8,11,12,13} +# do +# python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +# done +# done # submit -for i in {0..59} -do for j in {8,11,12,13} do +for i in {0..59} +do sbatch run_Results_Impute_$i-$j.sh sleep 1 done From 7863f74df12c7cf284da11f9f9be49068a0854ec Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 23 Nov 2020 18:22:42 -0600 Subject: [PATCH 033/117] add cosine --- results/results_impute_graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index a0a11fc..b964534 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -74,7 +74,8 @@ # featuresImpute = featuresImpute.to_numpy() l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') +cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse), end='') def imputeResult(inputData): ''' From e18d2913f236e30c90ef9faa59f9f0f0205a0b2b Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 11:22:39 -0600 Subject: [PATCH 034/117] add magic --- codesfromJGandYJ/impute code/MAGIC_impute.py | 73 ++++++-------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py index c0c1f22..0743d27 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute code/MAGIC_impute.py @@ -5,71 +5,40 @@ import numpy as np import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') #from benchmark_util import impute_dropout -def impute_dropout(X, rate=0.1): - """ - X: original testing set - ======== - returns: - X_zero: copy of X with zeros - i, j, ix: indices of where dropout is applied - """ - #If the input is a dense matrix - if isinstance(X, np.ndarray): - X_zero = np.copy(X) - # select non-zero subset - i,j = np.nonzero(X_zero) - # If the input is a sparse matrix - else: - X_zero = scipy.sparse.lil_matrix.copy(X) - # select non-zero subset - i,j = X_zero.nonzero() - # choice number 1 : select 10 percent of the non zero values (so that distributions overlap enough) - ix = np.random.choice(range(len(i)), int(np.floor(0.1 * len(i))), replace=False) - X_zero[i[ix], j[ix]] *= np.random.binomial(1, rate) - # choice number 2, focus on a few but corrupt binomially - #ix = np.random.choice(range(len(i)), int(slice_prop * np.floor(len(i))), replace=False) - #X_zero[i[ix], j[ix]] = np.random.binomial(X_zero[i[ix], j[ix]].astype(np.int), rate) - return X_zero, i, j, ix - parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], 
axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) +def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/hpc/scratch/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) -# Load single-cell RNA-seq data -# Default is KNN=5 -magic_operator = magic.MAGIC() -# magic_operator = magic.MAGIC(knn=10) -X_magic = magic_operator.fit_transform(x, genes="all_genes") -recon = X_magic + # Load single-cell RNA-seq data + # Default is KNN=5 + magic_operator = magic.MAGIC() + # magic_operator = magic.MAGIC(knn=10) + X_magic = magic_operator.fit_transform(x, genes="all_genes") + recon = X_magic -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr + np.save('/storage/hpc/scratch/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/magic/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),recon) +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) # From scVI # # Load single-cell RNA-seq data From 013240c84700876cd2e5370d051fe45438484d05 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 11:24:55 -0600 Subject: [PATCH 035/117] add bash --- codesfromJGandYJ/impute code/other_magic.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 codesfromJGandYJ/impute code/other_magic.sh diff --git a/codesfromJGandYJ/impute code/other_magic.sh b/codesfromJGandYJ/impute code/other_magic.sh new file mode 100644 index 0000000..fd9f5e4 --- /dev/null +++ b/codesfromJGandYJ/impute code/other_magic.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore MAGIC_impute.py From 8d313d70ca96c0a6e35f8dc99b394c52f68d4013 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 13:04:55 -0600 Subject: [PATCH 036/117] add bash --- codesfromJGandYJ/impute code/MAGIC_impute.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py index 0743d27..954cd4c 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute code/MAGIC_impute.py @@ -15,7 +15,7 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): - filename = '/storage/hpc/scratch/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() @@ -29,7 +29,7 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): X_magic = magic_operator.fit_transform(x, genes="all_genes") recon = X_magic - np.save('/storage/hpc/scratch/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) + np.save('/storage/htc/joshilab/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] From bef9ce4cb127e9ebcaa09d7ec9dd5ebba63e9ffd Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 18:06:56 -0600 Subject: [PATCH 037/117] add scvi --- codesfromJGandYJ/impute code/scVi_impute.py | 102 ------------------ .../{impute code => impute}/MAGIC_impute.py | 6 +- .../{impute code => impute}/SAVER_impute.py | 0 .../{impute code => impute}/SCIMPUTE.py | 0 .../{impute code => impute}/dca_impute.py | 0 .../deepimpute_impute.py | 0 .../{impute code => impute}/other_magic.sh | 0 codesfromJGandYJ/impute/other_scvi.sh | 14 +++ .../saucie_impute_t.py | 0 codesfromJGandYJ/impute/scVi_impute.py | 92 ++++++++++++++++ 10 files changed, 109 insertions(+), 105 deletions(-) delete mode 100644 codesfromJGandYJ/impute code/scVi_impute.py rename codesfromJGandYJ/{impute code => impute}/MAGIC_impute.py (89%) rename codesfromJGandYJ/{impute code => impute}/SAVER_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/SCIMPUTE.py (100%) rename codesfromJGandYJ/{impute code => impute}/dca_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/deepimpute_impute.py (100%) rename codesfromJGandYJ/{impute code => impute}/other_magic.sh (100%) create mode 100644 codesfromJGandYJ/impute/other_scvi.sh rename codesfromJGandYJ/{impute code => impute}/saucie_impute_t.py (100%) create mode 100644 codesfromJGandYJ/impute/scVi_impute.py diff --git a/codesfromJGandYJ/impute code/scVi_impute.py b/codesfromJGandYJ/impute code/scVi_impute.py deleted file mode 100644 index 6ce9383..0000000 --- a/codesfromJGandYJ/impute code/scVi_impute.py +++ /dev/null @@ -1,102 +0,0 @@ -import os 
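# Aside on the loading idiom shared by the impute scripts from PATCH 034
# onward: the *_features.npy files are assumed here to hold a pickled
# scipy sparse matrix, which is what the tolist()/todense() chain implies.
# A minimal annotated sketch (the path is one example from these patches):
import numpy as np

filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_1/9.Chung_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy'
x = np.load(filename, allow_pickle=True)  # 0-d object array wrapping the sparse matrix
x = x.tolist()      # unwraps the 0-d array to the scipy.sparse matrix itself
x = x.todense()     # densify to a numpy.matrix
x = np.asarray(x)   # numpy.matrix -> plain ndarray
x = np.log(x + 1)   # the log transform most of these scripts apply before imputing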
-import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset -from scvi.models import VAE -from scvi.inference import UnsupervisedTrainer -import torch -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - - -featuresOriginal = np.copy(x) -features, dropi, dropj, dropix = impute_dropout(featuresOriginal, rate=float(args.ratio)) - -#transpose and add names for rows and cols -features=np.transpose(features) -rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) -features=np.concatenate([rowname,features],axis=1) -colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) -features=np.concatenate([colname,features],axis=0) - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -# gene_dataset = CortexDataset(save_path=save_path, total_genes=558) -gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/") - -n_epochs = 400 -lr = 1e-3 -use_batches = False -use_cuda = True - -vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) -trainer = UnsupervisedTrainer( - vae, - gene_dataset, - train_size=0.75, - use_cuda=use_cuda, - frequency=5, -) - -trainer.train(n_epochs=n_epochs, lr=lr) - - -full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) -latent, batch_indices, labels = full.sequential().get_latent() -batch_indices = batch_indices.ravel() - -# use imputation -imputed_values = full.sequential().imputation() -normalized_values = full.sequential().get_sample_scale() - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed_values) -np.save(save_path+'{}_{}_recon_normalized.npy'.format(datasetNameStr,args.ratio),normalized_values) -np.save(save_path+'{}_{}_featuresOriginal.npy'.format(datasetNameStr,args.ratio),featuresOriginal) 
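# Aside on the dropi/dropj/dropix arrays saved just below: they record which
# nonzero entries were zeroed out, so the scoring code can compare imputed and
# original values only at those positions. A minimal sketch, assuming dropix
# indexes into the dropi/dropj coordinate arrays, matching the (i, j, ix)
# signature of imputation_error_log in benchmark_util.py (PATCH 026):
import numpy as np

def dropped_pairs(imputed, original, dropi, dropj, dropix):
    rows, cols = dropi[dropix], dropj[dropix]
    x = imputed[rows, cols]    # imputed values at the corrupted positions
    y = original[rows, cols]   # ground-truth values at the same positions
    return np.asarray(x), np.asarray(y)  # score with, e.g., mean absolute error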
-np.save(save_path+'{}_{}_dropi.npy'.format(datasetNameStr,args.ratio),dropi) -np.save(save_path+'{}_{}_dropj.npy'.format(datasetNameStr,args.ratio),dropj) -np.save(save_path+'{}_{}_dropix.npy'.format(datasetNameStr,args.ratio),dropix) - -# celltype: -#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute/MAGIC_impute.py similarity index 89% rename from codesfromJGandYJ/impute code/MAGIC_impute.py rename to codesfromJGandYJ/impute/MAGIC_impute.py index 954cd4c..da7b573 100644 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ b/codesfromJGandYJ/impute/MAGIC_impute.py @@ -7,10 +7,10 @@ import sys #from benchmark_util import impute_dropout -parser = argparse.ArgumentParser(description='') +parser = argparse.ArgumentParser(description='MAGIC Impute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() diff --git a/codesfromJGandYJ/impute code/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/SAVER_impute.py rename to codesfromJGandYJ/impute/SAVER_impute.py diff --git a/codesfromJGandYJ/impute code/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py similarity index 100% rename from codesfromJGandYJ/impute code/SCIMPUTE.py rename to codesfromJGandYJ/impute/SCIMPUTE.py diff --git a/codesfromJGandYJ/impute code/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/dca_impute.py rename to codesfromJGandYJ/impute/dca_impute.py diff --git a/codesfromJGandYJ/impute code/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py similarity index 100% rename from codesfromJGandYJ/impute code/deepimpute_impute.py rename to codesfromJGandYJ/impute/deepimpute_impute.py diff --git a/codesfromJGandYJ/impute code/other_magic.sh b/codesfromJGandYJ/impute/other_magic.sh similarity index 100% rename from codesfromJGandYJ/impute code/other_magic.sh rename to codesfromJGandYJ/impute/other_magic.sh diff --git a/codesfromJGandYJ/impute/other_scvi.sh b/codesfromJGandYJ/impute/other_scvi.sh new file mode 100644 index 0000000..888d89b --- /dev/null +++ b/codesfromJGandYJ/impute/other_scvi.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scvi +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore scVi_impute.py diff --git a/codesfromJGandYJ/impute code/saucie_impute_t.py b/codesfromJGandYJ/impute/saucie_impute_t.py similarity index 100% rename from codesfromJGandYJ/impute code/saucie_impute_t.py rename to codesfromJGandYJ/impute/saucie_impute_t.py diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py new file mode 100644 index 0000000..21594ef --- /dev/null +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -0,0 +1,92 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset +from scvi.models import VAE +from scvi.inference import UnsupervisedTrainer +import torch +import csv +import argparse +import sys +from benchmark_util import impute_dropout + +# pip install scvi==0.6.3 +parser = argparse.ArgumentParser(description='') +# In this script, not using arguments +parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +args = parser.parse_args() + +# Ref: +# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb + + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + #write + dropout_filename = save_path+"dropout.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + # gene_dataset = CortexDataset(save_path=save_path, total_genes=558) + gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/") + + n_epochs = 400 + lr = 1e-3 + use_batches = False + use_cuda = False + + vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) + trainer = UnsupervisedTrainer( + vae, + gene_dataset, + train_size=0.75, + use_cuda=use_cuda, + frequency=5, + ) + + trainer.train(n_epochs=n_epochs, lr=lr) + + full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) + latent, batch_indices, labels = full.sequential().get_latent() + batch_indices = batch_indices.ravel() + + # use imputation + imputed_values = full.sequential().imputation() + normalized_values = full.sequential().get_sample_scale() + + 
np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
+    np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon_normalized.npy'.format(datasetName,ratio,seed),normalized_values)
+
+
+datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+seedList = ['1','2','3']
+ratioList = [0.1]
+
+for datasetName in datasetNameList:
+    for seed in seedList:
+        for ratio in ratioList:
+            impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio)
+
+# celltype:
+#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent)

From ec3ab735197e2f6c336f0030d12db8927ba17fd7 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 18:11:51 -0600
Subject: [PATCH 038/117] add scvi

---
 codesfromJGandYJ/impute/scVi_impute.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 21594ef..0bdc546 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -9,7 +9,6 @@
 import csv
 import argparse
 import sys
-from benchmark_util import impute_dropout
 
 # pip install scvi==0.6.3
 parser = argparse.ArgumentParser(description='')

From 87b5205991bd121c65d4d45cf1561bcd0ba10301 Mon Sep 17 00:00:00 2001
From: Wang
Date: Sun, 22 Nov 2020 18:16:19 -0600
Subject: [PATCH 039/117] add scvi

---
 codesfromJGandYJ/impute/scVi_impute.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 0bdc546..28fd724 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -48,7 +48,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
         writer.writerows(features)
 
     # gene_dataset = CortexDataset(save_path=save_path, total_genes=558)
-    gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/")
+    gene_dataset = CsvDataset(dropout_filename, save_path=save_path)
 
     n_epochs = 400
     lr = 1e-3

From 09a1ae34cbe12ba49a3486344cc81575e5ca9324 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 19:34:53 -0600
Subject: [PATCH 040/117] add saucie

---
 codesfromJGandYJ/impute/SAUCIE_impute.py   | 44 +++++++++++++++++
 codesfromJGandYJ/impute/other_saucie.py    | 20 ++++++++
 codesfromJGandYJ/impute/saucie_impute_t.py | 55 ----------------------
 codesfromJGandYJ/impute/scVi_impute.py     |  2 +-
 4 files changed, 65 insertions(+), 56 deletions(-)
 create mode 100644 codesfromJGandYJ/impute/SAUCIE_impute.py
 create mode 100644 codesfromJGandYJ/impute/other_saucie.py
 delete mode 100644 codesfromJGandYJ/impute/saucie_impute_t.py

diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py
new file mode 100644
index 0000000..e79f754
--- /dev/null
+++ b/codesfromJGandYJ/impute/SAUCIE_impute.py
@@ -0,0 +1,44 @@
+import sys
+import tensorflow as tf
+import SAUCIE
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import argparse
+
+parser = argparse.ArgumentParser(description='Impute using SAUCIE')
+# In this script, not using arguments
+parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000')
+parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio')
+args = parser.parse_args()
+
+
+def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1):
+    filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
+
x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + x=np.transpose(x) + + saucie = SAUCIE.SAUCIE(x.shape[1]) + loadtrain = SAUCIE.Loader(x, shuffle=True) + saucie.train(loadtrain, steps=1000) + + loadeval = SAUCIE.Loader(x, shuffle=False) + reconstruction = saucie.get_reconstruction(loadeval) + + reconstruction=np.transpose(reconstruction) + + np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_saucie.py b/codesfromJGandYJ/impute/other_saucie.py new file mode 100644 index 0000000..51a90c9 --- /dev/null +++ b/codesfromJGandYJ/impute/other_saucie.py @@ -0,0 +1,20 @@ +#!/bin/bash +#------------------------------------------------------------------------------- +# SBATCH CONFIG +#------------------------------------------------------------------------------- +## resources +#SBATCH -A xulab +#SBATCH --partition gpu4 +#SBATCH --cpus-per-task=1 # cores per task +#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) +#SBATCH --time 2-00:00 # days-hours:minutes +#SBATCH --gres gpu:1 #gpu:1 any gpu +## labels and outputs +#SBATCH --job-name=modelpyenetCB-%j.out +#SBATCH --output=results-%j.out # %j is the unique jobID +################################################################# + +module load miniconda3 +source activate /storage/htc/joshilab/wangjue/conda_R_gpu +module load cuda/cuda-10.1.243 +python3 -W ignore SAUCIE_impute.py \ No newline at end of file diff --git a/codesfromJGandYJ/impute/saucie_impute_t.py b/codesfromJGandYJ/impute/saucie_impute_t.py deleted file mode 100644 index 5831c63..0000000 --- a/codesfromJGandYJ/impute/saucie_impute_t.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -import tensorflow as tf -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/SAUCIE-master/SAUCIE-master/') -from model import SAUCIE -from loader import Loader -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import argparse -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -args = parser.parse_args() - -# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - -x=np.transpose(x) - -saucie = SAUCIE(x.shape[1]) 
-loadtrain = Loader(x, shuffle=True) -saucie.train(loadtrain, steps=1000) - -loadeval = Loader(x, shuffle=False) -reconstruction = saucie.get_reconstruction(loadeval) - -reconstruction=np.transpose(reconstruction) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix) -# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') - -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction) - - diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py index 28fd724..3044585 100644 --- a/codesfromJGandYJ/impute/scVi_impute.py +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -11,7 +11,7 @@ import sys # pip install scvi==0.6.3 -parser = argparse.ArgumentParser(description='') +parser = argparse.ArgumentParser(description='scVi imputation') # In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') From 0ed5e109737027f2734dd1d65a4fc6267ab4ee26 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 22:26:16 -0600 Subject: [PATCH 041/117] tmp dca --- codesfromJGandYJ/impute/dca_impute.py | 81 +++++++------------------ codesfromJGandYJ/impute/other_saucie.py | 1 + 2 files changed, 22 insertions(+), 60 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 0496364..db577e2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -1,23 +1,3 @@ -#from dca.api import dca -#import anndata -#import matplotlib.pyplot as plt -#import numpy as np -#import time -#import pandas as pd - -#Ref: -# https://github.com/theislab/dca/blob/master/tutorial.ipynb -#z = pd.read_csv('/home/wangjue/biodata/scData/MMPbasal.csv') -#z = z.to_numpy() -#z = z[:,:-1] - -#selected = np.std(z, axis=0).argsort()[-2000:][::-1] -#expression_data = z[:, selected] - -#train = anndata.AnnData(expression_data) -#res = dca(train, verbose=True) -#train.X - import os import numpy as np import pandas as pd @@ -26,54 +6,35 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Imputation DCA') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/dca/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -if args.discreteTag: - filename = 
'/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - - - -features=x.T +def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) + save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) + features=x.T + #write + dropout_filename = save_path+datasetNameStr+"_dropout.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) -os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) + os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) -filename=save_path+datasetNameStr+"/mean.tsv" -imputed_values = pd.read_csv(filename,sep="\t") -imputed_values=imputed_values.T + filename=save_path+datasetNameStr+"/mean.tsv" + imputed_values = pd.read_csv(filename,sep="\t") + imputed_values=imputed_values.T -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file + np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_saucie.py b/codesfromJGandYJ/impute/other_saucie.py index 51a90c9..45bef22 100644 --- a/codesfromJGandYJ/impute/other_saucie.py +++ b/codesfromJGandYJ/impute/other_saucie.py @@ -8,6 +8,7 @@ #SBATCH --cpus-per-task=1 # cores per task #SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) #SBATCH --time 2-00:00 # days-hours:minutes +#SBATCH -J SAUCIE #SBATCH --gres gpu:1 #gpu:1 any gpu ## labels and outputs #SBATCH --job-name=modelpyenetCB-%j.out From 42578da759d619d2c4ee8d02c4cd7b93ec76a3eb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 23:50:41 -0600 Subject: [PATCH 042/117] add dca/deepimpute --- codesfromJGandYJ/impute/SAUCIE_impute.py | 2 +- codesfromJGandYJ/impute/dca_impute.py | 20 +++-- codesfromJGandYJ/impute/deepimpute_impute.py | 80 +++++++++----------- codesfromJGandYJ/impute/other_dca.sh | 14 ++++ codesfromJGandYJ/impute/other_deepimpute.py | 14 ++++ codesfromJGandYJ/impute/scVi_impute.py | 4 +- util_function.py | 42 ++++++++++ 7 files changed, 123 insertions(+), 53 deletions(-) create mode 100644 codesfromJGandYJ/impute/other_dca.sh create mode 100644 codesfromJGandYJ/impute/other_deepimpute.py diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index e79f754..3aadce7 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -36,7 +36,7 @@ def 
impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] -ratioList = [0.1] +ratioList = [0.1, 0.3, 0.6, 0.8] for datasetName in datasetNameList: for seed in seedList: diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index db577e2..95fbca7 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -13,6 +13,7 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) @@ -21,20 +22,27 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) - save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) - features=x.T #write - dropout_filename = save_path+datasetNameStr+"_dropout.csv" + dropout_filename = save_path+"dca_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) + os.system("dca "+dropout_filename+ " "+save_path+"dca_output.csv") - filename=save_path+datasetNameStr+"/mean.tsv" + filename=save_path+"dca_output.csv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T - np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file + np.save('/storage/htc/joshilab/wangjue/scGNN/dca/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 6c31962..14f5a35 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -7,53 +7,45 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute Deepimpute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/deepimpute/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() # Ref: # 
https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -#x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -data = pd.read_csv(dropout_filename, header=None) -model = MultiNet() -model.fit(data) -imputed = model.predict(data) - - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed) - +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) + + features=x + dropout_filename = save_path+"deepimpute.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + data = pd.read_csv(dropout_filename, header=None) + model = MultiNet() + model.fit(data) + imputed = model.predict(data) + + np.save('/storage/htc/joshilab/wangjue/scGNN/deepimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh new file mode 100644 index 0000000..be4dfe9 --- /dev/null +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -0,0 +1,14 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J dca +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_deepimpute.py b/codesfromJGandYJ/impute/other_deepimpute.py new file mode 100644 index 0000000..b55d6c6 --- /dev/null +++ b/codesfromJGandYJ/impute/other_deepimpute.py @@ -0,0 +1,14 @@ +#! 
/bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p Lewis,BioCompute  # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J deepimpute
+#SBATCH -o results-%j.out  # give the job output a custom name
+#SBATCH -t 2-00:00  # two days time limit
+#SBATCH -N 1  # number of nodes
+#SBATCH -n 1  # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+python3 -W ignore deepimpute_impute.py
diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py
index 3044585..5710e36 100644
--- a/codesfromJGandYJ/impute/scVi_impute.py
+++ b/codesfromJGandYJ/impute/scVi_impute.py
@@ -42,7 +42,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
     features=np.concatenate([colname,features],axis=0)
 
     #write
-    dropout_filename = save_path+"dropout.csv"
+    dropout_filename = save_path+"scvi.csv"
     with open(dropout_filename, "w") as f:
         writer = csv.writer(f)
         writer.writerows(features)
@@ -80,7 +80,7 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1):
 
 datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
 seedList = ['1','2','3']
-ratioList = [0.1]
+ratioList = [0.1, 0.3, 0.6, 0.8]
 
 for datasetName in datasetNameList:
     for seed in seedList:
diff --git a/util_function.py b/util_function.py
index 103b585..a997186 100644
--- a/util_function.py
+++ b/util_function.py
@@ -199,6 +199,48 @@ def __getitem__(self, idx):
 
         return sample,idx
 
+class scDatasetDropoutSparse(Dataset):
+    def __init__(self, data=None, discreteTag=False, ratio=0.1, seed=1, transform=None):
+        """
+        Sparse version of the dropout dataset.
+        Args:
+            data (scipy sparse matrix): genes x cells expression matrix
+            transform (callable, optional): transform applied to each sample
+        """
+
+        self.featuresOriginal = data.transpose()
+        self.ratio = ratio
+        # Random seed
+        # np.random.uniform(1, 2)
+        self.features, self.i, self.j, self.ix = impute_dropout(self.featuresOriginal, seed=seed, rate=self.ratio)
+        # Now lines are cells, and cols are genes
+        # self.features = self.features.transpose()
+        self.transform = transform
+        # check whether log or not
+        self.discreteTag = discreteTag
+
+    def __len__(self):
+        return self.features.shape[0]
+
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+
+        sample = self.features[idx,:]
+        if type(sample)==sp.lil_matrix:
+            sample = torch.from_numpy(sample.toarray())
+        else:
+            sample = torch.from_numpy(sample)
+
+        # apply the transform after fetching the data
+        if self.transform:
+            sample = self.transform(sample)
+
+        if not self.discreteTag:
+            sample = torch.log(sample+1)
+
+        return sample,idx
+
 class scDataset(Dataset):
     def __init__(self, data=None, transform=None):
         """

From e9379022f354014ecd00b69f87ffbaa035f72818 Mon Sep 17 00:00:00 2001
From: Wang
Date: Tue, 24 Nov 2020 23:53:10 -0600
Subject: [PATCH 043/117] add deepimpute and saucie

---
 .../impute/{other_deepimpute.py => other_deepimpute.sh}      | 0
 codesfromJGandYJ/impute/{other_saucie.py => other_saucie.sh} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename codesfromJGandYJ/impute/{other_deepimpute.py => other_deepimpute.sh} (100%)
 rename codesfromJGandYJ/impute/{other_saucie.py => other_saucie.sh} (100%)

diff --git a/codesfromJGandYJ/impute/other_deepimpute.py b/codesfromJGandYJ/impute/other_deepimpute.sh
similarity index 100%
rename from codesfromJGandYJ/impute/other_deepimpute.py
rename to codesfromJGandYJ/impute/other_deepimpute.sh
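A note on the scDatasetDropoutSparse class added to util_function.py in PATCH 042 above: the sketch below shows how such a dataset would typically be consumed. It is a hedged smoke test, not repository code — it assumes the repo root is on sys.path, that impute_dropout (imported inside util_function.py) returns the masked matrix in a form the type check in __getitem__ can handle, and the synthetic matrix and batch size are purely illustrative.

import scipy.sparse as sp
from torch.utils.data import DataLoader

from util_function import scDatasetDropoutSparse

# Synthetic genes x cells counts; the class itself transposes to cells x genes.
counts = sp.random(200, 50, density=0.1, format='csr') * 10
dataset = scDatasetDropoutSparse(data=counts, ratio=0.1, seed=1)

loader = DataLoader(dataset, batch_size=16, shuffle=False)
for sample, idx in loader:
    # Samples come back log(x+1)-transformed because discreteTag defaults to False.
    print(sample.shape, idx.shape)
    break

diff --git a/codesfromJGandYJ/impute/other_saucie.py 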
b/codesfromJGandYJ/impute/other_saucie.sh similarity index 100% rename from codesfromJGandYJ/impute/other_saucie.py rename to codesfromJGandYJ/impute/other_saucie.sh From 66e22044bd7e6174fb388fbe1859d9fc563918d9 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 24 Nov 2020 23:57:59 -0600 Subject: [PATCH 044/117] update deepimpute --- codesfromJGandYJ/impute/deepimpute_impute.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 14f5a35..03b3da4 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -25,8 +25,6 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - - save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) features=x dropout_filename = save_path+"deepimpute.csv" From 65b5203419da4fb60cd140184f9996f7a2ed907c Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:02:05 -0600 Subject: [PATCH 045/117] update dca --- codesfromJGandYJ/impute/other_dca.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index be4dfe9..561a761 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -10,5 +10,5 @@ #SBATCH --mem=128G ################################################################# module load miniconda3 -source activate conda_R +source activate /storage/htc/joshilab/wangjue/conda_R_gpu python3 -W ignore dca_impute.py From 10d6387f873f88b76f393b100e84d5ce9d621dd9 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:16:24 -0600 Subject: [PATCH 046/117] update deepimpute to raw counts --- codesfromJGandYJ/impute/deepimpute_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index 03b3da4..bcf1a3a 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -24,7 +24,7 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): x = x.tolist() x=x.todense() x=np.asarray(x) - x=np.log(x+1) + # x=np.log(x+1) features=x dropout_filename = save_path+"deepimpute.csv" From d36c62a5fb4eff8f1c7c34b71c8466e2288a178c Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 00:21:07 -0600 Subject: [PATCH 047/117] update GPU settings --- codesfromJGandYJ/impute/other_dca.sh | 23 +++++++++++++++-------- codesfromJGandYJ/impute/other_saucie.sh | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 561a761..c30f0ef 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -1,14 +1,21 @@ -#! 
/bin/bash -######################### Batch Headers ######################### +#!/bin/bash +#------------------------------------------------------------------------------- +# SBATCH CONFIG +#------------------------------------------------------------------------------- +## resources #SBATCH -A xulab -#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH --partition gpu3,gpu4 +#SBATCH --cpus-per-task=1 # cores per task +#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) +#SBATCH --time 2-00:00 # days-hours:minutes #SBATCH -J dca -#SBATCH -o results-%j.out # give the job output a custom name -#SBATCH -t 2-00:00 # two days time limit -#SBATCH -N 1 # number of nodes -#SBATCH -n 1 # number of cores (AKA tasks) -#SBATCH --mem=128G +#SBATCH --gres gpu:1 #gpu:1 any gpu +## labels and outputs +#SBATCH --job-name=modelpyenetCB-%j.out +#SBATCH --output=results-%j.out # %j is the unique jobID ################################################################# + module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_gpu +module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 45bef22..4716bb6 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -4,7 +4,7 @@ #------------------------------------------------------------------------------- ## resources #SBATCH -A xulab -#SBATCH --partition gpu4 +#SBATCH --partition gpu3,gpu4 #SBATCH --cpus-per-task=1 # cores per task #SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) #SBATCH --time 2-00:00 # days-hours:minutes From d8469d67c183170f0fe030bb39ef683b81f7cf3b Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:28:41 -0600 Subject: [PATCH 048/117] update saucie --- codesfromJGandYJ/impute/other_dca.sh | 2 +- codesfromJGandYJ/impute/other_saucie.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index c30f0ef..0dad353 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -16,6 +16,6 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_gpu +source activate /storage/htc/joshilab/wangjue/conda_R_dca module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 4716bb6..75ce679 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -16,6 +16,6 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_gpu +source activate /storage/htc/joshilab/wangjue/conda_R_saucie module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From 992580ea323f686b2b0a16cea604b844be9e36e7 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:37:28 -0600 Subject: [PATCH 049/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 3aadce7..4bcbbcd 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -5,6 +5,7 @@ import 
matplotlib.pyplot as plt import pandas as pd import argparse +sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments From d9d4932eb773b8ad6d316df442d01bc6154325d1 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:41:54 -0600 Subject: [PATCH 050/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 4bcbbcd..5fc479e 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,11 +1,12 @@ import sys +sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") import tensorflow as tf import SAUCIE import numpy as np import matplotlib.pyplot as plt import pandas as pd import argparse -sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") + parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments From 786b79cb11c18d772fa3be697fdefa379d8c080e Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:45:48 -0600 Subject: [PATCH 051/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 5fc479e..36cd755 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,5 +1,5 @@ import sys -sys.path.append("/storage/htc/joshilab/wangjue/SAUCIE/") +sys.path.append("/storage/htc/joshilab/wangjue/") import tensorflow as tf import SAUCIE import numpy as np From a9c96833f963cb99591350e83edb47b23d6d14bb Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 10:50:41 -0600 Subject: [PATCH 052/117] update saucie directory --- codesfromJGandYJ/impute/other_saucie.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 75ce679..01fec69 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -16,6 +16,7 @@ ################################################################# module load miniconda3 -source activate /storage/htc/joshilab/wangjue/conda_R_saucie +# source activate /storage/htc/joshilab/wangjue/conda_R_saucie +source activate /storage/htc/joshilab/wangjue/conda_R_gpu module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From c9326143f7330b0c54611afd69bf28ff59362076 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 25 Nov 2020 12:55:24 -0600 Subject: [PATCH 053/117] update saucie directory --- codesfromJGandYJ/impute/SAUCIE_impute.py | 1 - codesfromJGandYJ/impute/dca_impute.py | 1 - codesfromJGandYJ/impute/other_saucie.sh | 29 ++++++++++-------------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 36cd755..7d3f5cb 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,6 +1,5 @@ import sys sys.path.append("/storage/htc/joshilab/wangjue/") -import tensorflow as tf import SAUCIE import numpy as np import matplotlib.pyplot as plt diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 95fbca7..8fd9519 100644 --- 
a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt -import torch import csv import argparse import sys diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index 01fec69..f517112 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -1,22 +1,17 @@ -#!/bin/bash -#------------------------------------------------------------------------------- -# SBATCH CONFIG -#------------------------------------------------------------------------------- -## resources +#! /bin/bash +######################### Batch Headers ######################### #SBATCH -A xulab -#SBATCH --partition gpu3,gpu4 -#SBATCH --cpus-per-task=1 # cores per task -#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) -#SBATCH --time 2-00:00 # days-hours:minutes -#SBATCH -J SAUCIE -#SBATCH --gres gpu:1 #gpu:1 any gpu -## labels and outputs -#SBATCH --job-name=modelpyenetCB-%j.out -#SBATCH --output=results-%j.out # %j is the unique jobID +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J saucie +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G ################################################################# module load miniconda3 -# source activate /storage/htc/joshilab/wangjue/conda_R_saucie -source activate /storage/htc/joshilab/wangjue/conda_R_gpu -module load cuda/cuda-10.1.243 +source activate /storage/htc/joshilab/wangjue/conda_R_saucie +# source activate /storage/htc/joshilab/wangjue/conda_R_gpu +# module load cuda/cuda-10.1.243 python3 -W ignore SAUCIE_impute.py \ No newline at end of file From c74ffbdee8e3833f09c00da69df45b202270b1a7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 07:59:45 -0600 Subject: [PATCH 054/117] modify saucie --- codesfromJGandYJ/impute/SAUCIE_impute.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 7d3f5cb..ae7018d 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -1,6 +1,7 @@ import sys sys.path.append("/storage/htc/joshilab/wangjue/") import SAUCIE +import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import pandas as pd @@ -13,7 +14,7 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() - +# modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) @@ -21,18 +22,17 @@ def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - x=np.transpose(x) - - saucie = SAUCIE.SAUCIE(x.shape[1]) - loadtrain = SAUCIE.Loader(x, shuffle=True) - saucie.train(loadtrain, steps=1000) - - loadeval = SAUCIE.Loader(x, shuffle=False) - reconstruction = saucie.get_reconstruction(loadeval) - + 
loader_train = SAUCIE.Loader(x, shuffle=True) + loader_eval = SAUCIE.Loader(x, shuffle=False) + # clear the computational graph + tf.reset_default_graph() + # build the SAUCIE model + model = SAUCIE.SAUCIE(x.shape[1]) + # train the model! + model.train(loader_train, steps=2000) + reconstruction = model.get_reconstruction(loader_eval) reconstruction=np.transpose(reconstruction) - np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] From 1fe435aac44b6f4e8e143831bd83803d57cd574e Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 09:36:03 -0600 Subject: [PATCH 055/117] add dca update --- codesfromJGandYJ/impute/dca_impute.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 8fd9519..3adba5d 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -19,8 +19,7 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() - x=np.asarray(x) - + # x=np.asarray(x) features=x.T #write @@ -29,9 +28,9 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+"dca_output.csv") + os.system("dca "+dropout_filename+ " "+save_path+"tmpdca") - filename=save_path+"dca_output.csv" + filename=save_path+"tmpdca/mean.tsv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T From 3a12bb0a432bf60e6ef15f276e1fc19d94e6c89d Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 09:43:41 -0600 Subject: [PATCH 056/117] add dca --- codesfromJGandYJ/impute/other_dca.sh | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 0dad353..02f64ca 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -1,21 +1,15 @@ -#!/bin/bash -#------------------------------------------------------------------------------- -# SBATCH CONFIG -#------------------------------------------------------------------------------- -## resources +#! 
/bin/bash +######################### Batch Headers ######################### #SBATCH -A xulab -#SBATCH --partition gpu3,gpu4 -#SBATCH --cpus-per-task=1 # cores per task -#SBATCH --mem-per-cpu=12G # memory per core (default is 1GB/core) -#SBATCH --time 2-00:00 # days-hours:minutes -#SBATCH -J dca -#SBATCH --gres gpu:1 #gpu:1 any gpu -## labels and outputs -#SBATCH --job-name=modelpyenetCB-%j.out -#SBATCH --output=results-%j.out # %j is the unique jobID +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J DCA +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G ################################################################# module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_dca -module load cuda/cuda-10.1.243 python3 -W ignore dca_impute.py From 068559dbe21602b7e89a774af64c0d74490e8fe9 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 10:32:43 -0600 Subject: [PATCH 057/117] update dca --- codesfromJGandYJ/impute/dca_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 3adba5d..c0d62ba 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -19,7 +19,7 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = np.load(filename,allow_pickle=True) x = x.tolist() x=x.todense() - # x=np.asarray(x) + x=np.asarray(x) features=x.T #write From c3252d1150ecea9f8e6b409bd23d568110da91a0 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 10:33:33 -0600 Subject: [PATCH 058/117] update dca --- codesfromJGandYJ/impute/dca_impute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index c0d62ba..a9c16b2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -14,6 +14,7 @@ save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' +# Ref: https://github.com/theislab/dca def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) From 87608115870e5fd63dd069384c59cae818fded74 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:32:48 -0600 Subject: [PATCH 059/117] add saver in impute --- codesfromJGandYJ/impute/SAUCIE_impute.py | 6 +- codesfromJGandYJ/impute/SAVER_impute.py | 81 ++++++++++++----------- codesfromJGandYJ/impute/SCIMPUTE.py | 69 ++++++++++--------- codesfromJGandYJ/impute/dca_impute.py | 9 ++- codesfromJGandYJ/impute/other_saver.sh | 14 ++++ codesfromJGandYJ/impute/other_scimpute.sh | 14 ++++ codesfromJGandYJ/impute/saver.r | 16 +++++ codesfromJGandYJ/impute/scimpute.r | 1 + 8 files changed, 133 insertions(+), 77 deletions(-) create mode 100644 codesfromJGandYJ/impute/other_saver.sh create mode 100644 codesfromJGandYJ/impute/other_scimpute.sh create mode 100644 codesfromJGandYJ/impute/saver.r create mode 100644 codesfromJGandYJ/impute/scimpute.r diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index ae7018d..07d7bdc 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -7,6 +7,11 @@ import pandas as pd import argparse +# 
modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb +# Notes: Have to use very old tensorflow downloaded from conda: +# python==3.6.12 +# tensorflow==1.4.0 +# numpy==1.19.4 parser = argparse.ArgumentParser(description='Impute use SAUCIE') # In this script, not using arguments @@ -14,7 +19,6 @@ parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) x = np.load(filename,allow_pickle=True) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 5d32405..3425dfc 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -5,51 +5,52 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout +# Ref: +# https://mohuangx.github.io/SAVER/articles/saver-tutorial.html +# Use python to generate input for saver.r, then output -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute SAVER') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saver/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x - - - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): + filename = 
'/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + features=x.T + + #write + dropout_filename = save_path+"saver_input.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + #run the R script + os.system("Rscript saver.r "+save_path+"saver_input.csv "+save_path+"saver_output.csv ") + + filename=save_path+"saver_output.csv" + imputed_values = pd.read_csv(filename,sep="\t") + imputed_values=imputed_values.T + + np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py index 246239d..730bba9 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE.py +++ b/codesfromJGandYJ/impute/SCIMPUTE.py @@ -5,52 +5,51 @@ import csv import argparse import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout +# Notes in install scimpute: +# Have to add in R: +# Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS=TRUE) +# Ref: https://github.com/Vivianstats/scImpute -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') +parser = argparse.ArgumentParser(description='Impute scImpute') +# In this script, not using arguments parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') +parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') args = parser.parse_args() -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scimpute/{}/'.format(args.data) +def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + 
x=np.asarray(x)
+    x=np.log(x+1)
 
-features=x
+    features=x.T
 
+    #write
+    dropout_filename = save_path+"saver_input.csv"
+    with open(dropout_filename, "w") as f:
+        writer = csv.writer(f)
+        writer.writerows(features)
 
-#write
-dropout_filename = save_path+datasetNameStr+"_dropout.csv"
-with open(dropout_filename, "w") as f:
-    writer = csv.writer(f)
-    writer.writerows(features)
-
+    #run the R script
+    os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv")
+
+    filename=save_path+"scimpute_input.csv"
+    imputed_values = pd.read_csv(filename,sep="\t")
+    imputed_values=imputed_values.T
+
+    np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
+
+datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+seedList = ['1','2','3']
+ratioList = [0.1, 0.3, 0.6, 0.8]
+
+for datasetName in datasetNameList:
+    for seed in seedList:
+        for ratio in ratioList:
+            impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio)
diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py
index a9c16b2..46ecc8a 100644
--- a/codesfromJGandYJ/impute/dca_impute.py
+++ b/codesfromJGandYJ/impute/dca_impute.py
@@ -6,6 +6,14 @@
 import csv
 import argparse
 import sys
 
+# Ref: https://github.com/theislab/dca
+# Notes: As TensorFlow moved to 2.0, a lot of things changed; these are the versions tested on Nov. 26, 2020
+# python==3.7.9
+# tensorflow==1.15.4
+# keras==2.3.1
+# theano==1.0.5
+# scanpy==1.5.1
+
 parser = argparse.ArgumentParser(description='Imputation DCA')
 # In this script, not using arguments
 parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000')
@@ -14,7 +22,6 @@
 args = parser.parse_args()
 
 save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/'
-# Ref: https://github.com/theislab/dca
 def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1):
     filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
diff --git a/codesfromJGandYJ/impute/other_saver.sh b/codesfromJGandYJ/impute/other_saver.sh
new file mode 100644
index 0000000..17aa82b
--- /dev/null
+++ b/codesfromJGandYJ/impute/other_saver.sh
@@ -0,0 +1,14 @@
+#! /bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p BioCompute,Lewis  # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J Saver
+#SBATCH -o results-%j.out  # give the job output a custom name
+#SBATCH -t 2-00:00  # two days time limit
+#SBATCH -N 1  # number of nodes
+#SBATCH -n 12  # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+python3 -W ignore SAVER_impute.py
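The SAVER and scImpute wrappers added in this patch share one pattern: write the matrix to CSV, shell out to an R script, read the result back. Below is a minimal, hedged sketch of that round-trip — the helper name is hypothetical, and the two deviations from the code above (subprocess.run with check=True instead of os.system, and header=None on read) are editor suggestions: check=True surfaces R failures that os.system silently ignores, and header=None matters because saver.r writes its table with col.names = F, so there is no header row to consume.

import csv
import subprocess

import pandas as pd

def run_r_imputation(features, rscript, in_csv, out_csv):
    # features: one row per gene, matching the wrappers above
    with open(in_csv, "w") as f:
        csv.writer(f).writerows(features)
    # check=True raises CalledProcessError if the R script exits non-zero
    subprocess.run(["Rscript", rscript, in_csv, out_csv], check=True)
    # header=None because the R side writes no header row
    return pd.read_csv(out_csv, sep="\t", header=None).to_numpy().T

diff --git a/codesfromJGandYJ/impute/other_scimpute.sh b/codesfromJGandYJ/impute/other_scimpute.sh
new file mode 100644
index 0000000..8dad300
--- /dev/null
+++ b/codesfromJGandYJ/impute/other_scimpute.sh
@@ -0,0 +1,14 @@
+#! 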
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scimpute +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 12 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore SCIMPUTE_impute.py \ No newline at end of file diff --git a/codesfromJGandYJ/impute/saver.r b/codesfromJGandYJ/impute/saver.r new file mode 100644 index 0000000..1b0953b --- /dev/null +++ b/codesfromJGandYJ/impute/saver.r @@ -0,0 +1,16 @@ +# Usage: +# Rscript saver.r input.txt output.txt +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(SAVER) +inputfile = args[1] +outputfile = args[2] +raw.data <- read.csv(inputfile, header = FALSE, sep=',') +expr <- as.matrix(raw.data) +# Use 12 cores in saver +expr.saver <- saver(expr, ncores = 12, estimates.only = TRUE) +write.table(expr.saver, file=outputfile, row.names = F, col.names = F, sep = "\t") \ No newline at end of file diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r new file mode 100644 index 0000000..503fa1d --- /dev/null +++ b/codesfromJGandYJ/impute/scimpute.r @@ -0,0 +1 @@ +#TODO \ No newline at end of file From 64458d4fdb4e154ad3b9c679ef987cecd5e8dfd7 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:50:16 -0600 Subject: [PATCH 060/117] add scimpute in imputation --- codesfromJGandYJ/impute/SAVER_impute.py | 1 + codesfromJGandYJ/impute/SCIMPUTE.py | 8 ++++---- codesfromJGandYJ/impute/scimpute.r | 22 +++++++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 3425dfc..05a5b2a 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -7,6 +7,7 @@ import sys # Ref: +# https://github.com/mohuangx/SAVER # https://mohuangx.github.io/SAVER/articles/saver-tutorial.html # Use python to generate input for saver.r, then output diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE.py index 730bba9..7f44de8 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE.py +++ b/codesfromJGandYJ/impute/SCIMPUTE.py @@ -36,13 +36,13 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): writer.writerows(features) #run the R script - os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv") + os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"/tmpscimpute/ scimpute_output.csv") - filename=save_path+"scimpute_input.csv" - imputed_values = pd.read_csv(filename,sep="\t") + filename=save_path+"/tmpscimpute/scimpute_output.csv" + imputed_values = pd.read_csv(filename,sep=",") imputed_values=imputed_values.T - np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r 
index 503fa1d..d4ed3f3 100644 --- a/codesfromJGandYJ/impute/scimpute.r +++ b/codesfromJGandYJ/impute/scimpute.r @@ -1 +1,21 @@ -#TODO \ No newline at end of file +# Usage: +# Rscript scImpute.r input.txt output.txt +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(scImpute) +inputfile = args[1] +outputDir = args[2] +outputfile = args[3] +scimpute(# full path to raw count matrix + count_path = inputfile, + infile = "csv", # format of input file + outfile = "csv", # format of output file + out_dir = "./", # full path to output directory + labeled = outputDir, # cell type labels not available + drop_thre = 0.5, # threshold set on dropout probability + Kcluster = 2, # 2 cell subpopulations + ncores = 12) # number of cores used in parallel computation From 583ab13e20e9ff9c5f5f77409123c2120c81a582 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:53:08 -0600 Subject: [PATCH 061/117] add scimpute in imputation --- codesfromJGandYJ/impute/{SCIMPUTE.py => SCIMPUTE_impute.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename codesfromJGandYJ/impute/{SCIMPUTE.py => SCIMPUTE_impute.py} (100%) diff --git a/codesfromJGandYJ/impute/SCIMPUTE.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py similarity index 100% rename from codesfromJGandYJ/impute/SCIMPUTE.py rename to codesfromJGandYJ/impute/SCIMPUTE_impute.py From 3cf5d3c8db87033cb540ee5b59eee6c8c410e4b8 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 15:59:06 -0600 Subject: [PATCH 062/117] add scimpute in imputation --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 7f44de8..83519b2 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -30,7 +30,7 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): features=x.T #write - dropout_filename = save_path+"saver_input.csv" + dropout_filename = save_path+"scimpute_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) From 7eb9a207b16e95e5fd78aab9db9518ae5de28b1f Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 16:06:43 -0600 Subject: [PATCH 063/117] add scimpute in imputation --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 83519b2..87826fb 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -27,7 +27,17 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): x=x.todense() x=np.asarray(x) x=np.log(x+1) - features=x.T + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + features=features.T #write dropout_filename = save_path+"scimpute_input.csv" From 9f064264db1878e70416db2fc153e4824b855bf6 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 17:36:55 -0600 Subject: 
[PATCH 064/117] add scimpute in imputation debug

---
 codesfromJGandYJ/impute/SCIMPUTE_impute.py | 11 ++++++++---
 codesfromJGandYJ/impute/scimpute.r         |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py
index 87826fb..b19a8da 100644
--- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py
+++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py
@@ -54,9 +54,14 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1):
 
     np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values)
 
-datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
-seedList = ['1','2','3']
-ratioList = [0.1, 0.3, 0.6, 0.8]
+# datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
+# seedList = ['1','2','3']
+# ratioList = [0.1, 0.3, 0.6, 0.8]
+
+# Debug
+datasetNameList = ['12.Klein']
+seedList = ['1']
+ratioList = [0.1]
 
 for datasetName in datasetNameList:
     for seed in seedList:
diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r
index d4ed3f3..d7fbb16 100644
--- a/codesfromJGandYJ/impute/scimpute.r
+++ b/codesfromJGandYJ/impute/scimpute.r
@@ -14,8 +14,8 @@ scimpute(# full path to raw count matrix
     count_path = inputfile,
     infile = "csv",           # format of input file
     outfile = "csv",          # format of output file
-    out_dir = "./",           # full path to output directory
-    labeled = outputDir,      # cell type labels not available
+    out_dir = outputDir,      # full path to output directory
+    labeled = FALSE,          # cell type labels not available
     drop_thre = 0.5,          # threshold set on dropout probability
     Kcluster = 2,             # 2 cell subpopulations
     ncores = 12)              # number of cores used in parallel computation

From df944a433c8a8c985abe750d753a6fb4d39f943c Mon Sep 17 00:00:00 2001
From: Wang
Date: Thu, 26 Nov 2020 18:08:27 -0600
Subject: [PATCH 065/117] move unused scripts in results to a new folder

---
 results/{ => NotForFinalUsage}/calculateROGUE.R                   | 0
 results/{ => NotForFinalUsage}/compare_varID.py                   | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_23dropout.txt          | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_explore.txt            | 0
 results/{ => NotForFinalUsage}/jobinfo_imp_louvain_2.txt          | 0
 results/{ => NotForFinalUsage}/results_ROGUE.py                   | 0
 results/{ => NotForFinalUsage}/results_Reading.py                 | 0
 results/{ => NotForFinalUsage}/results_Reading_23.py              | 0
 results/{ => NotForFinalUsage}/results_Reading_23dropout.py       | 0
 results/{ => NotForFinalUsage}/results_Reading_explore.py         | 0
 results/{ => NotForFinalUsage}/results_Reading_graph.py           | 0
 results/{ => NotForFinalUsage}/results_imputation.sh              | 0
 results/{ => NotForFinalUsage}/results_imputation_0.3.sh          | 0
 results/{ => NotForFinalUsage}/results_imputation_grid.sh         | 0
 results/{ => NotForFinalUsage}/results_impute.py                  | 0
 results/{ => NotForFinalUsage}/results_impute_graph_ROC.py        | 0
 results/{ => NotForFinalUsage}/results_impute_graph_ROC.sh        | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Celltype.sh   | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Impute.sh     | 0
 results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23.sh  | 0
 .../submitCluster_Result_Impute_23dropout.sh                      | 0
 .../{ => NotForFinalUsage}/submitCluster_Result_Impute_explore.sh | 0
 .../{ => NotForFinalUsage}/submitCluster_Result_Impute_graph.sh   | 0
 results/{ => NotForFinalUsage}/summary.sh                         | 0
 results/{ => NotForFinalUsage}/summary_cmd.py                     | 0
 25 files changed, 0 insertions(+), 0 deletions(-)
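As a companion to the debug run in PATCH 064 above, a hedged sanity check on the single reconstruction it writes: this loads the 12.Klein output and compares it to the masked input matrix the wrapper started from (not the unmasked ground truth, which the project's benchmark code uses). The orientation fix-up and the median-absolute-difference smoke metric are editorial assumptions and simplifications, not repository code.

import numpy as np

# Reconstruction written by the debug configuration in PATCH 064.
recon = np.load('/storage/htc/joshilab/wangjue/scGNN/scimpute/12.Klein_0.1_1_recon.npy')

# Masked input the wrapper read (same preprocessing as the wrappers above).
x = np.load('/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_1/'
            '12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy',
            allow_pickle=True).tolist().todense()
x = np.log(np.asarray(x) + 1)

# The wrappers transpose before saving, so align orientations first.
if recon.shape != x.shape:
    recon = recon.T
print('shapes:', recon.shape, x.shape)
print('median |recon - input|:', np.median(np.abs(recon - x)))

rename results/{ => 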
NotForFinalUsage}/calculateROGUE.R (100%) rename results/{ => NotForFinalUsage}/compare_varID.py (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_23dropout.txt (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_explore.txt (100%) rename results/{ => NotForFinalUsage}/jobinfo_imp_louvain_2.txt (100%) rename results/{ => NotForFinalUsage}/results_ROGUE.py (100%) rename results/{ => NotForFinalUsage}/results_Reading.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_23.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_23dropout.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_explore.py (100%) rename results/{ => NotForFinalUsage}/results_Reading_graph.py (100%) rename results/{ => NotForFinalUsage}/results_imputation.sh (100%) rename results/{ => NotForFinalUsage}/results_imputation_0.3.sh (100%) rename results/{ => NotForFinalUsage}/results_imputation_grid.sh (100%) rename results/{ => NotForFinalUsage}/results_impute.py (100%) rename results/{ => NotForFinalUsage}/results_impute_graph_ROC.py (100%) rename results/{ => NotForFinalUsage}/results_impute_graph_ROC.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Celltype.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_23dropout.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_explore.sh (100%) rename results/{ => NotForFinalUsage}/submitCluster_Result_Impute_graph.sh (100%) rename results/{ => NotForFinalUsage}/summary.sh (100%) rename results/{ => NotForFinalUsage}/summary_cmd.py (100%) diff --git a/results/calculateROGUE.R b/results/NotForFinalUsage/calculateROGUE.R similarity index 100% rename from results/calculateROGUE.R rename to results/NotForFinalUsage/calculateROGUE.R diff --git a/results/compare_varID.py b/results/NotForFinalUsage/compare_varID.py similarity index 100% rename from results/compare_varID.py rename to results/NotForFinalUsage/compare_varID.py diff --git a/results/jobinfo_imp_23dropout.txt b/results/NotForFinalUsage/jobinfo_imp_23dropout.txt similarity index 100% rename from results/jobinfo_imp_23dropout.txt rename to results/NotForFinalUsage/jobinfo_imp_23dropout.txt diff --git a/results/jobinfo_imp_explore.txt b/results/NotForFinalUsage/jobinfo_imp_explore.txt similarity index 100% rename from results/jobinfo_imp_explore.txt rename to results/NotForFinalUsage/jobinfo_imp_explore.txt diff --git a/results/jobinfo_imp_louvain_2.txt b/results/NotForFinalUsage/jobinfo_imp_louvain_2.txt similarity index 100% rename from results/jobinfo_imp_louvain_2.txt rename to results/NotForFinalUsage/jobinfo_imp_louvain_2.txt diff --git a/results/results_ROGUE.py b/results/NotForFinalUsage/results_ROGUE.py similarity index 100% rename from results/results_ROGUE.py rename to results/NotForFinalUsage/results_ROGUE.py diff --git a/results/results_Reading.py b/results/NotForFinalUsage/results_Reading.py similarity index 100% rename from results/results_Reading.py rename to results/NotForFinalUsage/results_Reading.py diff --git a/results/results_Reading_23.py b/results/NotForFinalUsage/results_Reading_23.py similarity index 100% rename from results/results_Reading_23.py rename to results/NotForFinalUsage/results_Reading_23.py diff --git a/results/results_Reading_23dropout.py b/results/NotForFinalUsage/results_Reading_23dropout.py similarity index 100% rename 
from results/results_Reading_23dropout.py rename to results/NotForFinalUsage/results_Reading_23dropout.py diff --git a/results/results_Reading_explore.py b/results/NotForFinalUsage/results_Reading_explore.py similarity index 100% rename from results/results_Reading_explore.py rename to results/NotForFinalUsage/results_Reading_explore.py diff --git a/results/results_Reading_graph.py b/results/NotForFinalUsage/results_Reading_graph.py similarity index 100% rename from results/results_Reading_graph.py rename to results/NotForFinalUsage/results_Reading_graph.py diff --git a/results/results_imputation.sh b/results/NotForFinalUsage/results_imputation.sh similarity index 100% rename from results/results_imputation.sh rename to results/NotForFinalUsage/results_imputation.sh diff --git a/results/results_imputation_0.3.sh b/results/NotForFinalUsage/results_imputation_0.3.sh similarity index 100% rename from results/results_imputation_0.3.sh rename to results/NotForFinalUsage/results_imputation_0.3.sh diff --git a/results/results_imputation_grid.sh b/results/NotForFinalUsage/results_imputation_grid.sh similarity index 100% rename from results/results_imputation_grid.sh rename to results/NotForFinalUsage/results_imputation_grid.sh diff --git a/results/results_impute.py b/results/NotForFinalUsage/results_impute.py similarity index 100% rename from results/results_impute.py rename to results/NotForFinalUsage/results_impute.py diff --git a/results/results_impute_graph_ROC.py b/results/NotForFinalUsage/results_impute_graph_ROC.py similarity index 100% rename from results/results_impute_graph_ROC.py rename to results/NotForFinalUsage/results_impute_graph_ROC.py diff --git a/results/results_impute_graph_ROC.sh b/results/NotForFinalUsage/results_impute_graph_ROC.sh similarity index 100% rename from results/results_impute_graph_ROC.sh rename to results/NotForFinalUsage/results_impute_graph_ROC.sh diff --git a/results/submitCluster_Result_Celltype.sh b/results/NotForFinalUsage/submitCluster_Result_Celltype.sh similarity index 100% rename from results/submitCluster_Result_Celltype.sh rename to results/NotForFinalUsage/submitCluster_Result_Celltype.sh diff --git a/results/submitCluster_Result_Impute.sh b/results/NotForFinalUsage/submitCluster_Result_Impute.sh similarity index 100% rename from results/submitCluster_Result_Impute.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute.sh diff --git a/results/submitCluster_Result_Impute_23.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_23.sh similarity index 100% rename from results/submitCluster_Result_Impute_23.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_23.sh diff --git a/results/submitCluster_Result_Impute_23dropout.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh similarity index 100% rename from results/submitCluster_Result_Impute_23dropout.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh diff --git a/results/submitCluster_Result_Impute_explore.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh similarity index 100% rename from results/submitCluster_Result_Impute_explore.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh diff --git a/results/submitCluster_Result_Impute_graph.sh b/results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh similarity index 100% rename from results/submitCluster_Result_Impute_graph.sh rename to results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh diff --git a/results/summary.sh 
b/results/NotForFinalUsage/summary.sh similarity index 100% rename from results/summary.sh rename to results/NotForFinalUsage/summary.sh diff --git a/results/summary_cmd.py b/results/NotForFinalUsage/summary_cmd.py similarity index 100% rename from results/summary_cmd.py rename to results/NotForFinalUsage/summary_cmd.py From 4db2a4eb107c0f0a2de5d4d0eb85defdcf8aa7ab Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 19:14:45 -0600 Subject: [PATCH 066/117] reorganize old codes --- {otherresults => bak/otherresults}/BAK_MAGIC.py | 0 {otherresults => bak/otherresults}/MAGIC_analysis.sh | 0 .../otherresults}/MAGIC_analysis_usage.sh | 0 {otherresults => bak/otherresults}/MAGIC_impute.py | 0 {otherresults => bak/otherresults}/MAGIC_impute_usage.py | 0 .../otherresults}/Other_Results_Evaluation.sh | 0 .../otherresults}/Other_results_Reading.py | 0 .../otherresults}/Other_results_celltype.py | 0 .../otherresults}/Other_results_impute.py | 0 {otherresults => bak/otherresults}/README.md | 0 {otherresults => bak/otherresults}/SAUCIE_analysis.sh | 0 {otherresults => bak/otherresults}/SAUCIE_celltype.py | 0 {otherresults => bak/otherresults}/SAUCIE_impute.py | 0 {otherresults => bak/otherresults}/SAVER_impute.R | 0 {otherresults => bak/otherresults}/SCIMPUTE_impute.R | 0 {otherresults => bak/otherresults}/dca_impute.py | 0 {otherresults => bak/otherresults}/scVi_impute.py | 0 .../otherresults}/simulation_generator.R | 0 .../NotForFinalUsage => bak/results}/calculateROGUE.R | 0 .../NotForFinalUsage => bak/results}/compare_varID.py | 0 .../results}/jobinfo_imp_23dropout.txt | 0 .../results}/jobinfo_imp_explore.txt | 0 .../results}/jobinfo_imp_louvain_2.txt | 0 .../NotForFinalUsage => bak/results}/results_ROGUE.py | 0 .../NotForFinalUsage => bak/results}/results_Reading.py | 0 .../results}/results_Reading_23.py | 0 .../results}/results_Reading_23dropout.py | 0 .../results}/results_Reading_explore.py | 0 .../results}/results_Reading_graph.py | 0 .../results}/results_imputation.sh | 0 .../results}/results_imputation_0.3.sh | 0 .../results}/results_imputation_grid.sh | 0 .../NotForFinalUsage => bak/results}/results_impute.py | 0 .../results}/results_impute_graph_ROC.py | 0 .../results}/results_impute_graph_ROC.sh | 0 .../results}/submitCluster_Result_Celltype.sh | 0 .../results}/submitCluster_Result_Impute.sh | 0 .../results}/submitCluster_Result_Impute_23.sh | 0 .../results}/submitCluster_Result_Impute_23dropout.sh | 0 .../results}/submitCluster_Result_Impute_explore.sh | 0 .../results}/submitCluster_Result_Impute_graph.sh | 0 {results/NotForFinalUsage => bak/results}/summary.sh | 0 {results/NotForFinalUsage => bak/results}/summary_cmd.py | 0 results/results_Reading_recheck.py | 9 +++++++-- results/submitCluster_Result_Impute_recheck.sh | 4 ++-- 45 files changed, 9 insertions(+), 4 deletions(-) rename {otherresults => bak/otherresults}/BAK_MAGIC.py (100%) rename {otherresults => bak/otherresults}/MAGIC_analysis.sh (100%) rename {otherresults => bak/otherresults}/MAGIC_analysis_usage.sh (100%) rename {otherresults => bak/otherresults}/MAGIC_impute.py (100%) rename {otherresults => bak/otherresults}/MAGIC_impute_usage.py (100%) rename {otherresults => bak/otherresults}/Other_Results_Evaluation.sh (100%) rename {otherresults => bak/otherresults}/Other_results_Reading.py (100%) rename {otherresults => bak/otherresults}/Other_results_celltype.py (100%) rename {otherresults => bak/otherresults}/Other_results_impute.py (100%) rename {otherresults => bak/otherresults}/README.md (100%) rename {otherresults => 
bak/otherresults}/SAUCIE_analysis.sh (100%) rename {otherresults => bak/otherresults}/SAUCIE_celltype.py (100%) rename {otherresults => bak/otherresults}/SAUCIE_impute.py (100%) rename {otherresults => bak/otherresults}/SAVER_impute.R (100%) rename {otherresults => bak/otherresults}/SCIMPUTE_impute.R (100%) rename {otherresults => bak/otherresults}/dca_impute.py (100%) rename {otherresults => bak/otherresults}/scVi_impute.py (100%) rename {otherresults => bak/otherresults}/simulation_generator.R (100%) rename {results/NotForFinalUsage => bak/results}/calculateROGUE.R (100%) rename {results/NotForFinalUsage => bak/results}/compare_varID.py (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_23dropout.txt (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_explore.txt (100%) rename {results/NotForFinalUsage => bak/results}/jobinfo_imp_louvain_2.txt (100%) rename {results/NotForFinalUsage => bak/results}/results_ROGUE.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_23.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_23dropout.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_explore.py (100%) rename {results/NotForFinalUsage => bak/results}/results_Reading_graph.py (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation_0.3.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_imputation_grid.sh (100%) rename {results/NotForFinalUsage => bak/results}/results_impute.py (100%) rename {results/NotForFinalUsage => bak/results}/results_impute_graph_ROC.py (100%) rename {results/NotForFinalUsage => bak/results}/results_impute_graph_ROC.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Celltype.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_23.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_23dropout.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_explore.sh (100%) rename {results/NotForFinalUsage => bak/results}/submitCluster_Result_Impute_graph.sh (100%) rename {results/NotForFinalUsage => bak/results}/summary.sh (100%) rename {results/NotForFinalUsage => bak/results}/summary_cmd.py (100%) diff --git a/otherresults/BAK_MAGIC.py b/bak/otherresults/BAK_MAGIC.py similarity index 100% rename from otherresults/BAK_MAGIC.py rename to bak/otherresults/BAK_MAGIC.py diff --git a/otherresults/MAGIC_analysis.sh b/bak/otherresults/MAGIC_analysis.sh similarity index 100% rename from otherresults/MAGIC_analysis.sh rename to bak/otherresults/MAGIC_analysis.sh diff --git a/otherresults/MAGIC_analysis_usage.sh b/bak/otherresults/MAGIC_analysis_usage.sh similarity index 100% rename from otherresults/MAGIC_analysis_usage.sh rename to bak/otherresults/MAGIC_analysis_usage.sh diff --git a/otherresults/MAGIC_impute.py b/bak/otherresults/MAGIC_impute.py similarity index 100% rename from otherresults/MAGIC_impute.py rename to bak/otherresults/MAGIC_impute.py diff --git a/otherresults/MAGIC_impute_usage.py b/bak/otherresults/MAGIC_impute_usage.py similarity index 100% rename from otherresults/MAGIC_impute_usage.py rename to bak/otherresults/MAGIC_impute_usage.py diff --git 
a/otherresults/Other_Results_Evaluation.sh b/bak/otherresults/Other_Results_Evaluation.sh similarity index 100% rename from otherresults/Other_Results_Evaluation.sh rename to bak/otherresults/Other_Results_Evaluation.sh diff --git a/otherresults/Other_results_Reading.py b/bak/otherresults/Other_results_Reading.py similarity index 100% rename from otherresults/Other_results_Reading.py rename to bak/otherresults/Other_results_Reading.py diff --git a/otherresults/Other_results_celltype.py b/bak/otherresults/Other_results_celltype.py similarity index 100% rename from otherresults/Other_results_celltype.py rename to bak/otherresults/Other_results_celltype.py diff --git a/otherresults/Other_results_impute.py b/bak/otherresults/Other_results_impute.py similarity index 100% rename from otherresults/Other_results_impute.py rename to bak/otherresults/Other_results_impute.py diff --git a/otherresults/README.md b/bak/otherresults/README.md similarity index 100% rename from otherresults/README.md rename to bak/otherresults/README.md diff --git a/otherresults/SAUCIE_analysis.sh b/bak/otherresults/SAUCIE_analysis.sh similarity index 100% rename from otherresults/SAUCIE_analysis.sh rename to bak/otherresults/SAUCIE_analysis.sh diff --git a/otherresults/SAUCIE_celltype.py b/bak/otherresults/SAUCIE_celltype.py similarity index 100% rename from otherresults/SAUCIE_celltype.py rename to bak/otherresults/SAUCIE_celltype.py diff --git a/otherresults/SAUCIE_impute.py b/bak/otherresults/SAUCIE_impute.py similarity index 100% rename from otherresults/SAUCIE_impute.py rename to bak/otherresults/SAUCIE_impute.py diff --git a/otherresults/SAVER_impute.R b/bak/otherresults/SAVER_impute.R similarity index 100% rename from otherresults/SAVER_impute.R rename to bak/otherresults/SAVER_impute.R diff --git a/otherresults/SCIMPUTE_impute.R b/bak/otherresults/SCIMPUTE_impute.R similarity index 100% rename from otherresults/SCIMPUTE_impute.R rename to bak/otherresults/SCIMPUTE_impute.R diff --git a/otherresults/dca_impute.py b/bak/otherresults/dca_impute.py similarity index 100% rename from otherresults/dca_impute.py rename to bak/otherresults/dca_impute.py diff --git a/otherresults/scVi_impute.py b/bak/otherresults/scVi_impute.py similarity index 100% rename from otherresults/scVi_impute.py rename to bak/otherresults/scVi_impute.py diff --git a/otherresults/simulation_generator.R b/bak/otherresults/simulation_generator.R similarity index 100% rename from otherresults/simulation_generator.R rename to bak/otherresults/simulation_generator.R diff --git a/results/NotForFinalUsage/calculateROGUE.R b/bak/results/calculateROGUE.R similarity index 100% rename from results/NotForFinalUsage/calculateROGUE.R rename to bak/results/calculateROGUE.R diff --git a/results/NotForFinalUsage/compare_varID.py b/bak/results/compare_varID.py similarity index 100% rename from results/NotForFinalUsage/compare_varID.py rename to bak/results/compare_varID.py diff --git a/results/NotForFinalUsage/jobinfo_imp_23dropout.txt b/bak/results/jobinfo_imp_23dropout.txt similarity index 100% rename from results/NotForFinalUsage/jobinfo_imp_23dropout.txt rename to bak/results/jobinfo_imp_23dropout.txt diff --git a/results/NotForFinalUsage/jobinfo_imp_explore.txt b/bak/results/jobinfo_imp_explore.txt similarity index 100% rename from results/NotForFinalUsage/jobinfo_imp_explore.txt rename to bak/results/jobinfo_imp_explore.txt diff --git a/results/NotForFinalUsage/jobinfo_imp_louvain_2.txt b/bak/results/jobinfo_imp_louvain_2.txt similarity index 100% rename from 
results/NotForFinalUsage/jobinfo_imp_louvain_2.txt rename to bak/results/jobinfo_imp_louvain_2.txt diff --git a/results/NotForFinalUsage/results_ROGUE.py b/bak/results/results_ROGUE.py similarity index 100% rename from results/NotForFinalUsage/results_ROGUE.py rename to bak/results/results_ROGUE.py diff --git a/results/NotForFinalUsage/results_Reading.py b/bak/results/results_Reading.py similarity index 100% rename from results/NotForFinalUsage/results_Reading.py rename to bak/results/results_Reading.py diff --git a/results/NotForFinalUsage/results_Reading_23.py b/bak/results/results_Reading_23.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_23.py rename to bak/results/results_Reading_23.py diff --git a/results/NotForFinalUsage/results_Reading_23dropout.py b/bak/results/results_Reading_23dropout.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_23dropout.py rename to bak/results/results_Reading_23dropout.py diff --git a/results/NotForFinalUsage/results_Reading_explore.py b/bak/results/results_Reading_explore.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_explore.py rename to bak/results/results_Reading_explore.py diff --git a/results/NotForFinalUsage/results_Reading_graph.py b/bak/results/results_Reading_graph.py similarity index 100% rename from results/NotForFinalUsage/results_Reading_graph.py rename to bak/results/results_Reading_graph.py diff --git a/results/NotForFinalUsage/results_imputation.sh b/bak/results/results_imputation.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation.sh rename to bak/results/results_imputation.sh diff --git a/results/NotForFinalUsage/results_imputation_0.3.sh b/bak/results/results_imputation_0.3.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation_0.3.sh rename to bak/results/results_imputation_0.3.sh diff --git a/results/NotForFinalUsage/results_imputation_grid.sh b/bak/results/results_imputation_grid.sh similarity index 100% rename from results/NotForFinalUsage/results_imputation_grid.sh rename to bak/results/results_imputation_grid.sh diff --git a/results/NotForFinalUsage/results_impute.py b/bak/results/results_impute.py similarity index 100% rename from results/NotForFinalUsage/results_impute.py rename to bak/results/results_impute.py diff --git a/results/NotForFinalUsage/results_impute_graph_ROC.py b/bak/results/results_impute_graph_ROC.py similarity index 100% rename from results/NotForFinalUsage/results_impute_graph_ROC.py rename to bak/results/results_impute_graph_ROC.py diff --git a/results/NotForFinalUsage/results_impute_graph_ROC.sh b/bak/results/results_impute_graph_ROC.sh similarity index 100% rename from results/NotForFinalUsage/results_impute_graph_ROC.sh rename to bak/results/results_impute_graph_ROC.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Celltype.sh b/bak/results/submitCluster_Result_Celltype.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Celltype.sh rename to bak/results/submitCluster_Result_Celltype.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute.sh b/bak/results/submitCluster_Result_Impute.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute.sh rename to bak/results/submitCluster_Result_Impute.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_23.sh b/bak/results/submitCluster_Result_Impute_23.sh similarity index 100% rename from 
results/NotForFinalUsage/submitCluster_Result_Impute_23.sh rename to bak/results/submitCluster_Result_Impute_23.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh b/bak/results/submitCluster_Result_Impute_23dropout.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_23dropout.sh rename to bak/results/submitCluster_Result_Impute_23dropout.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh b/bak/results/submitCluster_Result_Impute_explore.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_explore.sh rename to bak/results/submitCluster_Result_Impute_explore.sh diff --git a/results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh b/bak/results/submitCluster_Result_Impute_graph.sh similarity index 100% rename from results/NotForFinalUsage/submitCluster_Result_Impute_graph.sh rename to bak/results/submitCluster_Result_Impute_graph.sh diff --git a/results/NotForFinalUsage/summary.sh b/bak/results/summary.sh similarity index 100% rename from results/NotForFinalUsage/summary.sh rename to bak/results/summary.sh diff --git a/results/NotForFinalUsage/summary_cmd.py b/bak/results/summary_cmd.py similarity index 100% rename from results/NotForFinalUsage/summary_cmd.py rename to bak/results/summary_cmd.py diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py index 124743c..e64b509 100644 --- a/results/results_Reading_recheck.py +++ b/results/results_Reading_recheck.py @@ -12,7 +12,12 @@ help="method used: 1-13") args = parser.parse_args() -# Note: +# New notes: +# We used this in the paper revision; it generates many .sh files. +# This file is called by submitCluster_Result_Impute_recheck.sh, and only checks .out files.
+# The results can be gathered with cat *.out + +# Old Note: # Generate results in python other than in shell for better organization # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel @@ -21,7 +26,7 @@ if args.splitMode: #The split of batch, more batches, more parallel - if args.batchStr == 8: + if args.batchStr == 9: datasetList = [ '9.Chung', # '9.Chung --discreteTag' diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index ba68356..f0d494e 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,13 +1,13 @@ # for i in {0..59} # do -# for j in {8,11,12,13} +# for j in {9,11,12,13} # do # python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh # done # done # submit -for j in {8,11,12,13} +for j in {9,11,12,13} do for i in {0..59} do From 5e4cedf078034a99c42cd4dd507596f713f98fe2 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 20:07:55 -0600 Subject: [PATCH 067/117] debug scimpute --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 8 ++++---- codesfromJGandYJ/impute/scimpute.r | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index b19a8da..8e2480d 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -46,11 +46,11 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): writer.writerows(features) #run the R script - os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"/tmpscimpute/ scimpute_output.csv") + os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"tmpscimpute/") - filename=save_path+"/tmpscimpute/scimpute_output.csv" - imputed_values = pd.read_csv(filename,sep=",") - imputed_values=imputed_values.T + filename=save_path+"tmpscimpute/scimpute_count.csv" + imputed_values = pd.read_csv(filename,sep=",",index_col=0) + imputed_values = imputed_values.to_numpy() np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r index d7fbb16..ec91006 100644 --- a/codesfromJGandYJ/impute/scimpute.r +++ b/codesfromJGandYJ/impute/scimpute.r @@ -1,5 +1,5 @@ # Usage: -# Rscript scImpute.r input.txt output.txt +# Rscript scImpute.r input.txt outputdir # test if there is one argument: if not, return an error args = commandArgs(trailingOnly=TRUE) if (length(args)==0) { @@ -9,7 +9,6 @@ if (length(args)==0) { library(scImpute) inputfile = args[1] outputDir = args[2] -outputfile = args[3] scimpute(# full path to raw count matrix count_path = inputfile, infile = "csv", # format of input file From ab1d2c80ea2a7b0275b1a72f172a425dcebbd603 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 20:53:39 -0600 Subject: [PATCH 068/117] fix saver issue --- codesfromJGandYJ/impute/SAVER_impute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index 05a5b2a..eca2323 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -39,7 +39,7 @@ def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): os.system("Rscript 
saver.r "+save_path+"saver_input.csv "+save_path+"saver_output.csv ") filename=save_path+"saver_output.csv" - imputed_values = pd.read_csv(filename,sep="\t") + imputed_values = pd.read_csv(filename,sep="\t",header=None) imputed_values=imputed_values.T np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) From dec349ffcd6a36fba65e4c05f37f4d22c33869f2 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 21:44:53 -0600 Subject: [PATCH 069/117] scimpute for all possible scenarios --- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 8e2480d..9d8649f 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -54,14 +54,9 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) -# datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] -# seedList = ['1','2','3'] -# ratioList = [0.1, 0.3, 0.6, 0.8] - -# Debug -datasetNameList = ['12.Klein'] -seedList = ['1'] -ratioList = [0.1] +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] for datasetName in datasetNameList: for seed in seedList: From 9c78428ec3573852d9f2ed75537e4314403988d4 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:27:48 -0600 Subject: [PATCH 070/117] imputation on other results --- benchmark_util.py | 4 +- results/results_impute_others_all.py | 59 ++++++++++++++++++++++++++++ results/submit_Impute_others.sh | 14 +++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 results/results_impute_others_all.py create mode 100644 results/submit_Impute_others.sh diff --git a/benchmark_util.py b/benchmark_util.py index f85f128..6088885 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -530,6 +530,7 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - y) + rmse = ((x - y)**2/len(result))**0.5 # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -538,8 +539,9 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - yuse) + rmse = ((x - yuse)**2/len(result))**0.5 # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) # IMPUTATION METRICS diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py new file mode 100644 index 0000000..4bd8f6f --- /dev/null +++ b/results/results_impute_others_all.py @@ -0,0 +1,59 @@ +import os +import numpy as np +import pandas as pd +import argparse +import scipy.sparse +import sys +sys.path.append('../') +from util_function import * +from benchmark_util import * +from R_util import generateLouvainCluster +from sklearn.cluster import KMeans +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +args = parser.parse_args() + +# Notes: +# Call by submit_Impute_others.sh + + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + 
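A side note on the rmse introduced in benchmark_util.py above: as written, ((x - y)**2/len(result))**0.5 divides each squared error by n before taking the square root, so np.mean(rmse) works out to the mean absolute error divided by sqrt(n), not the conventional root-mean-square error. If the standard definition is the intent, a minimal sketch (not part of the patch):

import numpy as np

def rmse(x, y):
    # Conventional RMSE: average the squared errors first,
    # then take a single square root.
    return np.sqrt(np.mean((x - y) ** 2))

The distinction matters when comparing methods, because absolute and squared errors weight outliers differently.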
+oridirStr = '../npyImputeG2E' +medirStr = '../' + +seedList = ['1','2','3'] +ratioList = [0.1,0.3,0.6,0.8] +methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] + +def outResults(datasetName,seed,ratio,method): + featuresOriginal = load_data(datasetName, discreteTag=False) + + features = None + dropi = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropi.npy') + dropj = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropj.npy') + dropix = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropix.npy') + + # scGNN results + # featuresImpute = np.load(npyDir+datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + + if method=='dca' or method=='deepimpute': + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + else: + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse)) + + +for method in methodList: + for datasetName in datasetList: + for seed in seedList: + for ratio in ratioList: + outResults(datasetName=datasetName, seed=seed, ratio=ratio, method=method) \ No newline at end of file diff --git a/results/submit_Impute_others.sh b/results/submit_Impute_others.sh new file mode 100644 index 0000000..38d0534 --- /dev/null +++ b/results/submit_Impute_others.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J OthersResults +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W results_impute_others_all.py \ No newline at end of file From 1e30f6ff127648580d8d72fe56aef99ce02f0f06 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:32:41 -0600 Subject: [PATCH 071/117] imputation on other results --- results/submit_Impute_others.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/submit_Impute_others.sh b/results/submit_Impute_others.sh index 38d0534..55e89f4 100644 --- a/results/submit_Impute_others.sh +++ b/results/submit_Impute_others.sh @@ -11,4 +11,4 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W results_impute_others_all.py \ No newline at end of file +python3 -W ignore results_impute_others_all.py \ No newline at end of file From 9dcf7062452c81b292cce23b0c67ddda2eb6c913 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 26 Nov 2020 23:37:54 -0600 Subject: [PATCH 072/117] imputation on other results --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 4bd8f6f..067e0aa 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -28,7 +28,7 @@ medirStr = '../' seedList = ['1','2','3'] -ratioList = [0.1,0.3,0.6,0.8] +ratioList = ['0.1','0.3','0.6','0.8'] methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] def outResults(datasetName,seed,ratio,method): From d170da2d7d0fbed399da1b9d41bc492899a45ea9 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 27 Nov 2020 07:03:53 -0600 Subject: [PATCH 073/117] fix a log error in imputation of scGNN, rerun --- benchmark_util.py | 2 +- results/results_impute_graph.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark_util.py b/benchmark_util.py index 6088885..d2fc1ba 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -577,7 +577,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): # return np.median(np.abs(x - yuse)) return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse) -# cosine similarity +# cosine similarity with log def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): """ X_mean: imputed dataset diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index b964534..3f33f3c 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -74,7 +74,7 @@ # featuresImpute = featuresImpute.to_numpy() l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse), end='') def imputeResult(inputData): 
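Two clarifications on the fixes in patches 071 and 073 above. First, python3 -W with nothing valid after it cannot run the script: -W consumes the next token as its warning-filter argument, so in the original submit script the filename itself was swallowed as the filter; -W ignore runs the script with all warnings suppressed, as intended. Second, imputation_cosine_log is cosine similarity computed on log-transformed expression values; a minimal sketch, assuming log1p is the transform (the function body is not shown in these diffs):

import numpy as np

def cosine_log(imputed, original):
    # Cosine similarity of the log-transformed vectors: <u,v> / (||u|| * ||v||)
    u, v = np.log1p(imputed), np.log1p(original)
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))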
From 5fa98224abc05058b3a2f4b2063ff6c5761f55b3 Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 27 Nov 2020 07:18:41 -0600 Subject: [PATCH 074/117] update sbatch info --- results/submitCluster_Result_Impute_recheck.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh index f0d494e..157350a 100644 --- a/results/submitCluster_Result_Impute_recheck.sh +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -1,10 +1,10 @@ -# for i in {0..59} -# do -# for j in {9,11,12,13} -# do -# python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh -# done -# done +for i in {0..59} +do +for j in {9,11,12,13} +do +python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +done +done # submit for j in {9,11,12,13} From 58808fe9f9058f00275904a63cca765f47857384 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 07:30:22 -0600 Subject: [PATCH 075/117] Partially add netNMF-sc and scIGANs --- results/results_impute_others_all.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 067e0aa..fe54037 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -29,7 +29,7 @@ seedList = ['1','2','3'] ratioList = ['0.1','0.3','0.6','0.8'] -methodList = ['magic','saucie','saver','scimpute','scvi','dca','deepimpute'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) @@ -41,11 +41,22 @@ def outResults(datasetName,seed,ratio,method): # scGNN results # featuresImpute = np.load(npyDir+datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') - featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + if method == 'scvinorm': + featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') + # not using now + elif method == 'scIGANs': + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + elif method == 'netNMFsc': + featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') + featuresImpute = featuresImpute.T + else: + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + # No log if method=='dca' or method=='deepimpute': l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + # log else: From 49787b6d96d2f6533ca99bf3456cfa0d2af846e7 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 12:44:06 -0600 Subject: [PATCH 076/117] add a new version of main_benchmark with timer and memory info --- main_benchmark_timer.py | 738 ++++++++++++++++++++++++++++ 1
file changed, 738 insertions(+) create mode 100644 main_benchmark_timer.py diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py new file mode 100644 index 0000000..6de83ef --- /dev/null +++ b/main_benchmark_timer.py @@ -0,0 +1,738 @@ +import time +import resource +import datetime +import argparse +import sys +import numpy as np +import pickle as pkl +import networkx as nx +import scipy.sparse as sp +import torch +from torch.utils.data import Dataset, DataLoader +from torch import nn, optim +from torch.nn import functional as F +from sklearn.decomposition import PCA +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.cluster import KMeans,SpectralClustering,AffinityPropagation,AgglomerativeClustering,Birch,DBSCAN,FeatureAgglomeration,MeanShift,OPTICS +from model import AE, VAE, VAE2d +from util_function import * +from graph_function import * +from benchmark_util import * +from gae_embedding import GAEembedding,measure_clustering_results,test_clustering_benchmark_results +from LTMG_R import * +import pandas as pd + +# Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--batch-size', type=int, default=12800, metavar='N', + help='input batch size for training (default: 12800)') +parser.add_argument('--epochs', type=int, default=500, metavar='N', + help='number of epochs to train in Regulatory Autoencoder (default: 500)') +parser.add_argument('--EM-epochs', type=int, default=200, metavar='N', + help='number of epochs to train in iteration EM (default: 200)') +parser.add_argument('--EM-iteration', type=int, default=10, metavar='N', + help='number of epochs in EM iteration (default: 10)') +parser.add_argument('--EMtype', type=str, default='EM', + help='EM process type (default: celltypeEM) or EM') +parser.add_argument('--alpha', type=float, default=0.5, + help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') +parser.add_argument('--converge-type', type=str, default='celltype', + help='type of converge: celltype/graph/both/either (default: celltype) ') +parser.add_argument('--converge-graphratio', type=float, default=0.01, + help='ratio of cell type change in EM iteration (default: 0.01), 0-1') +parser.add_argument('--converge-celltyperatio', type=float, default=0.95, + help='ratio of cell type change in EM iteration (default: 0.99), 0-1') +parser.add_argument('--cluster-epochs', type=int, default=200, metavar='N', + help='number of epochs in cluster autoencoder training (default: 200)') +parser.add_argument('--no-cuda', action='store_true', default=True, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +parser.add_argument('--regulized-type', type=str, default='LTMG', + help='regulized type (default: LTMG) in EM, otherwise: noregu/LTMG/LTMG01') +parser.add_argument('--reduction', type=str, default='sum', + help='reduction type: mean/sum, default(sum)') +parser.add_argument('--model', type=str, default='AE', + help='VAE/AE (default: AE)') +parser.add_argument('--gammaPara', type=float, default=0.1, + help='regulized parameter (default: 
0.1)') +parser.add_argument('--alphaRegularizePara', type=float, default=0.9, + help='regulized parameter (default: 0.9)') + +# imputation related +parser.add_argument('--EMregulized-type', type=str, default='Celltype', + help='regulized type (default: noregu) in EM, otherwise: noregu/Graph/GraphR/Celltype/CelltypeR') +# parser.add_argument('--adjtype', type=str, default='unweighted', +# help='adjtype (default: weighted) otherwise: unweighted') +# parser.add_argument('--aePara', type=str, default='start', +# help='whether use parameter of first feature autoencoder: start/end/cont') +parser.add_argument('--gammaImputePara', type=float, default=0.0, + help='regulized parameter (default: 0.0)') +parser.add_argument('--graphImputePara', type=float, default=0.3, + help='graph parameter (default: 0.3)') +parser.add_argument('--celltypeImputePara', type=float, default=0.1, + help='celltype parameter (default: 0.1)') +parser.add_argument('--L1Para', type=float, default=1.0, + help='L1 regulized parameter (default: 0.001)') +parser.add_argument('--L2Para', type=float, default=0.0, + help='L2 regulized parameter (default: 0.001)') +parser.add_argument('--EMreguTag', action='store_true', default=False, + help='whether regu in EM process') +parser.add_argument('--discreteTag', action='store_true', default=False, + help='whether input is raw or 0/1 (default: False)') +#Build cell graph +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') +parser.add_argument('--zerofillFlag', action='store_true', default=False, + help='fill zero or not before EM process (default: False)') + +#Debug related +parser.add_argument('--precisionModel', type=str, default='Float', + help='Single Precision/Double precision: Float/Double (default:Float)') +parser.add_argument('--coresUsage', type=str, default='1', + help='how many cores used: all/1/... 
(default:1)') +parser.add_argument('--npyDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--log-interval', type=int, default=100, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--saveinternal', action='store_true', default=False, + help='whether save internal interation results or not') +parser.add_argument('--debuginfo', action='store_true', default=False, + help='whether output debuginfo in cpu time and memory info') + +#LTMG related +parser.add_argument('--inferLTMGTag', action='store_true', default=False, + help='Whether infer LTMG') +parser.add_argument('--LTMGDir', type=str, default='/home/jwang/data/scData/', + help='directory of LTMGDir, default:(/home/wangjue/biodata/scData/allBench/)') +parser.add_argument('--expressionFile', type=str, default='Biase_expression.csv', + help='expression File in csv') +parser.add_argument('--ltmgFile', type=str, default='ltmg.csv', + help='expression File in csv') + +#Clustering related +parser.add_argument('--useGAEembedding', action='store_true', default=False, + help='whether use GAE embedding for clustering(default: False)') +parser.add_argument('--useBothembedding', action='store_true', default=False, + help='whether use both embedding and Graph embedding for clustering(default: False)') +parser.add_argument('--n-clusters', default=20, type=int, help='number of clusters if predifined for KMeans/Birch ') +parser.add_argument('--clustering-method', type=str, default='LouvainK', + help='Clustering method: Louvain/KMeans/SpectralClustering/AffinityPropagation/AgglomerativeClustering/AgglomerativeClusteringK/Birch/BirchN/MeanShift/OPTICS/LouvainK/LouvainB') +parser.add_argument('--maxClusterNumber', type=int, default=30, + help='max cluster for celltypeEM without setting number of clusters (default: 30)') +parser.add_argument('--minMemberinCluster', type=int, default=5, + help='max cluster for celltypeEM without setting number of clusters (default: 100)') +parser.add_argument('--resolution', type=str, default='auto', + help='the number of resolution on Louvain (default: auto/0.5/0.8)') + + +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') + +#Aggrelated +parser.add_argument('--linkage', type=str, default='ward', + help='linkage should be: ward, average, complete, single') + +#GAE related +parser.add_argument('--GAEmodel', type=str, default='gcn_vae', help="models used") +parser.add_argument('--GAEepochs', type=int, default=200, help='Number of epochs to train.') +parser.add_argument('--GAEhidden1', type=int, default=32, help='Number of units in hidden layer 1.') +parser.add_argument('--GAEhidden2', type=int, default=16, help='Number of units in hidden layer 2.') +parser.add_argument('--GAElr', type=float, default=0.01, help='Initial learning rate.') +parser.add_argument('--GAEdropout', type=float, default=0., help='Dropout rate (1 - keep probability).') +parser.add_argument('--GAElr_dw', type=float, default=0.001, help='Initial learning rate for regularization.') + +#Start Impute or not, only used for evaluating Impute +parser.add_argument('--imputeMode', default=False, action='store_true', + help='impute or not (default: False). 
Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--dropoutRatio', type=float, default=0.1, + help='dropout ratio for impute (default: 0.1)') + +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +#TODO +#As we have lots of parameters, should check args +checkargs(args) + +torch.manual_seed(args.seed) +device = torch.device("cuda" if args.cuda else "cpu") + +if not args.coresUsage == 'all': + torch.set_num_threads(int(args.coresUsage)) + +kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} +print(args) +start_time = time.time() +print ('---0:00:00---scRNA starts loading.') + +if not args.imputeMode: + # if args.discreteTag: + # scData = scBenchDataset(args.datasetName, args.discreteTag) + # else: + # scData = scBenchDataset(args.datasetName, args.discreteTag, transform=logtransform) + scData = scBenchDataset(args.datasetName, args.discreteTag) +else: + # if args.discreteTag: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio) + # else: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio, transform=logtransform) + scData = scDatasetDropout(datasetName=args.datasetName, discreteTag=args.discreteTag, ratio=args.dropoutRatio, seed=args.seed) +train_loader = DataLoader(scData, batch_size=args.batch_size, shuffle=False, **kwargs) + +if args.inferLTMGTag: + #run LTMG in R + runLTMG(args.LTMGDir+'test/'+args.expressionFile,args.LTMGDir+'test/') + ltmgFile = args.ltmgFile +else: + ltmgFile = args.datasetName+'/T2000_UsingOriginalMatrix/T2000_LTMG.txt' + +regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile) +regulationMatrix = torch.from_numpy(regulationMatrix) + +# Original +if args.model == 'VAE': + # model = VAE(dim=scData.features.shape[1]).to(device) + model = VAE2d(dim=scData.features.shape[1]).to(device) +elif args.model == 'AE': + model = AE(dim=scData.features.shape[1]).to(device) +if args.precisionModel == 'Double': + model=model.double() +optimizer = optim.Adam(model.parameters(), lr=1e-3) + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +#t1=pd.read_csv('/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +#whether to output debuginfo in running time and memory consumption +def debuginfoStr(info): + if args.debuginfo: + print ('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+'---'+info) + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Init Mem consumption: '+str(mem)) + +debuginfoStr('scRNA has been successfully loaded') + +#TODO: have to improve save npy +def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): + ''' + EMFlag indicates whether in EM processes. 
+ If in EM, use regulized-type parsed from program entrance, + Otherwise, noregu + taskType: celltype or imputation + ''' + model.train() + train_loss = 0 + # for batch_idx, (data, _) in enumerate(train_loader): + # for batch_idx, data in enumerate(train_loader): + for batch_idx, (data, dataindex) in enumerate(train_loader): + if args.precisionModel == 'Double': + data = data.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + data = data.type(torch.FloatTensor) + data = data.to(device) + regulationMatrixBatch = regulationMatrix[dataindex,:] + optimizer.zero_grad() + if args.model == 'VAE': + recon_batch, mu, logvar, z = model(data) + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + elif args.model == 'AE': + recon_batch, z = model(data) + mu_dummy = '' + logvar_dummy = '' + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, 
recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + # L1 and L2 regularization in imputation + # 0.0 for no regularization + if taskType == 'imputation': + l1 = 0.0 + l2 = 0.0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + l2 = l2 + p.pow(2).sum() + loss = loss + args.L1Para * l1 + args.L2Para * l2 + + loss.backward() + train_loss += loss.item() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), + loss.item() / len(data))) + + # for batch + if batch_idx == 0: + recon_batch_all=recon_batch + data_all = data + z_all = z + else: + recon_batch_all=torch.cat((recon_batch_all, recon_batch), 0) + data_all = torch.cat((data_all, data), 0) + z_all = torch.cat((z_all,z),0) + + print('====> Epoch: {} Average loss: {:.4f}'.format( + epoch, train_loss / len(train_loader.dataset))) + + return recon_batch_all, data_all, z_all + +if __name__ == "__main__": + outParaTag = str(args.k)+'-'+str(args.gammaPara)+'-'+str(args.alphaRegularizePara)+'-'+str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + # outParaTag = str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + ptfileStart = args.npyDir+args.datasetName+'_'+outParaTag+'_EMtrainingStart.pt' + stateStart = { + # 'epoch': epoch, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + } + ptfile = args.npyDir+args.datasetName+'_EMtraining.pt' + + # Step 1. celltype clustering + # store parameter + torch.save(stateStart,ptfileStart) + + # Save results only when impute + discreteStr = '' + if args.discreteTag: + discreteStr = 'D' + + if args.imputeMode: + # Does not need now + # save_sparse_matrix(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + # sp.save_npz(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_features.npy',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropi.npy',scData.i) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropj.npy',scData.j) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropix.npy',scData.ix) + + debuginfoStr('Start feature autoencoder training') + + for epoch in range(1, args.epochs + 1): + recon, original, z = train(epoch, EMFlag=False) + + debuginfoStr('Feature autoencoder training finished') + + zOut = z.detach().cpu().numpy() + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + # Store reconOri for imputation + reconOri = recon.clone() + reconOri = reconOri.detach().cpu().numpy() + + # Step 1. 
Inferring celltype + #Define resolution + #Default: auto, otherwise use user defined resolution + if args.resolution == 'auto': + if zOut.shape[0]< 2000: + resolution = 0.8 + else: + resolution = 0.5 + else: + resolution = float(args.resolution) + + debuginfoStr('Start construct cell grpah') + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding)) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr('Cell Graph constructed and pruned') + + # if args.saveinternal: + # reconOut = recon.detach().cpu().numpy() + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z.npy',zOut) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z.npy',zOut) + + # Whether use GAE embedding + debuginfoStr('Start Graph Autoencoder training') + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + debuginfoStr('Graph Autoencoder training finished') + + # For iteration studies + G0 = nx.Graph() + G0.add_weighted_edges_from(edgeList) + nlG0=nx.normalized_laplacian_matrix(G0) + # set iteration criteria for converge + adjOld = nlG0 + # set celltype criteria for converge + listResultOld = [1 for i in range(zOut.shape[0])] + + #Fill the zeros before EM iteration + # TODO: better implementation later, now we don't filling zeros for now + if args.zerofillFlag: + for nz_index in range(len(scData.nz_i)): + # tmp = scipy.sparse.lil_matrix.todense(scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]]) + # tmp = np.asarray(tmp).reshape(-1)[0] + tmp = scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]] + reconOut[scData.nz_i[nz_index], scData.nz_j[nz_index]] = tmp + recon = reconOut + + debuginfoStr('EM Iteration started') + for bigepoch in range(0, args.EM_iteration): + iteration_time = time.time() + + # Now for both methods, we need do clustering, using clustering results to check converge + # TODO May reimplement later + # Clustering: Get cluster + clustering_time = time.time() + if args.clustering_method=='Louvain': + # Louvain: the only function has R dependent + # Seperate here for platforms without R support + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + elif args.clustering_method=='LouvainK': + from R_util import generateLouvainCluster + listResult,size = 
generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = KMeans(n_clusters=k, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='LouvainB': + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = Birch(n_clusters=k).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='KMeans': + clustering = KMeans(n_clusters=args.n_clusters, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='SpectralClustering': + clustering = SpectralClustering(n_clusters=args.n_clusters, assign_labels="discretize", random_state=0).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AffinityPropagation': + clustering = AffinityPropagation().fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='AgglomerativeClustering': + clustering = AgglomerativeClustering(linkage=args.linkage).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AgglomerativeClusteringK': + clustering = AgglomerativeClustering(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='Birch': + clustering = Birch(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='BirchN': + clustering = Birch(n_clusters=None).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='MeanShift': + clustering = MeanShift().fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='OPTICS': + clustering = OPTICS(min_samples=int(args.k/2), min_cluster_size=args.minMemberinCluster).fit(zOut) + listResult = clustering.labels_.tolist() + else: + print("Error: Clustering method not appropriate") + # print("---Clustering takes %s seconds ---" % (time.time() - clustering_time)) + + # If clusters more than maxclusters, then have to stop + if len(set(listResult))>args.maxClusterNumber or len(set(listResult))<=1: + print("Stopping: Number of clusters is " + str(len(set(listResult))) + ".") + # Exit + # return None + # Else: dealing with the number + listResult = trimClustering(listResult,minMemberinCluster=args.minMemberinCluster,maxClusterNumber=args.maxClusterNumber) + + #Calculate silhouette + measure_clustering_results(zOut, listResult) + print('Total Cluster Number: '+str(len(set(listResult)))) + + debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training started') + #Graph regulizated EM AE with celltype AE, do the additional AE + if args.EMtype == 'celltypeEM': + # Each cluster has a autoencoder, and organize them back in iteraization + clusterIndexList = [] + for i in range(len(set(listResult))): + clusterIndexList.append([]) + for i in range(len(listResult)): + clusterIndexList[listResult[i]].append(i) + + reconNew = np.zeros((scData.features.shape[0],scData.features.shape[1])) + + # Convert to Tensor + reconNew = torch.from_numpy(reconNew) + if args.precisionModel == 'Double': + reconNew = reconNew.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + reconNew = reconNew.type(torch.FloatTensor) + reconNew = reconNew.to(device) + + # 
model.load_state_dict(torch.load(ptfile)) + model.load_state_dict(ptstatus) + + for clusterIndex in clusterIndexList: + reconUsage = recon[clusterIndex] + scDataInter = scDatasetInter(reconUsage) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + for epoch in range(1, args.cluster_epochs + 1): + reconCluster, originalCluster, zCluster = train(epoch, EMFlag=True) + count = 0 + for i in clusterIndex: + reconNew[i] = reconCluster[count,:] + count +=1 + # Update + recon = reconNew + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training succeed') + + # Use new dataloader + scDataInter = scDatasetInter(recon) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + debuginfoStr(str(bigepoch)+' th iter: Start construct cell grpah') + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True) + + zOut = z.detach().cpu().numpy() + + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding or (bigepoch == int(args.EM_iteration)-1))) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr(str(bigepoch)+' th iter: Cell Graph constructed and pruned') + + debuginfoStr(str(bigepoch)+' th iter: Start Graph Autoencoder training') + # Whether use GAE embedding + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + + debuginfoStr(str(bigepoch)+' th iter: Graph Autoencoder training finished') + + if args.saveinternal: + reconOut = recon.detach().cpu().numpy() + if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + + # print("---One iteration in EM process, proceeded %s seconds ---" % (time.time() - iteration_time)) + + #Iteration usage + Gc = nx.Graph() + Gc.add_weighted_edges_from(edgeList) + adjGc = nx.adjacency_matrix(Gc) + + # Update new adj + adjNew = args.alpha*nlG0 + (1-args.alpha) * adjGc/np.sum(adjGc,axis=0) + + #debug + graphChange = np.mean(abs(adjNew-adjOld)) + graphChangeThreshold = args.converge_graphratio * np.mean(abs(nlG0)) + print('adjNew:{} adjOld:{} G0:{}'.format(adjNew, adjOld, nlG0)) + print('mean:{} threshold:{}'.format(graphChange, 
graphChangeThreshold)) + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(listResultOld, listResult) + print(listResultOld) + print(listResult) + print('celltype similarity:'+str(ari)) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultarray=[] + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + print('All Results: ') + print(resultstr) + + if args.saveinternal: + if args.imputeMode: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + else: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + + # graph criteria + if args.converge_type == 'graph': + if graphChange < graphChangeThreshold: + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # celltype criteria + elif args.converge_type == 'celltype': + if ari>args.converge_celltyperatio: + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if both criteria are meets + elif args.converge_type == 'both': + if graphChange < graphChangeThreshold and ari > args.converge_celltyperatio: + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if either criteria are meets + elif args.converge_type == 'either': + if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + + # Update + adjOld = adjNew + listResultOld = listResult + # torch.cuda.empty_cache() + debuginfoStr(str(bigepoch)+' th iter: Iteration finished') + + + # Output celltype related results + if args.imputeMode: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_final_edgeList.npy',edgeList) + else: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_final_edgeList.npy',edgeList) + + # np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_embedding.csv',zOut, delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_graph.csv',edgeList,fmt='%d,%d,%2.1f') + 
np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt',listResult,fmt='%d') + + resultarray=[] + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_benchmark.txt',resultarray,fmt='%s') + + # save internal results for imputation + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy',listResult) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy',listResult) + + # Step 2. Imputation with best results of graph and celltype + + # if args.imputeMode: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy') + # else: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy') + + # Use new dataloader + scDataInter = scDatasetInter(reconOri) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + stateStart = torch.load(ptfileStart) + model.load_state_dict(stateStart['state_dict']) + optimizer.load_state_dict(stateStart['optimizer']) + # if args.aePara == 'start': + # model.load_state_dict(torch.load(ptfileStart)) + # elif args.aePara == 'end': + # model.load_state_dict(torch.load(ptfileEnd)) + + # generate graph regularizer from graph + # adj = adj.tolist() # Used for read/load + # adjdense = sp.csr_matrix.todense(adj) + + # generate adj from edgeList + adjdense = sp.csr_matrix.todense(adj) + adjsample = torch.from_numpy(adjdense) + if args.precisionModel == 'Float': + adjsample = adjsample.float() + elif args.precisionModel == 'Double': + adjsample = adjsample.type(torch.DoubleTensor) + + # generate celltype regularizer from celltype + celltypesample = generateCelltypeRegu(listResult) + + celltypesample = torch.from_numpy(celltypesample) + if args.precisionModel == 'Float': + celltypesample = celltypesample.float() + elif args.precisionModel == 'Double': + celltypesample = celltypesample.type(torch.DoubleTensor) + + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True, taskType='imputation') + + reconOut = recon.detach().cpu().numpy() + + # out imputation Results + if args.imputeMode: + 
np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + else: + np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + + debuginfoStr(str(bigepoch)+'scGNN finished') From 169864c08dbd377a5a027b88aa188d88e8117485 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 16:44:37 -0600 Subject: [PATCH 077/117] ratio 0.0 --- generating_Impute_0.0.py | 78 +++++++++++++++++++++++++++++++++ submitCluster_imputation_0.0.sh | 14 ++++++ 2 files changed, 92 insertions(+) create mode 100644 generating_Impute_0.0.py create mode 100644 submitCluster_imputation_0.0.sh diff --git a/generating_Impute_0.0.py b/generating_Impute_0.0.py new file mode 100644 index 0000000..dd50d28 --- /dev/null +++ b/generating_Impute_0.0.py @@ -0,0 +1,78 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running imputation of original scGNN ') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), +] + +dropoutList = ['0.0',] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = "npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark 
/home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/submitCluster_imputation_0.0.sh b/submitCluster_imputation_0.0.sh new file mode 100644 index 0000000..dec5bd9 --- /dev/null +++ b/submitCluster_imputation_0.0.sh @@ -0,0 +1,14 @@ +for i in {1} +do +for j in {0.0} +do +sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh + +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh + +done +done \ No newline at end of file From 9a27b23df948af3da2a2b94b44f84577820c52c5 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 16:50:16 -0600 Subject: [PATCH 078/117] ratio 0.0 --- submitCluster_imputation_0.0.sh | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/submitCluster_imputation_0.0.sh b/submitCluster_imputation_0.0.sh index dec5bd9..5dcd876 100644 --- a/submitCluster_imputation_0.0.sh +++ b/submitCluster_imputation_0.0.sh @@ -1,14 +1,4 @@ -for i in {1} -do -for j in {0.0} -do -sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh - -sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh - -done -done \ No newline at end of file +sbatch run_experimentImpute_2_g_e_1_9_0.0.sh +sbatch run_experimentImpute_2_g_e_1_11_0.0.sh +sbatch run_experimentImpute_2_g_e_1_12_0.0.sh +sbatch run_experimentImpute_2_g_e_1_13_0.0.sh From 3dddc7f23f56c0a3ead97ecb4a83f9710db25181 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 20:26:39 -0600 Subject: [PATCH 079/117] for ratio 0.0 --- codesfromJGandYJ/impute/MAGIC_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/SAUCIE_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/SAVER_impute.py | 20 +++++++++----------- codesfromJGandYJ/impute/SCIMPUTE_impute.py | 17 +++++++++-------- codesfromJGandYJ/impute/dca_impute.py | 16 +++++++++------- codesfromJGandYJ/impute/deepimpute_impute.py | 15 +++++++++------ codesfromJGandYJ/impute/other_dca.sh | 4 +++- codesfromJGandYJ/impute/other_deepimpute.sh | 3 ++- codesfromJGandYJ/impute/other_magic.sh | 3 ++- codesfromJGandYJ/impute/other_saucie.sh | 6 ++---- codesfromJGandYJ/impute/other_saver.sh | 3 ++- codesfromJGandYJ/impute/other_scimpute.sh | 3 ++- codesfromJGandYJ/impute/other_scvi.sh | 3 ++- codesfromJGandYJ/impute/scVi_impute.py | 17 +++++++++-------- 14 files changed, 78 insertions(+), 64 deletions(-) 
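Every wrapper patched below receives the same mechanical change: the unused per-script `--datasetName`/`--ratio` arguments give way to a single `--origin` switch, and the module-level driver either sweeps the full seed-by-dropout-ratio benchmark grid or, under `--origin`, imputes only the original (ratio 0.0) matrix once per dataset with seed 1. A minimal sketch of that shared pattern, with the dataset, seed, and ratio values copied from the diffs; `impute_fn` is a placeholder standing in for each wrapper's `impute_*` function, not a name from the repository:

def run_benchmark(impute_fn, origin=False):
    # Benchmark grid shared by all of the wrappers below.
    datasetNameList = ['9.Chung', '11.Kolodziejczyk', '12.Klein', '13.Zeisel']
    seedList = ['1', '2', '3']
    ratioList = [0.1, 0.3, 0.6, 0.8]
    if origin:
        # --origin: impute the undropped (ratio 0.0) data once per dataset.
        for datasetName in datasetNameList:
            impute_fn(seed='1', datasetName=datasetName, ratio='0.0')
    else:
        # Default: sweep the full seed x dropout-ratio grid.
        for datasetName in datasetNameList:
            for seed in seedList:
                for ratio in ratioList:
                    impute_fn(seed=seed, datasetName=datasetName, ratio=ratio)

Each *_impute.py below inlines exactly this control flow rather than calling a shared helper.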
diff --git a/codesfromJGandYJ/impute/MAGIC_impute.py b/codesfromJGandYJ/impute/MAGIC_impute.py index da7b573..95fe325 100644 --- a/codesfromJGandYJ/impute/MAGIC_impute.py +++ b/codesfromJGandYJ/impute/MAGIC_impute.py @@ -8,9 +8,7 @@ #from benchmark_util import impute_dropout parser = argparse.ArgumentParser(description='MAGIC Impute') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() @@ -35,10 +33,14 @@ def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_Magic(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) # From scVI # # Load single-cell RNA-seq data diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py index 07d7bdc..874c7c1 100644 --- a/codesfromJGandYJ/impute/SAUCIE_impute.py +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -14,9 +14,7 @@ # numpy==1.19.4 parser = argparse.ArgumentParser(description='Impute use SAUCIE') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): @@ -43,7 +41,11 @@ def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_saucie(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py index eca2323..f0f7381 100644 --- a/codesfromJGandYJ/impute/SAVER_impute.py +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -12,9 +12,7 @@ # Use python to generate input for saver.r, then output parser = argparse.ArgumentParser(description='Impute SAVER') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -48,11 +46,11 @@ def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for 
datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) - - - - +if args.origin: + for datasetName in datasetNameList: + impute_saver(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py index 9d8649f..879a6ca 100644 --- a/codesfromJGandYJ/impute/SCIMPUTE_impute.py +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -12,9 +12,7 @@ # Ref: https://github.com/Vivianstats/scImpute parser = argparse.ArgumentParser(description='Impute scImpute') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -58,8 +56,11 @@ def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio) - +if args.origin: + for datasetName in datasetNameList: + impute_scimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index 46ecc8a..ffa4504 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -15,9 +15,7 @@ # scanpy==1.5.1 parser = argparse.ArgumentParser(description='Imputation DCA') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' @@ -48,7 +46,11 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_dca(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py index bcf1a3a..9943321 100644 --- a/codesfromJGandYJ/impute/deepimpute_impute.py +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -10,8 +10,7 @@ parser = argparse.ArgumentParser(description='Impute Deepimpute') # In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') 
-parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() # Ref: @@ -43,7 +42,11 @@ def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_deepimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh index 02f64ca..f41c874 100644 --- a/codesfromJGandYJ/impute/other_dca.sh +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -12,4 +12,6 @@ module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_dca -python3 -W ignore dca_impute.py +# grid +# python3 -W ignore dca_impute.py +python3 -W ignore dca_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_deepimpute.sh b/codesfromJGandYJ/impute/other_deepimpute.sh index b55d6c6..23d18c9 100644 --- a/codesfromJGandYJ/impute/other_deepimpute.sh +++ b/codesfromJGandYJ/impute/other_deepimpute.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore deepimpute_impute.py +# python3 -W ignore deepimpute_impute.py +python3 -W ignore deepimpute_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_magic.sh b/codesfromJGandYJ/impute/other_magic.sh index fd9f5e4..6d85905 100644 --- a/codesfromJGandYJ/impute/other_magic.sh +++ b/codesfromJGandYJ/impute/other_magic.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore MAGIC_impute.py +# python3 -W ignore MAGIC_impute.py +python3 -W ignore MAGIC_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh index f517112..31c8ce1 100644 --- a/codesfromJGandYJ/impute/other_saucie.sh +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -9,9 +9,7 @@ #SBATCH -n 1 # number of cores (AKA tasks) #SBATCH --mem=128G ################################################################# - module load miniconda3 source activate /storage/htc/joshilab/wangjue/conda_R_saucie -# source activate /storage/htc/joshilab/wangjue/conda_R_gpu -# module load cuda/cuda-10.1.243 -python3 -W ignore SAUCIE_impute.py \ No newline at end of file +# python3 -W ignore SAUCIE_impute.py +python3 -W ignore SAUCIE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_saver.sh b/codesfromJGandYJ/impute/other_saver.sh index 17aa82b..2a29663 100644 --- a/codesfromJGandYJ/impute/other_saver.sh +++ b/codesfromJGandYJ/impute/other_saver.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore SAVER_impute.py +# python3 -W ignore SAVER_impute.py +python3 -W ignore SAVER_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_scimpute.sh b/codesfromJGandYJ/impute/other_scimpute.sh index 8dad300..5da0040 100644 --- a/codesfromJGandYJ/impute/other_scimpute.sh +++ 
b/codesfromJGandYJ/impute/other_scimpute.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore SCIMPUTE_impute.py \ No newline at end of file +# python3 -W ignore SCIMPUTE_impute.py +python3 -W ignore SCIMPUTE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_scvi.sh b/codesfromJGandYJ/impute/other_scvi.sh index 888d89b..7b258fa 100644 --- a/codesfromJGandYJ/impute/other_scvi.sh +++ b/codesfromJGandYJ/impute/other_scvi.sh @@ -11,4 +11,5 @@ ################################################################# module load miniconda3 source activate conda_R -python3 -W ignore scVi_impute.py +# python3 -W ignore scVi_impute.py +python3 -W ignore scVi_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py index 5710e36..643204b 100644 --- a/codesfromJGandYJ/impute/scVi_impute.py +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -12,9 +12,7 @@ # pip install scvi==0.6.3 parser = argparse.ArgumentParser(description='scVi imputation') -# In this script, not using arguments -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--ratio', type=str, default='0.1', help='dropoutratio') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') args = parser.parse_args() # Ref: @@ -77,15 +75,18 @@ def impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1): np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon_normalized.npy'.format(datasetName,ratio,seed),normalized_values) - datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] seedList = ['1','2','3'] ratioList = [0.1, 0.3, 0.6, 0.8] -for datasetName in datasetNameList: - for seed in seedList: - for ratio in ratioList: - impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio) +if args.origin: + for datasetName in datasetNameList: + impute_scvi(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio) # celltype: #np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) From 24c88e682e202f042f6b9aeeeccf962243fb7bfe Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Nov 2020 23:32:39 -0600 Subject: [PATCH 080/117] add figure 3 interactions --- results/Klein_correlation.py | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 results/Klein_correlation.py diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py new file mode 100644 index 0000000..1644ff1 --- /dev/null +++ b/results/Klein_correlation.py @@ -0,0 +1,70 @@ +import numpy as np +from scipy import stats +import csv + +# Get correlation from gene interactions from Klein datasets +# Ref: Klein, Allon M., et al. "Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells." Cell 161.5 (2015): 1187-1201. 
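+# The two lists below are parallel over the retained genes only: as the
+# inline '#' comments indicate, geneNumList stores the row index of each
+# geneList entry in the preprocessed Klein expression matrix, and names
+# tagged 'not in the range' were filtered out and have no index, so
+# geneList holds 21 names while geneNumList holds 14 indices.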
+ +geneList=[ + 'Krt8', #4 + 'S100a6', #19 + 'Id2', #895 + 'Id1', #602 + 'ld3', #1559 + 'Ccnd1',# not in the range + 'Ccnb1',# not in the range + 'Ccnd2',# not in the range + 'Ccna1',# not in the range + 'Sox17',# not in the range + 'Col4a1', #226 + 'Pou5f1', #150 + 'Ccnd3', #255 + 'Ccna2',# not in the range + 'Nanog', #1449 + 'Klf4',# not in the range + 'Sox2', # 601 + 'Zfp42', #527 + 'Trim28', #136 + 'Esrrb', #849 + 'Tdh', #206 +] + +geneNumList=[ + 4, + 19, + 895, + 602, + 1559, + 226, + 150, + 255, + 1449, + 601, + 527, + 136, + 849, + 206, +] + +savedir = './fig3/' +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] + +def corCal(method='magic'): + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + x = np.load(filename,allow_pickle=True) + x = x.T + + corr = np.zeros(len(geneNumList),len(geneNumList)) + for i in range(len(geneNumList)): + for j in range(len(geneNumList)): + corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) + + out_filename = savedir+method+".csv" + with open(out_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(corr) + + +for method in methodList: + corCal(method=method) \ No newline at end of file From 3992609e4f34a2efd793d1b5babcc96a0af8ffa9 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 08:01:04 -0600 Subject: [PATCH 081/117] update print format --- main_benchmark_timer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index 6de83ef..a72fe98 100644 --- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -490,7 +490,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): measure_clustering_results(zOut, listResult) print('Total Cluster Number: '+str(len(set(listResult)))) - debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training started') + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training started') #Graph regulizated EM AE with celltype AE, do the additional AE if args.EMtype == 'celltypeEM': # Each cluster has a autoencoder, and organize them back in iteraization @@ -528,13 +528,13 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # torch.save(model.state_dict(),ptfile) ptstatus = model.state_dict() - debuginfoStr(str(bigepoch)+' th iter: Cluster Autoencoder training succeed') + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training succeed') # Use new dataloader scDataInter = scDatasetInter(recon) train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) - debuginfoStr(str(bigepoch)+' th iter: Start construct cell grpah') + debuginfoStr(str(bigepoch)+'th iter: Start construct cell grpah') for epoch in range(1, args.EM_epochs + 1): recon, original, z = train(epoch, EMFlag=True) @@ -549,9 +549,9 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # elif args.adjtype == 'weighted': # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) # adjdense = adj.toarray() - debuginfoStr(str(bigepoch)+' th iter: Cell Graph constructed and pruned') + debuginfoStr(str(bigepoch)+'th iter: Cell Graph constructed and pruned') - debuginfoStr(str(bigepoch)+' th iter: Start Graph Autoencoder training') + 
debuginfoStr(str(bigepoch)+'th iter: Start Graph Autoencoder training') # Whether use GAE embedding if args.useGAEembedding or args.useBothembedding: zDiscret = zOut>np.mean(zOut,axis=0) @@ -562,7 +562,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): zEmbedding=GAEembedding(zDiscret, adj, args) zOut=np.concatenate((zOut,zEmbedding),axis=1) - debuginfoStr(str(bigepoch)+' th iter: Graph Autoencoder training finished') + debuginfoStr(str(bigepoch)+'th iter: Graph Autoencoder training finished') if args.saveinternal: reconOut = recon.detach().cpu().numpy() @@ -647,7 +647,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): adjOld = adjNew listResultOld = listResult # torch.cuda.empty_cache() - debuginfoStr(str(bigepoch)+' th iter: Iteration finished') + debuginfoStr(str(bigepoch)+'th iter: Iteration finished') # Output celltype related results @@ -735,4 +735,4 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.npy',reconOut) np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') - debuginfoStr(str(bigepoch)+'scGNN finished') + debuginfoStr('scGNN finished') From 5130c6470bb26d47824fb94d09b64a2261409f5b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:22:11 -0600 Subject: [PATCH 082/117] update print format in interaction --- results/Klein_correlation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 1644ff1..8586113 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -51,7 +51,10 @@ methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] def corCal(method='magic'): - filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + if method == 'scvinorm': + filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + else: + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) x = np.load(filename,allow_pickle=True) x = x.T From 85550ff013b06ac3aa008f50ccc40dd148e6f5a0 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:25:19 -0600 Subject: [PATCH 083/117] output results in interaction --- results/Klein_correlation.py | 2 +- results/Klein_correlation.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 results/Klein_correlation.sh diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 8586113..2841b8a 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -2,7 +2,7 @@ from scipy import stats import csv -# Get correlation from gene interactions from Klein datasets +# Get correlation from gene interactions from Klein datasets in Figure 3 of scGNN paper # Ref: Klein, Allon M., et al. "Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells." Cell 161.5 (2015): 1187-1201. geneList=[ diff --git a/results/Klein_correlation.sh b/results/Klein_correlation.sh new file mode 100644 index 0000000..12ef235 --- /dev/null +++ b/results/Klein_correlation.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J IE2geK +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore Klein_correlation.py \ No newline at end of file From 1939008b5fca2cf3aa766a1273c8f8b059dac59b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:28:15 -0600 Subject: [PATCH 084/117] output results in interaction --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 2841b8a..4a346e5 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -58,7 +58,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T - corr = np.zeros(len(geneNumList),len(geneNumList)) + corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): for j in range(len(geneNumList)): corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) From 08ef27eebb55e294d8964f61b5a0a6bae481a1aa Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 14:46:26 -0600 Subject: [PATCH 085/117] output results in interaction --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 4a346e5..34a80d8 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -61,7 +61,7 @@ def corCal(method='magic'): corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): for j in range(len(geneNumList)): - corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:]) + corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:])[0] out_filename = savedir+method+".csv" with open(out_filename, "w") as f: From afdd7fa819500a251b8e0c4d7d0bc77f955884ce Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 29 Nov 2020 20:07:03 -0600 Subject: [PATCH 086/117] add final mem in scGNN.py --- main_benchmark_timer.py | 2 +- scGNN.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index a72fe98..f74be32 100644 --- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -217,7 +217,7 @@ def debuginfoStr(info): if args.debuginfo: print ('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+'---'+info) mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print('Init Mem consumption: '+str(mem)) + print('Mem consumption: '+str(mem)) debuginfoStr('scRNA has been successfully loaded') diff --git a/scGNN.py b/scGNN.py index 6f685a3..b4d5e7a 100644 --- a/scGNN.py +++ b/scGNN.py @@ -755,4 +755,6 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype', s results_df = pd.DataFrame(listResult,index=celllist,columns=["Celltype"]) results_df.to_csv(args.outputDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.alphaRegularizePara)+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt') + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Mem consumption: '+str(mem)) print('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+"---scGNN finished") From 
012a693a956c10dbad9e6957d16fd173e125c4b6 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 08:23:59 -0600 Subject: [PATCH 087/117] update package dependence --- requirements.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index fd0d666..9b91edd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -numpy==1.18.1 -torch==1.4.0 -networkx==2.4 -pandas==0.25.3 -rpy2==3.2.4 -matplotlib==3.1.2 -seaborn==0.9.0 -umap-learn==0.3.10 -munkres==1.1.2 +numpy +torch>=1.4.0 +networkx>=2.4 +pandas>=0.25.3 +rpy2>=3.2.4 +matplotlib>=3.1.2 +seaborn>=0.9.0 +umap-learn +munkres>=1.1.2 community -tqdm==4.48.0 \ No newline at end of file +tqdm>=4.48.0 \ No newline at end of file From 92f43c66fef110546d844a79c46cfddb0cd48ecb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 10:14:02 -0600 Subject: [PATCH 088/117] add scIGANs and netNMFsc imputation evaluation --- results/results_impute_others_all.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index fe54037..04dddba 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -14,8 +14,7 @@ args = parser.parse_args() # Notes: -# Call by submit_Impute_others.sh - +# In HPC, call by sbatch submit_Impute_others.sh datasetList = [ '9.Chung', @@ -29,7 +28,12 @@ seedList = ['1','2','3'] ratioList = ['0.1','0.3','0.6','0.8'] -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] + +# sophisticated, not using +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] + +# We should use only log(x+1) if the method permitted +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) @@ -45,9 +49,11 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') # not using now elif method == 'scIGANs': - featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + tmp = df.to_numpy() + featuresImpute = tmp.T elif method == 'netNMFsc': - featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') + featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/'+ratio+'/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') featruesImpute = featruesImpute.T else: featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') From 86c9a726e19789f6acae970d33b5b95cb0d72cab Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 10:49:22 -0600 Subject: [PATCH 089/117] add figure3, all methods --- results/Klein_correlation.py | 22 +++++++++++++++------- results/Klein_correlation.sh | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/results/Klein_correlation.py 
b/results/Klein_correlation.py index 34a80d8..63f2ed0 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -47,16 +47,24 @@ ] savedir = './fig3/' -# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsclog','netNMFsc'] -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def corCal(method='magic'): - if method == 'scvinorm': - filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + if method == 'scIGANs': + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/'+datasetName+'/scIGANs_npyImputeG2E_1_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + x = df.to_numpy() else: - filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) - x = np.load(filename,allow_pickle=True) - x = x.T + if method == 'scvinorm': + filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + x = np.load(filename,allow_pickle=True) + x = x.T + elif method == 'netNMFsc': + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy') + x = np.load(filename,allow_pickle=True) + else: + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + x = np.load(filename,allow_pickle=True) + x = x.T corr = np.zeros((len(geneNumList),len(geneNumList))) for i in range(len(geneNumList)): diff --git a/results/Klein_correlation.sh b/results/Klein_correlation.sh index 12ef235..01eb788 100644 --- a/results/Klein_correlation.sh +++ b/results/Klein_correlation.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J IE2geK +#SBATCH -J Fig3 #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes From cd58ea64563b6bb31ac94689c2281787c6a5d82a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:07:08 -0600 Subject: [PATCH 090/117] add scIGANs and netNMFsc imputation evaluation --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 04dddba..402ca68 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -49,7 +49,7 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') # not using now elif method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) tmp = df.to_numpy() featuresImpute = tmp.T elif method == 
'netNMFsc': From decbddd740a26aa92ff2efe9ca6b60652a0154ee Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:08:21 -0600 Subject: [PATCH 091/117] add figure3, all methods --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 63f2ed0..9b3e476 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -59,7 +59,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T elif method == 'netNMFsc': - filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy') + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy' x = np.load(filename,allow_pickle=True) else: filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) From cb2cec181cef3cfd3bde5e654d5954f457cc838a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:14:36 -0600 Subject: [PATCH 092/117] add figure3, all methods --- results/Klein_correlation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 9b3e476..c581b46 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -1,5 +1,6 @@ import numpy as np from scipy import stats +import pandas as pd import csv # Get correlation from gene interactions from Klein datasets in Figure 3 of scGNN paper From eaa31f2225780ae282db343eb538027f29344b1a Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:32:20 -0600 Subject: [PATCH 093/117] add figure3, all methods --- results/Klein_correlation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index c581b46..9b2fe51 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/'+datasetName+'/scIGANs_npyImputeG2E_1_'+datasetName+'_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': @@ -60,7 +60,7 @@ def corCal(method='magic'): x = np.load(filename,allow_pickle=True) x = x.T elif method == 'netNMFsc': - filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/'+datasetName+'/npyImputeG2E_1_log_imputation.npy' + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/12.Klein/npyImputeG2E_1_log_imputation.npy' x = np.load(filename,allow_pickle=True) else: filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) From 0c30fc775bc3519159da3d44b05dfee6afcda03f Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 11:42:27 -0600 Subject: [PATCH 094/117] add figure3, all methods --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 9b2fe51..1dbae48 100644 --- a/results/Klein_correlation.py +++ 
b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': From 879d156760f4757d1c6807776f68a6b26507f2c2 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 12:01:13 -0600 Subject: [PATCH 095/117] fix a typo --- results/results_impute_others_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index 402ca68..c2b6d9f 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -54,7 +54,7 @@ def outResults(datasetName,seed,ratio,method): featuresImpute = tmp.T elif method == 'netNMFsc': featuresImpute = np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/'+ratio+'/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') - featruesImpute = featruesImpute.T + featuresImpute = featuresImpute.T else: featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') From 142e91a9aa6ef23706ceb42b35deba3b42c32612 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 1 Dec 2020 12:04:48 -0600 Subject: [PATCH 096/117] fix a typo --- results/Klein_correlation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py index 1dbae48..05b86e3 100644 --- a/results/Klein_correlation.py +++ b/results/Klein_correlation.py @@ -52,7 +52,7 @@ def corCal(method='magic'): if method == 'scIGANs': - df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_12.Klein_LTMG_0.0_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) x = df.to_numpy() else: if method == 'scvinorm': From cf553ba7cf738bdfa6993c83fbb9d0d5d4a8f4fa Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 4 Dec 2020 11:40:51 -0600 Subject: [PATCH 097/117] only focus on distribution --- submitCluster_distribution.sh | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh index f631ff7..e36b7ec 100644 --- a/submitCluster_distribution.sh +++ b/submitCluster_distribution.sh @@ -8,25 +8,25 @@ sbatch plot_G2E_$i\_12.sh sbatch plot_G2E_$i\_13.sh done -for i in {0.1,0.3,0.6,0.8} -do -sbatch plot_G2EL_$i\_9.sh -sbatch plot_G1E_$i\_9.sh -sbatch plot_G2F_$i\_9.sh -sbatch plot_N2E_$i\_9.sh +# for i in {0.1,0.3,0.6,0.8} +# do +# sbatch plot_G2EL_$i\_9.sh +# sbatch plot_G1E_$i\_9.sh +# sbatch plot_G2F_$i\_9.sh +# sbatch plot_N2E_$i\_9.sh -sbatch plot_G2EL_$i\_11.sh -sbatch plot_G1E_$i\_11.sh -sbatch plot_G2F_$i\_11.sh -sbatch plot_N2E_$i\_11.sh +# sbatch plot_G2EL_$i\_11.sh +# sbatch plot_G1E_$i\_11.sh +# sbatch plot_G2F_$i\_11.sh +# sbatch plot_N2E_$i\_11.sh 
-sbatch plot_G2EL_$i\_12.sh -sbatch plot_G1E_$i\_12.sh -sbatch plot_G2F_$i\_12.sh -sbatch plot_N2E_$i\_12.sh +# sbatch plot_G2EL_$i\_12.sh +# sbatch plot_G1E_$i\_12.sh +# sbatch plot_G2F_$i\_12.sh +# sbatch plot_N2E_$i\_12.sh -sbatch plot_G2EL_$i\_13.sh -sbatch plot_G1E_$i\_13.sh -sbatch plot_G2F_$i\_13.sh -sbatch plot_N2E_$i\_13.sh -done \ No newline at end of file +# sbatch plot_G2EL_$i\_13.sh +# sbatch plot_G1E_$i\_13.sh +# sbatch plot_G2F_$i\_13.sh +# sbatch plot_N2E_$i\_13.sh +# done \ No newline at end of file From 6eb4472dcfe898e49c604a5bb6bd04607105f9be Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 7 Dec 2020 18:24:23 -0600 Subject: [PATCH 098/117] add npy2csv --- bak/npy2csv_script.py | 50 +++++++++++++++++++++++++++++++++++++++++++ util_function.py | 14 ++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 bak/npy2csv_script.py diff --git a/bak/npy2csv_script.py b/bak/npy2csv_script.py new file mode 100644 index 0000000..cb35774 --- /dev/null +++ b/bak/npy2csv_script.py @@ -0,0 +1,50 @@ +import numpy as np +import pandas as pd + +def convert(method='dca'): + t=np.load(method+'\\9.Chung_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_9.csv',header=None,index=False) + + t=np.load(method+'\\11.Kolodziejczyk_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_11.csv',header=None,index=False) + + t=np.load(method+'\\12.Klein_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_12.csv',header=None,index=False) + + t=np.load(method+'\\13.Zeisel_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_13.csv',header=None,index=False) + +convert('dca') +convert('deepimpute') +convert('magic') +convert('netNMFsc') +convert('saucie') +convert('saver') +convert('scimpute') +convert('scvi') + + +def convertCSV(method='scIGANs'): + df = pd.read_csv(method+'\\9.Chung_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_9.csv',header=None,index=False) + + df = pd.read_csv(method+'\\11.Kolodziejczyk_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_11.csv',header=None,index=False) + + df = pd.read_csv(method+'\\12.Klein_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_12.csv',header=None,index=False) + + df = pd.read_csv(method+'\\13.Zeisel_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_13.csv',header=None,index=False) + +convertCSV('scIGANs') + + diff --git a/util_function.py b/util_function.py index a997186..f159d2a 100644 --- a/util_function.py +++ b/util_function.py @@ -65,13 +65,23 @@ def load_data(datasetName, discreteTag): names = ['x', 'tx', 'allx'] objects = [] for i in range(len(names)): - with open(dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]), 'rb') as f: + #windows + if os.name=='nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.{}".format(datasetName, datasetName, names[i]) + else: + filename = dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]) + with open(filename, 'rb') as f: if sys.version_info > (3, 0): objects.append(pkl.load(f, encoding='latin1')) else: objects.append(pkl.load(f)) x, tx, allx = tuple(objects) - test_idx_reorder = parse_index_file(dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName)) + #windows + if os.name == 'nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.test.index".format(datasetName, datasetName) + else: + filename = dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName) + 
test_idx_reorder = parse_index_file(filename) test_idx_range = np.sort(test_idx_reorder) if datasetName == 'citeseer': From d2cdfce9edbbd2f575b2d5a603621108cb5293fd Mon Sep 17 00:00:00 2001 From: juexinwang Date: Mon, 7 Dec 2020 23:41:35 -0600 Subject: [PATCH 099/117] Significant! Now provides GPU! One known bug: exclude r-ltmgscgnn --- do_timer_test.sh | 9 +++++++++ main_benchmark_timer.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 do_timer_test.sh diff --git a/do_timer_test.sh b/do_timer_test.sh new file mode 100644 index 0000000..6efc029 --- /dev/null +++ b/do_timer_test.sh @@ -0,0 +1,9 @@ +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13.txt + +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13.txt \ No newline at end of file diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py index f74be32..77d1a9d 100644 
--- a/main_benchmark_timer.py +++ b/main_benchmark_timer.py @@ -19,7 +19,7 @@ from graph_function import * from benchmark_util import * from gae_embedding import GAEembedding,measure_clustering_results,test_clustering_benchmark_results -from LTMG_R import * +# from LTMG_R import * import pandas as pd # Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. @@ -46,7 +46,7 @@ help='ratio of cell type change in EM iteration (default: 0.99), 0-1') parser.add_argument('--cluster-epochs', type=int, default=200, metavar='N', help='number of epochs in cluster autoencoder training (default: 200)') -parser.add_argument('--no-cuda', action='store_true', default=True, +parser.add_argument('--no-cuda', action='store_true', default=False, help='enables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') @@ -196,6 +196,10 @@ regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile) regulationMatrix = torch.from_numpy(regulationMatrix) +if args.precisionModel == 'Double': + regulationMatrix = regulationMatrix.type(torch.DoubleTensor) +elif args.precisionModel == 'Float': + regulationMatrix = regulationMatrix.type(torch.FloatTensor) # Original if args.model == 'VAE': @@ -240,6 +244,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): data = data.type(torch.FloatTensor) data = data.to(device) regulationMatrixBatch = regulationMatrix[dataindex,:] + regulationMatrixBatch = regulationMatrixBatch.to(device) optimizer.zero_grad() if args.model == 'VAE': recon_batch, mu, logvar, z = model(data) @@ -712,6 +717,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): adjsample = adjsample.float() elif args.precisionModel == 'Double': adjsample = adjsample.type(torch.DoubleTensor) + adjsample = adjsample.to(device) # generate celltype regularizer from celltype celltypesample = generateCelltypeRegu(listResult) @@ -721,6 +727,7 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): celltypesample = celltypesample.float() elif args.precisionModel == 'Double': celltypesample = celltypesample.type(torch.DoubleTensor) + celltypesample = celltypesample.to(device) for epoch in range(1, args.EM_epochs + 1): recon, original, z = train(epoch, EMFlag=True, taskType='imputation') From 36f4ccbea5ef3a2f15996dc8bf1ef8c04e6028ac Mon Sep 17 00:00:00 2001 From: juexinwang Date: Tue, 8 Dec 2020 14:28:22 -0600 Subject: [PATCH 100/117] add time test for both cpu and gpu --- do_timer_test.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/do_timer_test.sh b/do_timer_test.sh index 6efc029..43c06e8 100644 --- a/do_timer_test.sh +++ b/do_timer_test.sh @@ -1,9 +1,9 @@ -python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9.txt -python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11.txt -python3 -W ignore main_benchmark_timer.py 
--datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12.txt -python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13.txt +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13gpu.txt -python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9.txt -python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11.txt -python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12.txt -python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13.txt \ No newline at end of file +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark 
/home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13cpu.txt From 3b0b1e8475c6467a0f9a04787e0fe15a1d347185 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:21:41 -0600 Subject: [PATCH 101/117] add louvain --- results/louvain.py | 39 +++++++++++++++++++++++++++++++++++++++ results/louvain_magic.sh | 17 +++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 results/louvain.py create mode 100644 results/louvain_magic.sh diff --git a/results/louvain.py b/results/louvain.py new file mode 100644 index 0000000..7306341 --- /dev/null +++ b/results/louvain.py @@ -0,0 +1,39 @@ +import os, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +import numpy as np +from util_function import * +from graph_function import * +from R_util import generateLouvainCluster +import argparse + +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') +parser.add_argument('--input', type=str, default='filename', + help='input filename') +parser.add_argument('--output', type=str, default='filename', + help='input filename') +args = parser.parse_args() + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +zOut = np.load(args.input,allow_pickle=True) +zOut = pcaFunc(zOut, n_components=10) +adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) +listResult,size = generateLouvainCluster(edgeList) +silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) +ari, ami, nmi, cs, fms, 
vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) +resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) +print(resultstr) + +with open(args.output) as fw: + fw.writelines("%s\n" % strr for strr in listResult) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh new file mode 100644 index 0000000..f752cb9 --- /dev/null +++ b/results/louvain_magic.sh @@ -0,0 +1,17 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Louvain_magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt +python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt +python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt +python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file From d535095f45d6f8505a93633db5d7dab4768ab170 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:32:16 -0600 Subject: [PATCH 102/117] fix a bug --- results/louvain.py | 2 +- results/louvain_magic.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/results/louvain.py b/results/louvain.py index 7306341..b8bf3d4 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -27,7 +27,7 @@ bench_celltype=bench_pd.iloc[:,0].to_numpy() zOut = np.load(args.input,allow_pickle=True) -zOut = pcaFunc(zOut, n_components=10) +zOut,re = pcaFunc(zOut, n_components=10) adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) listResult,size = generateLouvainCluster(edgeList) silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index f752cb9..ba200aa 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J Louvain_magic +#SBATCH -J L_magic #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes @@ -12,6 +12,6 @@ module load miniconda3 source activate conda_R python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt -python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt -python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt -python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file +# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt +# python -W ignore louvain.py --input 
othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file From 07d9af1b785fe3a27e1ba51c059d2339be6ad089 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 17:54:27 -0600 Subject: [PATCH 103/117] add benchmark --- results/louvain_magic.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index ba200aa..a3c0d89 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -11,7 +11,7 @@ ################################################################# module load miniconda3 source activate conda_R -python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt -# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt -# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt \ No newline at end of file +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 4f050f3cfb5732b2a69a4a33b22ebd44c9c80f80 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:00:44 -0600 Subject: [PATCH 104/117] add benchmark fw --- results/louvain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/louvain.py b/results/louvain.py index b8bf3d4..05f3ec8 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -35,5 +35,5 @@ resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) print(resultstr) -with open(args.output) as fw: +with open(args.output,'w') as fw: fw.writelines("%s\n" % strr for strr in listResult) From 35eadb91c3890d215521dbdd4df0a3fc0d5299c2 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:16:48 -0600 Subject: [PATCH 105/117] =?UTF-8?q?=E2=80=98update=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- graph_function.py | 25 +++++++++++++++++++++++++ results/louvain.py | 2 +- results/louvain_magic.sh | 7 ++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/graph_function.py b/graph_function.py index f1c65b2..9e30b9a 100644 --- a/graph_function.py +++ b/graph_function.py @@ -71,6 +71,12 @@ def generateAdj(featureMatrix, graphType='KNNgraph', para = None, parallelLimit distanceType = parawords[0] k = int(parawords[1]) edgeList = calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType=distanceType, k=k) + elif 
graphType == 'KNNgraphStatsSingleThreadNoPrune': + if para != None: + parawords = para.split(':') + distanceType = parawords[0] + k = int(parawords[1]) + edgeList = calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType=distanceType, k=k) else: print('Should give graphtype') @@ -330,6 +336,25 @@ def calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType return edgeList +#para: measuareName:k:threshold no prune only +def calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType='euclidean', k=10, param=None): + r""" + Thresholdgraph: KNN Graph with stats one-std based methods, SingleThread version, no boundary, + """ + + edgeList=[] + for i in np.arange(featureMatrix.shape[0]): + tmp=featureMatrix[i,:].reshape(1,-1) + distMat = distance.cdist(tmp,featureMatrix, distanceType) + res = distMat.argsort()[:k+1] + for j in np.arange(1,k+1): + # TODO: check, only exclude large outliners + # if (distMat[0,res[0][j]]<=mean+std) and (distMat[0,res[0][j]]>=mean-std): + weight = 1.0 + edgeList.append((i,res[0][j],weight)) + + return edgeList + # kernelDistance def kernelDistance(distance,delta=1.0): ''' diff --git a/results/louvain.py b/results/louvain.py index 05f3ec8..967b2d6 100644 --- a/results/louvain.py +++ b/results/louvain.py @@ -11,7 +11,7 @@ help='parameter k in KNN graph (default: 10)') parser.add_argument('--knn-distance', type=str, default='euclidean', help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') -parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThreadNoPrune', help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') #Benchmark related parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh index a3c0d89..5669824 100644 --- a/results/louvain_magic.sh +++ b/results/louvain_magic.sh @@ -14,4 +14,9 @@ source activate conda_R python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv # python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv # python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file +# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input 
othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 696fcce085aaa937ad9eae746c355400a120c969 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:23:26 -0600 Subject: [PATCH 106/117] add all methods --- results/louvain.sh | 57 ++++++++++++++++++++++++++++++++++++++++ results/louvain_magic.sh | 22 ---------------- 2 files changed, 57 insertions(+), 22 deletions(-) create mode 100644 results/louvain.sh delete mode 100644 results/louvain_magic.sh diff --git a/results/louvain.sh b/results/louvain.sh new file mode 100644 index 0000000..99af7cc --- /dev/null +++ b/results/louvain.sh @@ -0,0 +1,57 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J L_magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore 
louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input 
othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +# python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +# python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv diff --git a/results/louvain_magic.sh b/results/louvain_magic.sh deleted file mode 100644 index 5669824..0000000 --- a/results/louvain_magic.sh +++ /dev/null @@ -1,22 +0,0 @@ -#! /bin/bash -######################### Batch Headers ######################### -#SBATCH -A xulab -#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J L_magic -#SBATCH -o results-%j.out # give the job output a custom name -#SBATCH -t 2-00:00 # two days time limit -#SBATCH -N 1 # number of nodes -#SBATCH -n 1 # number of cores (AKA tasks) -#SBATCH --mem=128G -################################################################# -module load miniconda3 -source activate conda_R -python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv \ No newline at end of file From 
64e9b37622de65abea036a7afd4fe416d7b09a01 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 18:26:07 -0600 Subject: [PATCH 107/117] add all methods --- results/louvain.sh | 78 +++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/results/louvain.sh b/results/louvain.sh index 99af7cc..4f0e9e0 100644 --- a/results/louvain.sh +++ b/results/louvain.sh @@ -16,42 +16,42 @@ python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_re python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark 
/home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv - -# python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv -# python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv +python -W ignore louvain.py 
--input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt 
--benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv From 534ed1b33b19f864d1a8dd51f20c65ad46e6abb7 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 20:13:27 -0600 Subject: [PATCH 108/117] fix a bug in dca --- codesfromJGandYJ/impute/dca_impute.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py index ffa4504..6b7b2a2 100644 --- a/codesfromJGandYJ/impute/dca_impute.py +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -26,20 +26,17 @@ def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): x = x.tolist() x=x.todense() x=np.asarray(x) + x=x.astype(int) features=x.T - #write dropout_filename = save_path+"dca_input.csv" with open(dropout_filename, "w") as f: writer = csv.writer(f) writer.writerows(features) - os.system("dca "+dropout_filename+ " "+save_path+"tmpdca") - filename=save_path+"tmpdca/mean.tsv" imputed_values = pd.read_csv(filename,sep="\t") imputed_values=imputed_values.T - 
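The x=x.astype(int) added above is the substance of this fix: dca is documented to require unnormalized integer count matrices, so the float-valued dense matrix recovered from the saved sparse features has to be cast before being written out for dca (treat the exact dca input check as an assumption here; the diff itself does not show it). A self-contained sketch of the densify-and-cast step on a stand-in matrix:

    import numpy as np
    from scipy.sparse import csr_matrix

    sp = csr_matrix(np.array([[0., 2., 0.], [1., 0., 3.]]))  # stand-in for the saved sparse features
    x = np.asarray(sp.todense()).astype(int)                 # dense integer counts, as dca expects
    assert x.dtype.kind == 'i' and x.sum() == 6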
np.save('/storage/htc/joshilab/wangjue/scGNN/dca/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] From 68f2e6d2924a7d1b99afc6378b5a8fd98e8136ab Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 9 Dec 2020 21:15:05 -0600 Subject: [PATCH 109/117] update name --- results/louvain.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results/louvain.sh b/results/louvain.sh index 4f0e9e0..3f51bea 100644 --- a/results/louvain.sh +++ b/results/louvain.sh @@ -2,7 +2,7 @@ ######################### Batch Headers ######################### #SBATCH -A xulab #SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute -#SBATCH -J L_magic +#SBATCH -J Louvain #SBATCH -o results-%j.out # give the job output a custom name #SBATCH -t 2-00:00 # two days time limit #SBATCH -N 1 # number of nodes From 137b5c61027d8ccdc17915737c76322f562ad435 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 08:04:03 -0600 Subject: [PATCH 110/117] recheck dca --- results/results_impute_others_all.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index c2b6d9f..da15c51 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -33,7 +33,10 @@ # methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] # We should use only log(x+1) if the method permitted -methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] + +# Temp: just test dca +methodList = ['dca'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) From 98649a84abe51f73cccb0cda846eddc24020b7c4 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 09:14:43 -0600 Subject: [PATCH 111/117] only use 12/13 for dca --- results/results_impute_others_all.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index da15c51..d828554 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -17,8 +17,8 @@ # In HPC, call by sbatch submit_Impute_others.sh datasetList = [ - '9.Chung', - '11.Kolodziejczyk', + # '9.Chung', + # '11.Kolodziejczyk', '12.Klein', '13.Zeisel', ] From f3fbc5ff51282734787a43afb9a4524e525c5c3a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 09:47:02 -0600 Subject: [PATCH 112/117] back to full methods --- results/results_impute_others_all.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py index d828554..c2b6d9f 100644 --- a/results/results_impute_others_all.py +++ b/results/results_impute_others_all.py @@ -17,8 +17,8 @@ # In HPC, call by sbatch submit_Impute_others.sh datasetList = [ - # '9.Chung', - # '11.Kolodziejczyk', + '9.Chung', + '11.Kolodziejczyk', '12.Klein', '13.Zeisel', ] @@ -33,10 +33,7 @@ # methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] # We should use only log(x+1) if the method permitted -# methodList = 
['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] - -# Temp: just test dca -methodList = ['dca'] +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] def outResults(datasetName,seed,ratio,method): featuresOriginal = load_data(datasetName, discreteTag=False) From 5e5853abcc32a44301ba3a92bea8c4277f12ca3a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Dec 2020 17:37:53 -0600 Subject: [PATCH 113/117] add zero percentage calculation --- results/zeroPercentage.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 results/zeroPercentage.py diff --git a/results/zeroPercentage.py b/results/zeroPercentage.py new file mode 100644 index 0000000..cef85e8 --- /dev/null +++ b/results/zeroPercentage.py @@ -0,0 +1,24 @@ +#Calculate Zero percentage in each of the datasets +import numpy as np + +def calcu(dataset='9.Chung',ratio=0.0): + t=np.load('npyImputeG2E_1/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(dataset,ratio),allow_pickle=True) + t=t.tolist() + t=t.todense() + zeroNum = np.where(t==0)[0].shape[0] + allNum = t.shape[0]*t.shape[1] + percent = zeroNum/allNum + print('{} {} {}'.format(zeroNum,allNum,percent)) + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + +ratioList = ['0.0','0.1','0.3','0.6','0.8'] + +for dataset in datasetList: + for ratio in ratioList: + calcu(dataset, ratio) \ No newline at end of file From 4cf1e7df2b8c9e389e05617b539341aaf8224774 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 12 Dec 2020 14:45:21 -0600 Subject: [PATCH 114/117] add tmp results of celltype --- results/results.sh | 25 +++++++++++++++++++++++++ results/results_tmp.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 results/results.sh create mode 100644 results/results_tmp.py diff --git a/results/results.sh b/results/results.sh new file mode 100644 index 0000000..e1f5d8e --- /dev/null +++ b/results/results.sh @@ -0,0 +1,25 @@ +#! 
From 4cf1e7df2b8c9e389e05617b539341aaf8224774 Mon Sep 17 00:00:00 2001
From: Wang
Date: Sat, 12 Dec 2020 14:45:21 -0600
Subject: [PATCH 114/117] add tmp results of celltype

---
 results/results.sh     | 25 +++++++++++++++++++++++++
 results/results_tmp.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 results/results.sh
 create mode 100644 results/results_tmp.py

diff --git a/results/results.sh b/results/results.sh
new file mode 100644
index 0000000..e1f5d8e
--- /dev/null
+++ b/results/results.sh
@@ -0,0 +1,25 @@
+#! /bin/bash
+######################### Batch Headers #########################
+#SBATCH -A xulab
+#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute
+#SBATCH -J Louvain
+#SBATCH -o results-%j.out # give the job output a custom name
+#SBATCH -t 2-00:00 # two days time limit
+#SBATCH -N 1 # number of nodes
+#SBATCH -n 1 # number of cores (AKA tasks)
+#SBATCH --mem=128G
+#################################################################
+module load miniconda3
+source activate conda_R
+
+python -W ignore results_tmp.py --inputOri othermethods/saucie/12.Klein_0.0_1_recon.npy --input otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/saucie/13.Zeisel_0.0_1_recon.npy --input otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/scvi/12.Klein_0.0_1_recon.npy --input otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/scvi/13.Zeisel_0.0_1_recon.npy --input otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --input otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --input otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv
+
+python -W ignore results_tmp.py --inputOri othermethods/scIGANs/12.Klein_0.0_1_recon.npy --input otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv
+python -W ignore results_tmp.py --inputOri othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --input otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv

diff --git a/results/results_tmp.py b/results/results_tmp.py
new file mode 100644
index 0000000..97aab4d
--- /dev/null
+++ b/results/results_tmp.py
@@ -0,0 +1,31 @@
+import os, sys
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+# sys.path.append('../')
+import numpy as np
+from util_function import *
+from graph_function import *
+import argparse
+
+parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem')
+#Benchmark related
+parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',
+                    help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)')
+parser.add_argument('--input', type=str, default='filename',
+                    help='predicted cluster labels, one label per line')
+parser.add_argument('--inputOri', type=str, default='filename',
+                    help='embedding/reconstruction .npy used for the label-free metrics')
+args = parser.parse_args()
+
+#Benchmark
+bench_pd=pd.read_csv(args.benchmark,index_col=0)
+bench_celltype=bench_pd.iloc[:,0].to_numpy()
+
+
+#'saucie/13.txt'
+z_pd = pd.read_csv(args.input,header=None)
+listResult = z_pd.iloc[:,0].to_numpy()
+zOut = np.load(args.inputOri,allow_pickle=True)
+silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult)
+ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult)
+resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs)
+print(resultstr)
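
measureClusteringNoLabel and measureClusteringTrueLabel come from scGNN's
util_function. As an independent sanity check, the headline metrics can be
recomputed directly with scikit-learn; a sketch using one of the file pairs from
results.sh above (paths are illustrative):

import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_score

true_labels = pd.read_csv('Klein_cell_label.csv', index_col=0).iloc[:, 0].to_numpy()
pred_labels = pd.read_csv('12.txt', header=None).iloc[:, 0].to_numpy()
embedding = np.load('12.Klein_0.0_1_recon.npy', allow_pickle=True)

print(adjusted_rand_score(true_labels, pred_labels))          # ARI against the benchmark
print(normalized_mutual_info_score(true_labels, pred_labels)) # NMI against the benchmark
print(silhouette_score(embedding, pred_labels))               # label-free, on the embedding
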
From b1317c522d64fae796d050a492c696cfd463951a Mon Sep 17 00:00:00 2001
From: Wang
Date: Sat, 12 Dec 2020 16:38:01 -0600
Subject: [PATCH 115/117] add saucie plot

---
 codesfromJGandYJ/impute/SAUCIE_impute.py | 25 +++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py
index 874c7c1..a4b0b14 100644
--- a/codesfromJGandYJ/impute/SAUCIE_impute.py
+++ b/codesfromJGandYJ/impute/SAUCIE_impute.py
@@ -33,10 +33,31 @@
     model = SAUCIE.SAUCIE(x.shape[1])
     # train the model!
    model.train(loader_train, steps=2000)
+    #imputation
     reconstruction = model.get_reconstruction(loader_eval)
     reconstruction=np.transpose(reconstruction)
     np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction)
 
+def plot_saucie(seed=1, datasetName='9.Chung', ratio=0.1):
+    filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio)
+    x = np.load(filename,allow_pickle=True)
+    x = x.tolist()
+    x=x.todense()
+    x=np.asarray(x)
+    x=np.log(x+1)
+    loader_eval = SAUCIE.Loader(x, shuffle=False)
+    # clear the computational graph
+    # plot the 2-D embedding, colored by the SAUCIE clusters
+    tf.reset_default_graph()
+    model = SAUCIE.SAUCIE(x.shape[1])
+    model.train(loader_eval, steps=2000)
+    embedding = model.get_embedding(loader_eval)
+    num_clusters, clusters = model.get_clusters(loader_eval)
+    fig = plt.figure()
+    ax = fig.add_subplot(1, 1, 1)
+    ax.scatter(embedding[:, 0], embedding[:, 1], c=clusters)
+    fig.savefig('saucie_'+datasetName+'.png')
+
 datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel']
 seedList = ['1','2','3']
 ratioList = [0.1, 0.3, 0.6, 0.8]
@@ -48,4 +69,6 @@
 for datasetName in datasetNameList:
     for seed in seedList:
         for ratio in ratioList:
-            impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio)
\ No newline at end of file
+            impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio)
+
+# plot_saucie(seed='1', datasetName=datasetName, ratio='0.0')
\ No newline at end of file
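
The diff above relies on tf.reset_default_graph() and model.get_embedding(),
which follow SAUCIE's TensorFlow-1.x-era API, and it assumes the file header
(outside this hunk) already imports tensorflow as tf and pyplot as plt. On a
display-less cluster node, matplotlib also needs a non-interactive backend
before pyplot is imported; a minimal guard:

import matplotlib
matplotlib.use('Agg')            # render to files only; no X display needed
import matplotlib.pyplot as plt
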
From 7d2c74a3148f92f6844787484f567b9b7951d2ff Mon Sep 17 00:00:00 2001
From: Wang
Date: Sun, 27 Dec 2020 10:37:11 -0600
Subject: [PATCH 116/117] add netNMF and scIGAN

---
 .../codeForCellcluster/Run_netNMF_celltype.py | 71 +++++++++++++++
 .../impute/Run_netNMF_imputation.py           | 87 +++++++++++++++++++
 .../impute/run_scIGANS_imputation.py          | 51 +++++++++++
 3 files changed, 209 insertions(+)
 create mode 100644 codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
 create mode 100644 codesfromJGandYJ/impute/Run_netNMF_imputation.py
 create mode 100644 codesfromJGandYJ/impute/run_scIGANS_imputation.py

diff --git a/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
new file mode 100644
index 0000000..19e5b1f
--- /dev/null
+++ b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
@@ -0,0 +1,71 @@
+# This code has not been cleaned yet
+# run netNMF-sc from command line and save outputs to specified directory
+from __future__ import print_function
+import numpy as np
+from warnings import warn
+from joblib import Parallel, delayed
+import copy,argparse,os,math,random,time
+from scipy import sparse, io,linalg
+from scipy.sparse import csr_matrix
+import warnings,os
+from netNMFsc import plot
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
+def main(args):
+    if args.method == 'GD':
+        from netNMFsc import netNMFGD
+        operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1)
+    elif args.method == 'MU':
+        from netNMFsc import netNMFMU
+        operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1)
+
+
+    chung = pd.read_csv(args.filename, header=0,
+                        index_col=0, sep=',')
+    X = chung.values
+    genes = []
+    for gen in chung.index.values:
+        if '.' in gen:
+            genes.append(gen.upper().split('.')[0])
+        else:
+            genes.append(gen.upper())
+    #print(genes)
+    operator.X = X
+    operator.genes = np.asarray(genes)
+    #operator.load_10X(direc=args.tenXdir,genome='mm10')
+    operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity)
+    dictW = operator.fit_transform()
+    W, H = dictW['W'], dictW['H']
+    k,clusters = plot.select_clusters(H,max_clusters=20)
+    plot.tSNE(H,clusters,fname=args.direc + '/netNMFsc_tsne')
+    os.system('mkdir -p %s'%(args.direc))
+    np.save(os.path.join(args.direc,'W.npy'),W)
+    np.save(os.path.join(args.direc,'H.npy'),H)
+    np.save(os.path.join(args.direc, 'cluster.npy'), clusters)
+    return
+#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m","--method",help="either 'GD' for gradient descent or 'MU' for multiplicative update",type=str,default='GD')
+    parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx')
+    parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv')
+    parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='')
+    parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='')
+    parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human')
+    parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble')
+    parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez')
+    parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0)
+    parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99)
+    parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500)
+    parser.add_argument("-t","--tol", help="tolerance for netNMF-sc",type=float,default=1e-2)
+    parser.add_argument("-d","--direc", help="directory to save files",default='')
+    parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10)
+    parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0)
+    parser.add_argument("-x","--tenXdir", help="data is from 10X. Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '')
+    args = parser.parse_args()
+    main(args)
+
+
+#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv'
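
For reference, a hypothetical invocation of Run_netNMF_celltype.py (the flags
are the ones defined in its argparse block; every path below is a placeholder):

import subprocess

subprocess.run([
    'python', 'Run_netNMF_celltype.py',
    '-m', 'GD',                                  # gradient-descent variant
    '-f', '/path/to/Use_expression.csv',         # expression matrix, genes x cells
    '-net', '/path/to/network.npy',              # gene-gene network
    '-netgenes', '/path/to/network_genes.npy',   # gene names for the network
    '-d', 'netNMF_out',                          # output directory
], check=True)
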
diff --git a/codesfromJGandYJ/impute/Run_netNMF_imputation.py b/codesfromJGandYJ/impute/Run_netNMF_imputation.py
new file mode 100644
index 0000000..8c74b72
--- /dev/null
+++ b/codesfromJGandYJ/impute/Run_netNMF_imputation.py
@@ -0,0 +1,87 @@
+# This code has not been cleaned yet
+# run netNMF-sc from command line and save outputs to specified directory
+from __future__ import print_function
+import numpy as np
+from warnings import warn
+from joblib import Parallel, delayed
+import copy,argparse,os,math,random,time
+from scipy import sparse, io,linalg
+from scipy.sparse import csr_matrix
+import warnings,os
+from netNMFsc import plot
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
+def main(args):
+    if args.method == 'GD':
+        from netNMFsc import netNMFGD
+        operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4)
+    elif args.method == 'MU':
+        from netNMFsc import netNMFMU
+        operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4)
+
+    filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(
+        args.Randomdata, args.datasetName,args.dropratio)
+    x = np.load(filename, allow_pickle=True)
+    x = x.tolist()
+    x = x.todense()
+    x = np.asarray(x)
+    if args.process == 'log':
+        x = np.log(x + 1)
+
+    # transpose and add names for rows and cols
+    features = np.transpose(x)
+
+    chung = pd.read_csv(args.filename, header=0,
+                        index_col=0, sep=',')
+    X = features
+    genes = []
+    for gen in chung.index.values:
+        if '.' in gen:
+            genes.append(gen.upper().split('.')[0])
+        else:
+            genes.append(gen.upper())
+    #print(genes)
+    operator.genes = np.asarray(genes)
+    operator.X = X
+    #operator.load_10X(direc=args.tenXdir,genome='mm10')
+    operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity)
+    dictW = operator.fit_transform()
+    W, H = dictW['W'], dictW['H']
+    # k,clusters = plot.select_clusters(H,max_clusters=20)
+    # plot.tSNE(H,clusters,fname=args.direc+ '/netNMFsc_tsne_imputation_' +args.process +'_'+args.Randomdata)
+    # os.system('mkdir -p %s'%(args.direc))
+    np.save(os.path.join(args.direc,args.Randomdata+'_'+args.process+'_imputation.npy'),np.dot(W,H))
+    #np.save(os.path.join(args.direc,'H.npy'),H)
+    #np.save(os.path.join(args.direc, 'cluster.npy'), H)
+    return
+#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m","--method",help="either 'GD' for gradient descent or 'MU' for multiplicative update",type=str,default='GD')
+    parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx')
+    parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv')
+    parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='')
+    parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='')
+    parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human')
+    parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble')
+    parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez')
+    parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0)
+    parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99)
+    parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500)
+    parser.add_argument("-t","--tol", help="tolerance for netNMF-sc",type=float,default=1e-2)
+    parser.add_argument("-d","--direc", help="directory to save files",default='')
+    parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10)
+    parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0)
+    parser.add_argument("-x","--tenXdir", help="data is from 10X. Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '')
+    parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1', help='npyImputeG2E_1,2,3')
+    parser.add_argument('--datasetName', type=str, default='12.Klein', help='12.Klein,13.Zeisel')
+    parser.add_argument('--process', type=str, default='null', help='log/null to process data')
+    parser.add_argument("-Hasdot","--Hasdot",type = bool, help="whether data gene names contain a dot",default = True)
+    parser.add_argument('--dropratio', type=str, default='0.1', help='0.1,0.3,0.6,0.8')
+    args = parser.parse_args()
+    main(args)
+
+
+#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv'

diff --git a/codesfromJGandYJ/impute/run_scIGANS_imputation.py b/codesfromJGandYJ/impute/run_scIGANS_imputation.py
new file mode 100644
index 0000000..20faf7d
--- /dev/null
+++ b/codesfromJGandYJ/impute/run_scIGANS_imputation.py
@@ -0,0 +1,51 @@
+# This code has not been cleaned yet
+import sys,os
+import numpy as np
+import pandas as pd
+import argparse
+sys.path.append('../')
+sys.path.append('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/scIGANs/')
+
+parser = argparse.ArgumentParser(description='')
+parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1',help='npyImputeG2E_1,2,3')
+parser.add_argument('--datasetName', type=str, default='12.Klein',help='12.Klein,13.Zeisel')
+parser.add_argument('--process', type=str, default='null',help='log/null to process data')
+parser.add_argument('--exec', type=str, default='scIGANs',help='scIGANs executable to run')
+parser.add_argument('--dropratio', type=str, default='0.1',help='0.1,0.3,0.6,0.8')
+parser.add_argument('--csvsavepath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='directory to save the intermediate csv')
+parser.add_argument('--labelpath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='directory of the cell label files')
+parser.add_argument('--outpath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200/',help='output directory')
+parser.add_argument('--Epotch', type=str, default='200',help='number of training epochs')
+args = parser.parse_args()

+# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0)
+
+filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(args.Randomdata,args.datasetName,args.dropratio)
+x = np.load(filename,allow_pickle=True)
+x = x.tolist()
+x=x.todense()
+x=np.asarray(x)
+if args.process=='log':
+    x=np.log(x+1)
+    saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features_log.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata,
+        args.datasetName,args.dropratio)
+elif args.process=='null':
+    saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata,
+        args.datasetName,args.dropratio)
+#transpose and add names for rows and cols
+features=np.transpose(x)
+
+pd.DataFrame(features).to_csv(saveintedir,sep='\t')
+
+label = '{}{}/{}_only_label.csv'.format(args.labelpath,args.datasetName,args.datasetName.split('.')[-1])
+#/storage/htc/joshilab/jghhd/singlecellTest/Data/12.Klein/Klein_only_label.csv
+
+cmd = '{} {} -l {} -e {} -o {}{}'.format(args.exec,saveintedir,label,args.Epotch,args.outpath,args.datasetName)
+print(cmd)
+os.system(cmd)
+#usage: scIGANs <expression.csv> -l <label.csv> -e 50
+
+# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix)
+# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='')
+
+#np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction)
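
A hypothetical driver for the scIGANs wrapper above, sweeping the same seeds
and dropout ratios used throughout this series (flag values follow the argparse
defaults; adjust paths to the local cluster):

import subprocess

for randomdata in ['npyImputeG2E_1', 'npyImputeG2E_2', 'npyImputeG2E_3']:
    for dataset in ['12.Klein', '13.Zeisel']:
        for ratio in ['0.1', '0.3', '0.6', '0.8']:
            subprocess.run([
                'python', 'run_scIGANS_imputation.py',
                '--Randomdata', randomdata,
                '--datasetName', dataset,
                '--dropratio', ratio,
                '--process', 'log',    # run scIGANs on log(x+1) input
            ], check=True)
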
From 89af13cfc0fe53cac38a88344bc5b07d65f59c3e Mon Sep 17 00:00:00 2001
From: juexinwang
Date: Thu, 18 Feb 2021 22:41:21 -0600
Subject: [PATCH 117/117] Create choose_louvain.py

Add how and why Louvain works with python-igraph, but python-louvain does not work
---
 scripts/choose_louvain.py | 105 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 scripts/choose_louvain.py

diff --git a/scripts/choose_louvain.py b/scripts/choose_louvain.py
new file mode 100644
index 0000000..42cbe0c
--- /dev/null
+++ b/scripts/choose_louvain.py
@@ -0,0 +1,105 @@
+# Script to test the behavior of the available Louvain implementations
+
+# Option 1: Original version, use the R version of louvain; it takes time to link R and needs rpy2 installed.
+# Not used anymore.
+# Clustering is different between Case one and two.
+import pandas as pd
+import rpy2.robjects as ro
+from rpy2.robjects.packages import importr
+from rpy2.robjects import r, pandas2ri
+pandas2ri.activate()
+
+# case one:
+edgeList = []
+edgeList.append((0,2,1.0))
+edgeList.append((1,2,1.0))
+edgeList.append((2,3,1.0))
+edgeList.append((3,4,1.0))
+edgeList.append((4,5,1.0))
+edgeList.append((4,6,1.0))
+
+# case two: reset the list first, so only one case is active per run
+edgeList = []
+edgeList.append((0,2,1.0))
+edgeList.append((1,2,1.0))
+edgeList.append((2,3,0.1))
+edgeList.append((3,4,1.0))
+edgeList.append((4,5,1.0))
+edgeList.append((4,6,1.0))
+
+fromVec = []
+toVec = []
+weightVec = []
+for edge in edgeList:
+    fromVec.append(edge[0])
+    toVec.append(edge[1])
+    weightVec.append(edge[2])
+
+igraph = importr('igraph')
+base = importr('base')
+fromV = ro.FloatVector(fromVec)
+toV = ro.FloatVector(toVec)
+# weightV= ro.FloatVector([0.1,1.0,1.0,0.1,1.0])
+weightV= ro.FloatVector(weightVec)
+links = ro.DataFrame({'from':fromV,'to':toV,'weight':weightV})
+g = igraph.graph_from_data_frame(links,directed = False)
+cl = igraph.cluster_louvain(g)
+
+def as_dict(vector):
+    """Convert an RPy2 ListVector to a Python dict"""
+    result = {}
+    for i, name in enumerate(vector.names):
+        if isinstance(vector[i], ro.ListVector):
+            result[name] = as_dict(vector[i])
+        elif len(vector[i]) == 1:
+            result[name] = vector[i][0]
+        else:
+            result[name] = vector[i]
+    return result
+
+cl_dict = as_dict(cl)
+df = pd.DataFrame()
+# df['Cluster']=cl_dict['membership']
+size = float(len(set(cl_dict['membership'])))
+
+listResult=[]
+count = 0
+for i in range(len(cl_dict['membership'])):
+    listResult.append(int(cl_dict['membership'][i])-1)
+    count += 1
+
+# Option 2: use package python-louvain, but it does not work for us:
+# clustering is identical between Case one and two, so we cannot use it.
+import networkx as nx
+import community as community_louvain
+G = nx.Graph()
+G.add_weighted_edges_from(edgeList)
+partition = community_louvain.best_partition(G,weight='weight')
+
+
+# Option 3: use igraph, pure python, and looks right:
+# clustering differs between Case one and two, so this is the one we adopt.
+import numpy as np
+from igraph import *
+#Case 1:
+W=np.zeros((7,7))
+W[0,2]=1.0
+W[1,2]=1.0
+W[2,3]=1.0
+W[3,4]=1.0
+W[4,5]=1.0
+W[4,6]=1.0
+
+#Case 2:
+W=np.zeros((7,7))
+W[0,2]=1.0
+W[1,2]=1.0
+W[2,3]=0.1
+W[3,4]=1.0
+W[4,5]=1.0
+W[4,6]=1.0
+
+graph = Graph.Weighted_Adjacency(W.tolist(), mode=ADJ_UNDIRECTED, attr="weight", loops=False)
+louvain_partition = graph.community_multilevel(weights=graph.es['weight'], return_levels=False)
+print(louvain_partition)
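
To make the comparison above reproducible in one run, a compact version of the
Option 3 check that builds both cases and prints the memberships side by side (a
sketch; it assumes python-igraph is installed and reuses the same 7-node toy
graph):

import numpy as np
from igraph import Graph, ADJ_UNDIRECTED

def louvain_membership(bridge_weight):
    # same toy graph as above; only the 2-3 bridge weight differs between cases
    W = np.zeros((7, 7))
    for i, j, w in [(0,2,1.0),(1,2,1.0),(2,3,bridge_weight),(3,4,1.0),(4,5,1.0),(4,6,1.0)]:
        W[i, j] = w
    g = Graph.Weighted_Adjacency(W.tolist(), mode=ADJ_UNDIRECTED, attr='weight', loops=False)
    return g.community_multilevel(weights=g.es['weight']).membership

print(louvain_membership(1.0))  # case one: strong bridge
print(louvain_membership(0.1))  # case two: weak bridge; the communities should split apart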