diff --git a/bak/npy2csv_script.py b/bak/npy2csv_script.py new file mode 100644 index 0000000..cb35774 --- /dev/null +++ b/bak/npy2csv_script.py @@ -0,0 +1,50 @@ +import numpy as np +import pandas as pd + +def convert(method='dca'): + t=np.load(method+'\\9.Chung_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_9.csv',header=None,index=False) + + t=np.load(method+'\\11.Kolodziejczyk_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_11.csv',header=None,index=False) + + t=np.load(method+'\\12.Klein_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_12.csv',header=None,index=False) + + t=np.load(method+'\\13.Zeisel_0.0_1_recon.npy') + df = pd.DataFrame(t) + df.to_csv(method+'_13.csv',header=None,index=False) + +convert('dca') +convert('deepimpute') +convert('magic') +convert('netNMFsc') +convert('saucie') +convert('saver') +convert('scimpute') +convert('scvi') + + +def convertCSV(method='scIGANs'): + df = pd.read_csv(method+'\\9.Chung_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_9.csv',header=None,index=False) + + df = pd.read_csv(method+'\\11.Kolodziejczyk_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_11.csv',header=None,index=False) + + df = pd.read_csv(method+'\\12.Klein_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_12.csv',header=None,index=False) + + df = pd.read_csv(method+'\\13.Zeisel_0.0_1_recon.csv.txt',sep='\s+',index_col=0) + df = df.T + df.to_csv(method+'_13.csv',header=None,index=False) + +convertCSV('scIGANs') + + diff --git a/otherresults/BAK_MAGIC.py b/bak/otherresults/BAK_MAGIC.py similarity index 100% rename from otherresults/BAK_MAGIC.py rename to bak/otherresults/BAK_MAGIC.py diff --git a/otherresults/MAGIC_analysis.sh b/bak/otherresults/MAGIC_analysis.sh similarity index 100% rename from otherresults/MAGIC_analysis.sh rename to bak/otherresults/MAGIC_analysis.sh diff --git a/otherresults/MAGIC_analysis_usage.sh b/bak/otherresults/MAGIC_analysis_usage.sh similarity index 100% rename from otherresults/MAGIC_analysis_usage.sh rename to bak/otherresults/MAGIC_analysis_usage.sh diff --git a/otherresults/MAGIC_impute.py b/bak/otherresults/MAGIC_impute.py similarity index 100% rename from otherresults/MAGIC_impute.py rename to bak/otherresults/MAGIC_impute.py diff --git a/otherresults/MAGIC_impute_usage.py b/bak/otherresults/MAGIC_impute_usage.py similarity index 100% rename from otherresults/MAGIC_impute_usage.py rename to bak/otherresults/MAGIC_impute_usage.py diff --git a/otherresults/Other_Results_Evaluation.sh b/bak/otherresults/Other_Results_Evaluation.sh similarity index 100% rename from otherresults/Other_Results_Evaluation.sh rename to bak/otherresults/Other_Results_Evaluation.sh diff --git a/otherresults/Other_results_Reading.py b/bak/otherresults/Other_results_Reading.py similarity index 100% rename from otherresults/Other_results_Reading.py rename to bak/otherresults/Other_results_Reading.py diff --git a/otherresults/Other_results_celltype.py b/bak/otherresults/Other_results_celltype.py similarity index 100% rename from otherresults/Other_results_celltype.py rename to bak/otherresults/Other_results_celltype.py diff --git a/otherresults/Other_results_impute.py b/bak/otherresults/Other_results_impute.py similarity index 100% rename from otherresults/Other_results_impute.py rename to bak/otherresults/Other_results_impute.py diff --git a/otherresults/README.md b/bak/otherresults/README.md similarity index 100% rename from otherresults/README.md 
rename to bak/otherresults/README.md diff --git a/otherresults/SAUCIE_analysis.sh b/bak/otherresults/SAUCIE_analysis.sh similarity index 100% rename from otherresults/SAUCIE_analysis.sh rename to bak/otherresults/SAUCIE_analysis.sh diff --git a/otherresults/SAUCIE_celltype.py b/bak/otherresults/SAUCIE_celltype.py similarity index 100% rename from otherresults/SAUCIE_celltype.py rename to bak/otherresults/SAUCIE_celltype.py diff --git a/otherresults/SAUCIE_impute.py b/bak/otherresults/SAUCIE_impute.py similarity index 100% rename from otherresults/SAUCIE_impute.py rename to bak/otherresults/SAUCIE_impute.py diff --git a/otherresults/SAVER_impute.R b/bak/otherresults/SAVER_impute.R similarity index 100% rename from otherresults/SAVER_impute.R rename to bak/otherresults/SAVER_impute.R diff --git a/otherresults/SCIMPUTE_impute.R b/bak/otherresults/SCIMPUTE_impute.R similarity index 100% rename from otherresults/SCIMPUTE_impute.R rename to bak/otherresults/SCIMPUTE_impute.R diff --git a/otherresults/dca_impute.py b/bak/otherresults/dca_impute.py similarity index 100% rename from otherresults/dca_impute.py rename to bak/otherresults/dca_impute.py diff --git a/otherresults/scVi_impute.py b/bak/otherresults/scVi_impute.py similarity index 100% rename from otherresults/scVi_impute.py rename to bak/otherresults/scVi_impute.py diff --git a/otherresults/simulation_generator.R b/bak/otherresults/simulation_generator.R similarity index 100% rename from otherresults/simulation_generator.R rename to bak/otherresults/simulation_generator.R diff --git a/results/calculateROGUE.R b/bak/results/calculateROGUE.R similarity index 100% rename from results/calculateROGUE.R rename to bak/results/calculateROGUE.R diff --git a/results/compare_varID.py b/bak/results/compare_varID.py similarity index 100% rename from results/compare_varID.py rename to bak/results/compare_varID.py diff --git a/results/jobinfo_imp_23dropout.txt b/bak/results/jobinfo_imp_23dropout.txt similarity index 100% rename from results/jobinfo_imp_23dropout.txt rename to bak/results/jobinfo_imp_23dropout.txt diff --git a/results/jobinfo_imp_explore.txt b/bak/results/jobinfo_imp_explore.txt similarity index 100% rename from results/jobinfo_imp_explore.txt rename to bak/results/jobinfo_imp_explore.txt diff --git a/results/jobinfo_imp_louvain_2.txt b/bak/results/jobinfo_imp_louvain_2.txt similarity index 100% rename from results/jobinfo_imp_louvain_2.txt rename to bak/results/jobinfo_imp_louvain_2.txt diff --git a/results/results_ROGUE.py b/bak/results/results_ROGUE.py similarity index 100% rename from results/results_ROGUE.py rename to bak/results/results_ROGUE.py diff --git a/results/results_Reading.py b/bak/results/results_Reading.py similarity index 99% rename from results/results_Reading.py rename to bak/results/results_Reading.py index 88f34aa..50ebc2b 100644 --- a/results/results_Reading.py +++ b/bak/results/results_Reading.py @@ -13,6 +13,7 @@ args = parser.parse_args() # Note: +# Main Check results # Generate results in python other than in shell for better organization # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel diff --git a/results/results_Reading_23.py b/bak/results/results_Reading_23.py similarity index 100% rename from results/results_Reading_23.py rename to bak/results/results_Reading_23.py diff --git a/results/results_Reading_23dropout.py b/bak/results/results_Reading_23dropout.py similarity index 
100% rename from results/results_Reading_23dropout.py rename to bak/results/results_Reading_23dropout.py diff --git a/results/results_Reading_explore.py b/bak/results/results_Reading_explore.py similarity index 100% rename from results/results_Reading_explore.py rename to bak/results/results_Reading_explore.py diff --git a/results/results_Reading_graph.py b/bak/results/results_Reading_graph.py similarity index 100% rename from results/results_Reading_graph.py rename to bak/results/results_Reading_graph.py diff --git a/results/results_imputation.sh b/bak/results/results_imputation.sh similarity index 100% rename from results/results_imputation.sh rename to bak/results/results_imputation.sh diff --git a/results/results_imputation_0.3.sh b/bak/results/results_imputation_0.3.sh similarity index 100% rename from results/results_imputation_0.3.sh rename to bak/results/results_imputation_0.3.sh diff --git a/results/results_imputation_grid.sh b/bak/results/results_imputation_grid.sh similarity index 100% rename from results/results_imputation_grid.sh rename to bak/results/results_imputation_grid.sh diff --git a/results/results_impute.py b/bak/results/results_impute.py similarity index 94% rename from results/results_impute.py rename to bak/results/results_impute.py index 61796dc..f265477 100644 --- a/results/results_impute.py +++ b/bak/results/results_impute.py @@ -56,8 +56,8 @@ dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy') featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy') -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='') def imputeResult(inputData): ''' diff --git a/results/results_impute_graph_ROC.py b/bak/results/results_impute_graph_ROC.py similarity index 100% rename from results/results_impute_graph_ROC.py rename to bak/results/results_impute_graph_ROC.py diff --git a/results/results_impute_graph_ROC.sh b/bak/results/results_impute_graph_ROC.sh similarity index 100% rename from results/results_impute_graph_ROC.sh rename to bak/results/results_impute_graph_ROC.sh diff --git a/results/submitCluster_Result_Celltype.sh b/bak/results/submitCluster_Result_Celltype.sh similarity index 100% rename from results/submitCluster_Result_Celltype.sh rename to bak/results/submitCluster_Result_Celltype.sh diff --git a/results/submitCluster_Result_Impute.sh b/bak/results/submitCluster_Result_Impute.sh similarity index 100% rename from results/submitCluster_Result_Impute.sh rename to bak/results/submitCluster_Result_Impute.sh diff --git a/results/submitCluster_Result_Impute_23.sh b/bak/results/submitCluster_Result_Impute_23.sh similarity index 100% rename from results/submitCluster_Result_Impute_23.sh rename to bak/results/submitCluster_Result_Impute_23.sh diff --git a/results/submitCluster_Result_Impute_23dropout.sh b/bak/results/submitCluster_Result_Impute_23dropout.sh similarity index 100% rename from results/submitCluster_Result_Impute_23dropout.sh rename to 
bak/results/submitCluster_Result_Impute_23dropout.sh diff --git a/results/submitCluster_Result_Impute_explore.sh b/bak/results/submitCluster_Result_Impute_explore.sh similarity index 100% rename from results/submitCluster_Result_Impute_explore.sh rename to bak/results/submitCluster_Result_Impute_explore.sh diff --git a/results/submitCluster_Result_Impute_graph.sh b/bak/results/submitCluster_Result_Impute_graph.sh similarity index 100% rename from results/submitCluster_Result_Impute_graph.sh rename to bak/results/submitCluster_Result_Impute_graph.sh diff --git a/results/summary.sh b/bak/results/summary.sh similarity index 100% rename from results/summary.sh rename to bak/results/summary.sh diff --git a/results/summary_cmd.py b/bak/results/summary_cmd.py similarity index 100% rename from results/summary_cmd.py rename to bak/results/summary_cmd.py diff --git a/benchmark_util.py b/benchmark_util.py index d85d409..d2fc1ba 100644 --- a/benchmark_util.py +++ b/benchmark_util.py @@ -530,6 +530,7 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - y) + rmse = np.sqrt(np.mean((x - y)**2)) # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -538,8 +539,9 @@ def imputation_error(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - yuse) + rmse = np.sqrt(np.mean((x - yuse)**2)) # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result), rmse # IMPUTATION METRICS @@ -562,6 +564,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): all_index = i[ix], j[ix] x, y = X_mean[all_index], X[all_index] result = np.abs(x - np.log(y+1)) + rmse = np.sqrt(np.mean((x - np.log(y+1))**2)) # If the input is a sparse matrix else: all_index = i[ix], j[ix] @@ -570,10 +573,11 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix): yuse = scipy.sparse.lil_matrix.todense(y) yuse = np.asarray(yuse).reshape(-1) result = np.abs(x - np.log(yuse+1)) + rmse = np.sqrt(np.mean((x - np.log(yuse+1))**2)) # return np.median(np.abs(x - yuse)) - return np.mean(result), np.median(result), np.min(result), np.max(result) + return np.mean(result), np.median(result), np.min(result), np.max(result), rmse -# cosine similarity +# cosine similarity with log def imputation_cosine_log(X_mean, X, X_zero, i, j, ix): """ X_mean: imputed dataset diff --git a/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py new file mode 100644 index 0000000..19e5b1f --- /dev/null +++ b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py @@ -0,0 +1,71 @@ +# This code has not been cleaned yet +# run netNMF-sc from command line and save outputs to specified directory +from __future__ import print_function +import numpy as np +from warnings import warn +from joblib import Parallel, delayed +import copy,argparse,os,math,random,time +from scipy import sparse, io,linalg +from scipy.sparse import csr_matrix +import warnings,os +from netNMFsc import plot +warnings.simplefilter(action='ignore', category=FutureWarning) +import pandas as pd + +def main(args): + if args.method == 'GD': + from netNMFsc import netNMFGD + operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1) + elif args.method == 'MU': + from netNMFsc
import netNMFMU + operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=1) + + + chung = pd.read_csv(args.filename, header=0, + index_col=0, sep=',') + X = chung.values + genes = [] + for gen in chung.index.values: + if '.' in gen: + genes.append(gen.upper().split('.')[0]) + else: + genes.append(gen.upper()) + #print(genes) + operator.X = X + operator.genes = np.asarray(genes) + #operator.load_10X(direc=args.tenXdir,genome='mm10') + operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity) + dictW = operator.fit_transform() + W, H = dictW['W'], dictW['H'] + k,clusters = plot.select_clusters(H,max_clusters=20) + plot.tSNE(H,clusters,fname=args.direc + '/netNMFsc_tsne') + os.system('mkdir -p %s'%(args.direc)) + np.save(os.path.join(args.direc,'W.npy'),W) + np.save(os.path.join(args.direc,'H.npy'),H) + np.save(os.path.join(args.direc, 'cluster.npy'), clusters) + return +#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/ + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-m","--method",help="either 'GD for gradient descent or MU for multiplicative update",type=str,default='GD') + parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx') + parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv') + parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='') + parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='') + parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human') + parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble') + parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez') + parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0) + parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99) + parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500) + parser.add_argument("-t","--tol", help="tolerence for netNMF-sc",type=float,default=1e-2) + parser.add_argument("-d","--direc", help="directory to save files",default='') + parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10) + parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0) + parser.add_argument("-x","--tenXdir", help="data is from 10X. 
Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '') + args = parser.parse_args() + main(args) + + +#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv' diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py deleted file mode 100644 index c0c1f22..0000000 --- a/codesfromJGandYJ/impute code/MAGIC_impute.py +++ /dev/null @@ -1,82 +0,0 @@ -# Analysis using MAGIC method -import magic -import pandas as pd -import matplotlib.pyplot as plt -import numpy as np -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -#from benchmark_util import impute_dropout - -def impute_dropout(X, rate=0.1): - """ - X: original testing set - ======== - returns: - X_zero: copy of X with zeros - i, j, ix: indices of where dropout is applied - """ - #If the input is a dense matrix - if isinstance(X, np.ndarray): - X_zero = np.copy(X) - # select non-zero subset - i,j = np.nonzero(X_zero) - # If the input is a sparse matrix - else: - X_zero = scipy.sparse.lil_matrix.copy(X) - # select non-zero subset - i,j = X_zero.nonzero() - # choice number 1 : select 10 percent of the non zero values (so that distributions overlap enough) - ix = np.random.choice(range(len(i)), int(np.floor(0.1 * len(i))), replace=False) - X_zero[i[ix], j[ix]] *= np.random.binomial(1, rate) - # choice number 2, focus on a few but corrupt binomially - #ix = np.random.choice(range(len(i)), int(slice_prop * np.floor(len(i))), replace=False) - #X_zero[i[ix], j[ix]] = np.random.binomial(X_zero[i[ix], j[ix]].astype(np.int), rate) - return X_zero, i, j, ix - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -args = parser.parse_args() - - -# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - -# Load single-cell RNA-seq data -# Default is KNN=5 -magic_operator = magic.MAGIC() -# magic_operator = magic.MAGIC(knn=10) -X_magic = magic_operator.fit_transform(x, genes="all_genes") -recon = X_magic - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/magic/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),recon) - - -# From scVI -# # Load single-cell RNA-seq data -# scdata = magic.mg.SCData(x, "sc-seq") -# print(scdata) - -# scdata.run_magic(n_pca_components=20, random_pca=True, t=6, k=30, ka=10, epsilon=1, rescale_percent=99) - -# if len(sys.argv) == 2: -# np.save("t_MAGIC.npy", scdata.magic.data.as_matrix()) diff --git a/codesfromJGandYJ/impute code/SAVER_impute.py 
b/codesfromJGandYJ/impute code/SAVER_impute.py deleted file mode 100644 index 5d32405..0000000 --- a/codesfromJGandYJ/impute code/SAVER_impute.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saver/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x - - - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - - - - diff --git a/codesfromJGandYJ/impute code/SCIMPUTE.py b/codesfromJGandYJ/impute code/SCIMPUTE.py deleted file mode 100644 index 246239d..0000000 --- a/codesfromJGandYJ/impute code/SCIMPUTE.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/saver/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = 
'/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scimpute/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x - - - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - - - - diff --git a/codesfromJGandYJ/impute code/dca_impute.py b/codesfromJGandYJ/impute code/dca_impute.py deleted file mode 100644 index 0496364..0000000 --- a/codesfromJGandYJ/impute code/dca_impute.py +++ /dev/null @@ -1,79 +0,0 @@ -#from dca.api import dca -#import anndata -#import matplotlib.pyplot as plt -#import numpy as np -#import time -#import pandas as pd - -#Ref: -# https://github.com/theislab/dca/blob/master/tutorial.ipynb -#z = pd.read_csv('/home/wangjue/biodata/scData/MMPbasal.csv') -#z = z.to_numpy() -#z = z[:,:-1] - -#selected = np.std(z, axis=0).argsort()[-2000:][::-1] -#expression_data = z[:, selected] - -#train = anndata.AnnData(expression_data) -#res = dca(train, verbose=True) -#train.X - -import os -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import torch -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/dca/', - help='output filefolder') -args = parser.parse_args() - - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - - - -features=x.T - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - - - -os.system("dca "+dropout_filename+ " "+save_path+datasetNameStr) - -filename=save_path+datasetNameStr+"/mean.tsv" -imputed_values = pd.read_csv(filename,sep="\t") -imputed_values=imputed_values.T - -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/dca/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),imputed_values) \ No newline at end of file diff --git a/codesfromJGandYJ/impute 
code/deepimpute_impute.py b/codesfromJGandYJ/impute code/deepimpute_impute.py deleted file mode 100644 index 6c31962..0000000 --- a/codesfromJGandYJ/impute code/deepimpute_impute.py +++ /dev/null @@ -1,59 +0,0 @@ -import os -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from deepimpute.multinet import MultiNet -import torch -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/deepimpute/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -#x=np.log(x+1) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/deepimpute_nolog/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -features=x -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -data = pd.read_csv(dropout_filename, header=None) -model = MultiNet() -model.fit(data) -imputed = model.predict(data) - - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed) - diff --git a/codesfromJGandYJ/impute code/saucie_impute_t.py b/codesfromJGandYJ/impute code/saucie_impute_t.py deleted file mode 100644 index 5831c63..0000000 --- a/codesfromJGandYJ/impute code/saucie_impute_t.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -import tensorflow as tf -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/otherresults/SAUCIE-master/SAUCIE-master/') -from model import SAUCIE -from loader import Loader -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import argparse -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -args = parser.parse_args() - -# x = 
np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0) -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - -x=np.transpose(x) - -saucie = SAUCIE(x.shape[1]) -loadtrain = Loader(x, shuffle=True) -saucie.train(loadtrain, steps=1000) - -loadeval = Loader(x, shuffle=False) -reconstruction = saucie.get_reconstruction(loadeval) - -reconstruction=np.transpose(reconstruction) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix) -# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') - -np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction) - - diff --git a/codesfromJGandYJ/impute code/scVi_impute.py b/codesfromJGandYJ/impute code/scVi_impute.py deleted file mode 100644 index 6ce9383..0000000 --- a/codesfromJGandYJ/impute code/scVi_impute.py +++ /dev/null @@ -1,102 +0,0 @@ -import os -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset -from scvi.models import VAE -from scvi.inference import UnsupervisedTrainer -import torch -import csv -import argparse -import sys -sys.path.append('../') -sys.path.append('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scGNN-master/') -from benchmark_util import impute_dropout - - -parser = argparse.ArgumentParser(description='') -parser.add_argument('--data', type=str, default='data1',help='data1,2,3') -parser.add_argument('--datasetName', type=str, default='MMPbasal_2000',help='MMPbasal_2000') -parser.add_argument('--discreteTag', action='store_true', default=False, - help='whether input is raw or 0/1 (default: False)') -parser.add_argument('--ratio', type=str, default='0.1', - help='dropoutratio') -parser.add_argument('--outfolder', type=str, default='/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/', - help='output filefolder') -args = parser.parse_args() - -# Ref: -# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb - -if args.discreteTag: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scData/{}/{}.features.D.csv'.format(args.datasetName,args.datasetName) -else: - filename = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/{}/{}_LTMG_0.1_features.npy'.format(args.data,args.datasetName) -filenameFull = filename -save_path = '/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/scvi/{}/'.format(args.data) - -discreteStr = '' -if args.discreteTag: - discreteStr = 'D' -datasetNameStr = args.datasetName+discreteStr - -x = np.load(filename,allow_pickle=True) -x = x.tolist() -x=x.todense() -x=np.asarray(x) -x=np.log(x+1) - - -featuresOriginal = np.copy(x) -features, dropi, dropj, dropix = impute_dropout(featuresOriginal, rate=float(args.ratio)) - -#transpose and add names for rows and cols -features=np.transpose(features) 
-rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) -features=np.concatenate([rowname,features],axis=1) -colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) -features=np.concatenate([colname,features],axis=0) - -#write -dropout_filename = save_path+datasetNameStr+"_dropout.csv" -with open(dropout_filename, "w") as f: - writer = csv.writer(f) - writer.writerows(features) - -# gene_dataset = CortexDataset(save_path=save_path, total_genes=558) -gene_dataset = CsvDataset(dropout_filename, save_path=save_path+args.data+"/") - -n_epochs = 400 -lr = 1e-3 -use_batches = False -use_cuda = True - -vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) -trainer = UnsupervisedTrainer( - vae, - gene_dataset, - train_size=0.75, - use_cuda=use_cuda, - frequency=5, -) - -trainer.train(n_epochs=n_epochs, lr=lr) - - -full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) -latent, batch_indices, labels = full.sequential().get_latent() -batch_indices = batch_indices.ravel() - -# use imputation -imputed_values = full.sequential().imputation() -normalized_values = full.sequential().get_sample_scale() - -np.save(save_path+'{}_{}_recon.npy'.format(datasetNameStr,args.ratio),imputed_values) -np.save(save_path+'{}_{}_recon_normalized.npy'.format(datasetNameStr,args.ratio),normalized_values) -np.save(save_path+'{}_{}_featuresOriginal.npy'.format(datasetNameStr,args.ratio),featuresOriginal) -np.save(save_path+'{}_{}_dropi.npy'.format(datasetNameStr,args.ratio),dropi) -np.save(save_path+'{}_{}_dropj.npy'.format(datasetNameStr,args.ratio),dropj) -np.save(save_path+'{}_{}_dropix.npy'.format(datasetNameStr,args.ratio),dropix) - -# celltype: -#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) diff --git a/codesfromJGandYJ/impute/MAGIC_impute.py b/codesfromJGandYJ/impute/MAGIC_impute.py new file mode 100644 index 0000000..95fe325 --- /dev/null +++ b/codesfromJGandYJ/impute/MAGIC_impute.py @@ -0,0 +1,53 @@ +# Analysis using MAGIC method +import magic +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import argparse +import sys +#from benchmark_util import impute_dropout + +parser = argparse.ArgumentParser(description='MAGIC Impute') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + + +def impute_Magic(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + # Load single-cell RNA-seq data + # Default is KNN=5 + magic_operator = magic.MAGIC() + # magic_operator = magic.MAGIC(knn=10) + X_magic = magic_operator.fit_transform(x, genes="all_genes") + recon = X_magic + + np.save('/storage/htc/joshilab/wangjue/scGNN/magic/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),recon) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_Magic(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_Magic(seed=seed, datasetName=datasetName, ratio=ratio) + +# From scVI 
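# Note: the *_recon.npy matrices written by impute_Magic() above follow the '<dataset>_<ratio>_<seed>_recon.npy' naming
# that bak/npy2csv_script.py in this patch expects when it flattens the ratio-0.0 runs to CSV. A minimal sketch of that
# downstream step for one dataset (paths are illustrative; numpy and pandas as imported above):
#   t = np.load('magic/9.Chung_0.0_1_recon.npy')
#   pd.DataFrame(t).to_csv('magic_9.csv', header=None, index=False)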
+# # Load single-cell RNA-seq data +# scdata = magic.mg.SCData(x, "sc-seq") +# print(scdata) + +# scdata.run_magic(n_pca_components=20, random_pca=True, t=6, k=30, ka=10, epsilon=1, rescale_percent=99) + +# if len(sys.argv) == 2: +# np.save("t_MAGIC.npy", scdata.magic.data.as_matrix()) diff --git a/codesfromJGandYJ/impute/Run_netNMF_imputation.py b/codesfromJGandYJ/impute/Run_netNMF_imputation.py new file mode 100644 index 0000000..8c74b72 --- /dev/null +++ b/codesfromJGandYJ/impute/Run_netNMF_imputation.py @@ -0,0 +1,87 @@ +# This code has not cleaned yet +# run netNMF-sc from command line and save outputs to specified directory +from __future__ import print_function +import numpy as np +from warnings import warn +from joblib import Parallel, delayed +import copy,argparse,os,math,random,time +from scipy import sparse, io,linalg +from scipy.sparse import csr_matrix +import warnings,os +from netNMFsc import plot +warnings.simplefilter(action='ignore', category=FutureWarning) +import pandas as pd + +def main(args): + if args.method == 'GD': + from netNMFsc import netNMFGD + operator = netNMFGD(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4) + elif args.method == 'MU': + from netNMFsc import netNMFMU + operator = netNMFMU(d=args.dimensions, alpha=args.alpha, n_inits=1, tol=args.tol, max_iter=args.max_iters, n_jobs=4) + + filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format( + args.Randomdata, args.datasetName,args.dropratio) + x = np.load(filename, allow_pickle=True) + x = x.tolist() + x = x.todense() + x = np.asarray(x) + if args.process == 'log': + x = np.log(x + 1) + + # transpose and add names for rows and cols + features = np.transpose(x) + + chung = pd.read_csv(args.filename, header=0, + index_col=0, sep=',') + X = features + genes = [] + for gen in chung.index.values: + if '.' 
in gen: + genes.append(gen.upper().split('.')[0]) + else: + genes.append(gen.upper()) + #print(genes) + operator.genes = np.asarray(genes) + operator.X = X + #operator.load_10X(direc=args.tenXdir,genome='mm10') + operator.load_network(net=args.network,genenames=args.netgenes,sparsity=args.sparsity) + dictW = operator.fit_transform() + W, H = dictW['W'], dictW['H'] + # k,clusters = plot.select_clusters(H,max_clusters=20) + # plot.tSNE(H,clusters,fname=args.direc+ '/netNMFsc_tsne_imputation_' +args.process +'_'+args.Randomdata) + # os.system('mkdir -p %s'%(args.direc)) + np.save(os.path.join(args.direc,args.Randomdata+'_'+args.process+'_imputation.npy'),np.dot(W,H)) + #np.save(os.path.join(args.direc,'H.npy'),H) + #np.save(os.path.join(args.direc, 'cluster.npy'), H) + return +#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/ + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-m","--method",help="either 'GD for gradient descent or MU for multiplicative update",type=str,default='GD') + parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx') + parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv') + parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='') + parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='') + parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human') + parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble') + parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez') + parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0) + parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99) + parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500) + parser.add_argument("-t","--tol", help="tolerence for netNMF-sc",type=float,default=1e-2) + parser.add_argument("-d","--direc", help="directory to save files",default='') + parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10) + parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0) + parser.add_argument("-x","--tenXdir", help="data is from 10X. 
Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '') + parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1', help='npyImputeG2E_1,2,3') + parser.add_argument('--datasetName', type=str, default='12.Klein', help='12.Klein,13.Zeisel') + parser.add_argument('--process', type=str, default='null', help='log/null to process data') + parser.add_argument("-Hasdot","--Hasdot",type = bool, help="data gene names has dot",default = True) + parser.add_argument('--dropratio', type=str, default='0.1', help='0.1,0.3,0.6,0.8') + args = parser.parse_args() + main(args) + + +#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv' diff --git a/codesfromJGandYJ/impute/SAUCIE_impute.py b/codesfromJGandYJ/impute/SAUCIE_impute.py new file mode 100644 index 0000000..a4b0b14 --- /dev/null +++ b/codesfromJGandYJ/impute/SAUCIE_impute.py @@ -0,0 +1,74 @@ +import sys +sys.path.append("/storage/htc/joshilab/wangjue/") +import SAUCIE +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import argparse + +# modified from official tutorial: https://colab.research.google.com/github/KrishnaswamyLab/SingleCellWorkshop/blob/master/exercises/Deep_Learning/notebooks/02_Answers_Exploratory_analysis_of_single_cell_data_with_SAUCIE.ipynb +# Notes: Have to use very old tensorflow downloaded from conda: +# python==3.6.12 +# tensorflow==1.4.0 +# numpy==1.19.4 + +parser = argparse.ArgumentParser(description='Impute use SAUCIE') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +def impute_saucie(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + x=np.transpose(x) + loader_train = SAUCIE.Loader(x, shuffle=True) + loader_eval = SAUCIE.Loader(x, shuffle=False) + # clear the computational graph + tf.reset_default_graph() + # build the SAUCIE model + model = SAUCIE.SAUCIE(x.shape[1]) + # train the model! 
+ model.train(loader_train, steps=2000) + #imputation + reconstruction = model.get_reconstruction(loader_eval) + reconstruction=np.transpose(reconstruction) + np.save('/storage/htc/joshilab/wangjue/scGNN/saucie/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),reconstruction) + +def plot_saucie(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + loader_eval = SAUCIE.Loader(x, shuffle=False) + # clear the computational graph + #plot + tf.reset_default_graph() + model = SAUCIE.SAUCIE(x.shape[1]) + model.train(loader_eval, steps=2000) + embedding = model.get_embedding(loader_eval) + num_clusters, clusters = model.get_clusters(loader_eval) + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.scatter(embedding[:, 0], embedding[:, 1], c=clusters) + fig.savefig('saucie_'+datasetName+'.png') + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_saucie(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_saucie(seed=seed, datasetName=datasetName, ratio=ratio) + +# plot_saucie(seed='1', datasetName=datasetName, ratio='0.0') \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SAVER_impute.py b/codesfromJGandYJ/impute/SAVER_impute.py new file mode 100644 index 0000000..f0f7381 --- /dev/null +++ b/codesfromJGandYJ/impute/SAVER_impute.py @@ -0,0 +1,56 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import csv +import argparse +import sys + +# Ref: +# https://github.com/mohuangx/SAVER +# https://mohuangx.github.io/SAVER/articles/saver-tutorial.html +# Use python to generate input for saver.r, then output + +parser = argparse.ArgumentParser(description='Impute SAVER') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_saver(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + features=x.T + + #write + dropout_filename = save_path+"saver_input.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + #run the R script + os.system("Rscript saver.r "+save_path+"saver_input.csv "+save_path+"saver_output.csv ") + + filename=save_path+"saver_output.csv" + imputed_values = pd.read_csv(filename,sep="\t",header=None) + imputed_values=imputed_values.T + + np.save('/storage/htc/joshilab/wangjue/scGNN/saver/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_saver(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: 
+ impute_saver(seed=seed, datasetName=datasetName, ratio=ratio) \ No newline at end of file diff --git a/codesfromJGandYJ/impute/SCIMPUTE_impute.py b/codesfromJGandYJ/impute/SCIMPUTE_impute.py new file mode 100644 index 0000000..879a6ca --- /dev/null +++ b/codesfromJGandYJ/impute/SCIMPUTE_impute.py @@ -0,0 +1,66 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import csv +import argparse +import sys + +# Notes in install scimpute: +# Have to add in R: +# Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS=TRUE) +# Ref: https://github.com/Vivianstats/scImpute + +parser = argparse.ArgumentParser(description='Impute scImpute') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_scimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + features=features.T + + #write + dropout_filename = save_path+"scimpute_input.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + #run the R script + os.system("Rscript scimpute.r "+save_path+"scimpute_input.csv "+save_path+"tmpscimpute/") + + filename=save_path+"tmpscimpute/scimpute_count.csv" + imputed_values = pd.read_csv(filename,sep=",",index_col=0) + imputed_values = imputed_values.to_numpy() + + np.save('/storage/htc/joshilab/wangjue/scGNN/scimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_scimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/dca_impute.py b/codesfromJGandYJ/impute/dca_impute.py new file mode 100644 index 0000000..6b7b2a2 --- /dev/null +++ b/codesfromJGandYJ/impute/dca_impute.py @@ -0,0 +1,53 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import csv +import argparse +import sys + +# Ref: https://github.com/theislab/dca +# Notes: As tensorflow comes to 2.0 version, lots of things chagned, here is the version tested in Nov.26, 2020 +# python==3.7.9 +# tensorflow==1.15.4 +# keras==2.3.1 +# theano==1.0.5 +# scanpy==1.5.1 + +parser = argparse.ArgumentParser(description='Imputation DCA') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_dca(seed=1, datasetName='9.Chung', ratio=0.1): + filename = 
'/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=x.astype(int) + features=x.T + #write + dropout_filename = save_path+"dca_input.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + os.system("dca "+dropout_filename+ " "+save_path+"tmpdca") + filename=save_path+"tmpdca/mean.tsv" + imputed_values = pd.read_csv(filename,sep="\t") + imputed_values=imputed_values.T + np.save('/storage/htc/joshilab/wangjue/scGNN/dca/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_dca(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_dca(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/deepimpute_impute.py b/codesfromJGandYJ/impute/deepimpute_impute.py new file mode 100644 index 0000000..9943321 --- /dev/null +++ b/codesfromJGandYJ/impute/deepimpute_impute.py @@ -0,0 +1,52 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from deepimpute.multinet import MultiNet +import torch +import csv +import argparse +import sys + +parser = argparse.ArgumentParser(description='Impute Deepimpute') +# In this script, not using arguments +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +# Ref: +# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def impute_deepimpute(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + # x=np.log(x+1) + + features=x + dropout_filename = save_path+"deepimpute.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + data = pd.read_csv(dropout_filename, header=None) + model = MultiNet() + model.fit(data) + imputed = model.predict(data) + + np.save('/storage/htc/joshilab/wangjue/scGNN/deepimpute/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_deepimpute(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_deepimpute(seed=seed, datasetName=datasetName, ratio=ratio) diff --git a/codesfromJGandYJ/impute/other_dca.sh b/codesfromJGandYJ/impute/other_dca.sh new file mode 100644 index 0000000..f41c874 --- /dev/null +++ b/codesfromJGandYJ/impute/other_dca.sh @@ -0,0 +1,17 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J DCA +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# + +module load miniconda3 +source activate /storage/htc/joshilab/wangjue/conda_R_dca +# grid +# python3 -W ignore dca_impute.py +python3 -W ignore dca_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_deepimpute.sh b/codesfromJGandYJ/impute/other_deepimpute.sh new file mode 100644 index 0000000..23d18c9 --- /dev/null +++ b/codesfromJGandYJ/impute/other_deepimpute.sh @@ -0,0 +1,15 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J deepimpute +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +# python3 -W ignore deepimpute_impute.py +python3 -W ignore deepimpute_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_magic.sh b/codesfromJGandYJ/impute/other_magic.sh new file mode 100644 index 0000000..6d85905 --- /dev/null +++ b/codesfromJGandYJ/impute/other_magic.sh @@ -0,0 +1,15 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Magic +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +# python3 -W ignore MAGIC_impute.py +python3 -W ignore MAGIC_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_saucie.sh b/codesfromJGandYJ/impute/other_saucie.sh new file mode 100644 index 0000000..31c8ce1 --- /dev/null +++ b/codesfromJGandYJ/impute/other_saucie.sh @@ -0,0 +1,15 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J saucie +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate /storage/htc/joshilab/wangjue/conda_R_saucie +# python3 -W ignore SAUCIE_impute.py +python3 -W ignore SAUCIE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_saver.sh b/codesfromJGandYJ/impute/other_saver.sh new file mode 100644 index 0000000..2a29663 --- /dev/null +++ b/codesfromJGandYJ/impute/other_saver.sh @@ -0,0 +1,15 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Saver +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 12 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +# python3 -W ignore SAVER_impute.py +python3 -W ignore SAVER_impute.py --origin diff --git a/codesfromJGandYJ/impute/other_scimpute.sh b/codesfromJGandYJ/impute/other_scimpute.sh new file mode 100644 index 0000000..5da0040 --- /dev/null +++ b/codesfromJGandYJ/impute/other_scimpute.sh @@ -0,0 +1,15 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scimpute +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 12 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +# python3 -W ignore SCIMPUTE_impute.py +python3 -W ignore SCIMPUTE_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/other_scvi.sh b/codesfromJGandYJ/impute/other_scvi.sh new file mode 100644 index 0000000..7b258fa --- /dev/null +++ b/codesfromJGandYJ/impute/other_scvi.sh @@ -0,0 +1,15 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J scvi +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +# python3 -W ignore scVi_impute.py +python3 -W ignore scVi_impute.py --origin \ No newline at end of file diff --git a/codesfromJGandYJ/impute/run_scIGANS_imputation.py b/codesfromJGandYJ/impute/run_scIGANS_imputation.py new file mode 100644 index 0000000..20faf7d --- /dev/null +++ b/codesfromJGandYJ/impute/run_scIGANS_imputation.py @@ -0,0 +1,51 @@ +# This code has not cleaned yet +import sys,os +import numpy as np +import pandas as pd +import argparse +sys.path.append('../') +sys.path.append('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/scIGANs/') + +parser = argparse.ArgumentParser(description='') +parser.add_argument('--Randomdata', type=str, default='npyImputeG2E_1',help='npyImputeG2E_1,2,3') +parser.add_argument('--datasetName', type=str, default='12.Klein',help='12.Klein,13.Zeisel') +parser.add_argument('--process', type=str, default='null',help='log/null to process data') +parser.add_argument('--exec', type=str, default='scIGANs',help='12.Klein') +parser.add_argument('--dropratio', type=str, default='0.1',help='0.1,0.3,0.6,0.8') +parser.add_argument('--csvsavepath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='12.Klein') +parser.add_argument('--labelpath', type=str, default='/storage/htc/joshilab/jghhd/singlecellTest/Data/',help='12.Klein') +parser.add_argument('--outpath', type=str, 
default='/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200/',help='12.Klein') +parser.add_argument('--Epotch', type=str, default='200',help='epotch') +args = parser.parse_args() + +# x = np.concatenate([np.random.uniform(-3, -2, (1000, 40)), np.random.uniform(2, 3, (1000, 40))], axis=0) + +filename = '/storage/hpc/group/joshilab/scGNNdata/{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(args.Randomdata,args.datasetName,args.dropratio) +x = np.load(filename,allow_pickle=True) +x = x.tolist() +x=x.todense() +x=np.asarray(x) +if args.process=='log': + x=np.log(x+1) + saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features_log.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata, + args.datasetName,args.dropratio) +elif args.process=='null': + saveintedir = '{}{}/{}_{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.csv'.format(args.csvsavepath, args.datasetName,args.Randomdata, + args.datasetName,args.dropratio) +#transpose and add names for rows and cols +features=np.transpose(x) + +pd.DataFrame(features).to_csv(saveintedir,sep='\t') + +label = '{}{}/{}_only_label.csv'.format(args.labelpath,args.datasetName,args.datasetName.split('.')[-1]) +#/storage/htc/joshilab/jghhd/singlecellTest/Data/12.Klein/Klein_only_label.csv + +cmd = '{} {} -l {} -e {} -o {}{}'.format(args.exec,saveintedir,label,args.Epotch,args.outpath,args.datasetName) +print(cmd) +os.system(cmd) +#scIGANs saveintedir -l -e 50 + +# l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error(recon, featuresOriginal, None, dropi, dropj, dropix) +# print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') + +#np.save('/storage/hpc/scratch/yjiang/SCwangjuexin/scGNN-master_021720/saucie_t/{}/{}_{}_recon.npy'.format(args.data,datasetNameStr,args.ratio),reconstruction) diff --git a/codesfromJGandYJ/impute/saver.r b/codesfromJGandYJ/impute/saver.r new file mode 100644 index 0000000..1b0953b --- /dev/null +++ b/codesfromJGandYJ/impute/saver.r @@ -0,0 +1,16 @@ +# Usage: +# Rscript saver.r input.txt output.txt +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(SAVER) +inputfile = args[1] +outputfile = args[2] +raw.data <- read.csv(inputfile, header = FALSE, sep=',') +expr <- as.matrix(raw.data) +# Use 12 cores in saver +expr.saver <- saver(expr, ncores = 12, estimates.only = TRUE) +write.table(expr.saver, file=outputfile, row.names = F, col.names = F, sep = "\t") \ No newline at end of file diff --git a/codesfromJGandYJ/impute/scVi_impute.py b/codesfromJGandYJ/impute/scVi_impute.py new file mode 100644 index 0000000..643204b --- /dev/null +++ b/codesfromJGandYJ/impute/scVi_impute.py @@ -0,0 +1,92 @@ +import os +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from scvi.dataset import CortexDataset, RetinaDataset, CsvDataset +from scvi.models import VAE +from scvi.inference import UnsupervisedTrainer +import torch +import csv +import argparse +import sys + +# pip install scvi==0.6.3 +parser = argparse.ArgumentParser(description='scVi imputation') +parser.add_argument('--origin', action='store_true', default=False, help='Whether use origin (default: use ratio 0.0)') +args = parser.parse_args() + +# Ref: +# https://nbviewer.jupyter.org/github/YosefLab/scVI/blob/master/tests/notebooks/data_loading.ipynb + + +save_path = '/storage/htc/joshilab/wangjue/scGNN/tmp/' + +def 
impute_scvi(seed=1, datasetName='9.Chung', ratio=0.1): + filename = '/storage/htc/joshilab/wangjue/scGNN/npyImputeG2E_{}/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(seed, datasetName, ratio) + + x = np.load(filename,allow_pickle=True) + x = x.tolist() + x=x.todense() + x=np.asarray(x) + x=np.log(x+1) + + features = np.copy(x) + + #transpose and add names for rows and cols + features=np.transpose(features) + rowname=np.linspace(1,features.shape[0],features.shape[0]).reshape([features.shape[0],1]) + features=np.concatenate([rowname,features],axis=1) + colname=np.linspace(1,features.shape[1],features.shape[1]).reshape([1,features.shape[1]]) + features=np.concatenate([colname,features],axis=0) + + #write + dropout_filename = save_path+"scvi.csv" + with open(dropout_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(features) + + # gene_dataset = CortexDataset(save_path=save_path, total_genes=558) + gene_dataset = CsvDataset(dropout_filename, save_path=save_path) + + n_epochs = 400 + lr = 1e-3 + use_batches = False + use_cuda = False + + vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches * use_batches) + trainer = UnsupervisedTrainer( + vae, + gene_dataset, + train_size=0.75, + use_cuda=use_cuda, + frequency=5, + ) + + trainer.train(n_epochs=n_epochs, lr=lr) + + full = trainer.create_posterior(trainer.model, gene_dataset, indices=np.arange(len(gene_dataset))) + latent, batch_indices, labels = full.sequential().get_latent() + batch_indices = batch_indices.ravel() + + # use imputation + imputed_values = full.sequential().imputation() + normalized_values = full.sequential().get_sample_scale() + + np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon.npy'.format(datasetName,ratio,seed),imputed_values) + np.save('/storage/htc/joshilab/wangjue/scGNN/scvi/{}_{}_{}_recon_normalized.npy'.format(datasetName,ratio,seed),normalized_values) + +datasetNameList = ['9.Chung','11.Kolodziejczyk','12.Klein','13.Zeisel'] +seedList = ['1','2','3'] +ratioList = [0.1, 0.3, 0.6, 0.8] + +if args.origin: + for datasetName in datasetNameList: + impute_scvi(seed='1', datasetName=datasetName, ratio='0.0') +else: + for datasetName in datasetNameList: + for seed in seedList: + for ratio in ratioList: + impute_scvi(seed=seed, datasetName=datasetName, ratio=ratio) + +# celltype: +#np.save(save_path+'{}_{}_z.npy'.format(datasetNameStr,args.ratio),latent) diff --git a/codesfromJGandYJ/impute/scimpute.r b/codesfromJGandYJ/impute/scimpute.r new file mode 100644 index 0000000..ec91006 --- /dev/null +++ b/codesfromJGandYJ/impute/scimpute.r @@ -0,0 +1,20 @@ +# Usage: +# Rscript scImpute.r input.txt outputdir +# test if there is one argument: if not, return an error +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least one argument must be supplied (input file)\n", call.=FALSE) +} + +library(scImpute) +inputfile = args[1] +outputDir = args[2] +scimpute(# full path to raw count matrix + count_path = inputfile, + infile = "csv", # format of input file + outfile = "csv", # format of output file + out_dir = outputDir, # full path to output directory + labeled = FALSE, # cell type labels not available + drop_thre = 0.5, # threshold set on dropout probability + Kcluster = 2, # 2 cell subpopulations + ncores = 12) # number of cores used in parallel computation diff --git a/do_timer_test.sh b/do_timer_test.sh new file mode 100644 index 0000000..43c06e8 --- /dev/null +++ b/do_timer_test.sh @@ -0,0 +1,9 @@ +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung 
--benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >9gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >11gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >12gpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_gpu/ --debuginfo >13gpu.txt + +python3 -W ignore main_benchmark_timer.py --datasetName 9.Chung --benchmark /home/wangjue/myprojects/scGNN/data/scData/9.Chung/Chung_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --no-cuda --debuginfo >9cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 11.Kolodziejczyk --benchmark /home/wangjue/myprojects/scGNN/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >11cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 12.Klein --benchmark /home/wangjue/myprojects/scGNN/data/scData/12.Klein/Klein_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >12cpu.txt +python3 -W ignore main_benchmark_timer.py --datasetName 13.Zeisel --benchmark /home/wangjue/myprojects/scGNN/data/scData/13.Zeisel/Zeisel_cell_label.csv --LTMGDir /home/wangjue/myprojects/scGNN/data/scData/ --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --npyDir outputDir_cpu/ --debuginfo --no-cuda >13cpu.txt diff --git a/generating_Impute_0.0.py b/generating_Impute_0.0.py new file mode 100644 index 0000000..dd50d28 --- /dev/null +++ b/generating_Impute_0.0.py @@ -0,0 +1,78 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running imputation of original scGNN ') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! 
/bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), +] + +dropoutList = ['0.0',] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = "npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/generating_Impute_0.1-0.8-ablation.py b/generating_Impute_0.1-0.8-ablation.py new file mode 100644 index 0000000..ce1d245 --- /dev/null +++ b/generating_Impute_0.1-0.8-ablation.py @@ -0,0 +1,94 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') 
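+# Note (added for clarity): '--imputeMode' below is declared with action='store_true' but default=True,
+# so it is effectively always on for this generator whether or not the flag is passed.
+# With the methodsList/dropoutList defined further down, this script writes one sbatch file per
+# (methodsList entry, dataset, dropout ratio), i.e. 15 x 4 x 4 = 240 files, named like
+# run_experimentImpute_2_g_e_1_9_0.1.sh.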
+parser.add_argument('--imputeMode', action='store_true', default=True, + help='whether impute') +args = parser.parse_args() + +templateStr1 = "#! /bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p BioCompute,Lewis # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('run_experiment_2_g_e_L_1 2geL1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 1 --npyDir','npyG2EL_1/'), + ('run_experiment_1_g_e_1 1ge1','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG1E_1/'), + ('run_experiment_2_g_f_1 2gf1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 1 --npyDir','npyG2F_1/'), + ('run_experiment_2_n_e_1 2ne1','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyN2E_1/'), + ('run_experiment_2_g_e_1 2ge1','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir','npyG2E_1/'), + + ('run_experiment_2_g_e_L_2 2geL2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 2 --npyDir','npyG2EL_2/'), + ('run_experiment_1_g_e_2 1ge2','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG1E_2/'), + ('run_experiment_2_g_f_2 2gf2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 2 --npyDir','npyG2F_2/'), + ('run_experiment_2_n_e_2 2ne2','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyN2E_2/'), + ('run_experiment_2_g_e_2 2ge2','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 2 --npyDir','npyG2E_2/'), + + ('run_experiment_2_g_e_L_3 2geL3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --L1Para 0.0 --seed 3 --npyDir','npyG2EL_3/'), + ('run_experiment_1_g_e_3 1ge3','--regulized-type LTMG --EMtype EM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG1E_3/'), + ('run_experiment_2_g_f_3 2gf3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --seed 3 --npyDir','npyG2F_3/'), + ('run_experiment_2_n_e_3 2ne3','--regulized-type noregu --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyN2E_3/'), + ('run_experiment_2_g_e_3 2ge3','--regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 3 --npyDir','npyG2E_3/'), +] + +dropoutList = ['0.1','0.3','0.6','0.8'] + +# generate sbatch files: +for item in methodsList: + batchInfo,scGNNparam,outDirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + if args.imputeMode: + tmpstr1 = tmpstr1.replace('run_experiment','run_experimentImpute') + tmpstr2 = "I"+tmpstr2 + # tmpstr2 = "I"+tmpstr2[2:] + imputeStr = ' --imputeMode ' + outDirStr = 
"npyImpute"+outDirStr[3:] + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 9.Chung --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 11.Kolodziejczyk --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_11_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() + + for dropoutPara in dropoutList: + commandLine = "python3 -W ignore main_benchmark.py --datasetName 13.Zeisel --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv "+scGNNparam+" "+outDirStr+imputeStr+" --dropoutRatio "+dropoutPara+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13_"+dropoutPara+".sh",'w') as fw: + fw.write(outStr) + fw.close() diff --git a/generating_distribution.py b/generating_distribution.py new file mode 100644 index 0000000..a69efbb --- /dev/null +++ b/generating_distribution.py @@ -0,0 +1,103 @@ +import argparse + +# python generatingMethodsBatchshell_louvain.py +# python generatingMethodsBatchshell_louvain.py --imputeMode +parser = argparse.ArgumentParser(description='Generating sbatch files for HPC cluster running') +parser.add_argument('--outputDir', type=str, default='', + help='Directory of batch files for cluster running') +args = parser.parse_args() + +templateStr1 = "#! 
/bin/bash\n"\ +"######################### Batch Headers #########################\n"\ +"#SBATCH -A xulab\n"\ +"#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute\n"\ +"#SBATCH -J " + +templateStr2 = "\n#SBATCH -o results-%j.out # give the job output a custom name\n"\ +"#SBATCH -t 2-00:00 # two days time limit\n"\ +"#SBATCH -N 1 # number of nodes\n"\ +"#SBATCH -n 1 # number of cores (AKA tasks)\n"\ +"#SBATCH --mem=128G\n"\ +"#################################################################\n"\ +"module load miniconda3\n"\ +"source activate conda_R\n" + +#tuple list +#batchInfo,scGNNparam,outDir +#huge matrix +methodsList = [ + ('plot_G2E_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2E_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G2EL_0.1 G2E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.3 G2E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.6 G2E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + ('plot_G2EL_0.8 G2E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2E'), + + ('plot_G1E_0.1 G1E1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.3 G1E3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.6 G1E6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + ('plot_G1E_0.8 G1E8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG1E'), + + ('plot_G2F_0.1 G2F1','LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.3 G2F3','LTMG_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.6 G2F6','LTMG_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + ('plot_G2F_0.8 G2F8','LTMG_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeG2F'), + + ('plot_N2E_0.1 N2E1','noregu_0.1_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.3 N2E3','noregu_0.3_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.6 N2E6','noregu_0.6_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + ('plot_N2E_0.8 N2E8','noregu_0.8_10-0.1-0.9-0.0-0.3-0.1','npyImputeN2E'), + +] + +seedList = ['_1/','_2/','_3/'] + +# generate sbatch files: +for item in methodsList: + batchInfo,param,dirStr = item + tmp = batchInfo.split() + tmpstr1=tmp[0] + tmpstr2=tmp[1] + imputeStr = '' + outputFilename = args.outputDir + tmpstr1 + abbrStr = tmpstr2 + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 9.Chung --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 9.Chung "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_9.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 11.Kolodziejczyk --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 11.Kolodziejczyk "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_11.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 12.Klein --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript 
plot_distribution.r 12.Klein "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_12.sh",'w') as fw: + fw.write(outStr) + fw.close() + + commandLine = '' + for seed in seedList: + commandLine += "python3 -W ignore plot_distribution.py --datasetName 13.Zeisel --para "+param+" --inDir "+dirStr+seed+" --outDir "+dirStr+seed+"\n" + commandLine += "Rscript plot_distribution.r 13.Zeisel "+param+" "+dirStr+seed+" "+dirStr+seed+"\n" + outStr = templateStr1 + abbrStr + templateStr2 + commandLine + "\n" + with open(outputFilename+"_13.sh",'w') as fw: + fw.write(outStr) + fw.close() + diff --git a/graph_function.py b/graph_function.py index f1c65b2..9e30b9a 100644 --- a/graph_function.py +++ b/graph_function.py @@ -71,6 +71,12 @@ def generateAdj(featureMatrix, graphType='KNNgraph', para = None, parallelLimit distanceType = parawords[0] k = int(parawords[1]) edgeList = calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType=distanceType, k=k) + elif graphType == 'KNNgraphStatsSingleThreadNoPrune': + if para != None: + parawords = para.split(':') + distanceType = parawords[0] + k = int(parawords[1]) + edgeList = calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType=distanceType, k=k) else: print('Should give graphtype') @@ -330,6 +336,25 @@ def calculateKNNgraphDistanceMatrixStatsSingleThread(featureMatrix, distanceType return edgeList +#para: measuareName:k:threshold no prune only +def calculateKNNgraphDistanceMatrixStatsSingleThreadNoPrune(featureMatrix, distanceType='euclidean', k=10, param=None): + r""" + Thresholdgraph: KNN Graph with stats one-std based methods, SingleThread version, no boundary, + """ + + edgeList=[] + for i in np.arange(featureMatrix.shape[0]): + tmp=featureMatrix[i,:].reshape(1,-1) + distMat = distance.cdist(tmp,featureMatrix, distanceType) + res = distMat.argsort()[:k+1] + for j in np.arange(1,k+1): + # TODO: check, only exclude large outliners + # if (distMat[0,res[0][j]]<=mean+std) and (distMat[0,res[0][j]]>=mean-std): + weight = 1.0 + edgeList.append((i,res[0][j],weight)) + + return edgeList + # kernelDistance def kernelDistance(distance,delta=1.0): ''' diff --git a/main_benchmark.py b/main_benchmark.py index 101de9f..0a295fc 100644 --- a/main_benchmark.py +++ b/main_benchmark.py @@ -23,8 +23,7 @@ # Benchmark for both celltype identification and imputation, needs Preprocessing_main.py first, then proceed by this script. 
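# For reference, a typical invocation of this benchmark script, as emitted by the generating_Impute_* helpers added in this change (imputation mode, 10% dropout, seed 1):
#   python3 -W ignore main_benchmark.py --datasetName 12.Klein --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv --regulized-type LTMG --EMtype celltypeEM --clustering-method LouvainK --useGAEembedding --seed 1 --npyDir npyImputeG2E_1/ --imputeMode --dropoutRatio 0.1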
parser = argparse.ArgumentParser(description='Graph EM AutoEncoder for scRNA') parser.add_argument('--datasetName', type=str, default='1.Biase', - help='TGFb/sci-CAR/sci-CAR_LTMG/MMPbasal/MMPbasal_all/MMPbasal_allgene/MMPbasal_allcell/MMPepo/MMPbasal_LTMG/MMPbasal_all_LTMG/MMPbasal_2000') -# Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') parser.add_argument('--batch-size', type=int, default=12800, metavar='N', help='input batch size for training (default: 12800)') parser.add_argument('--epochs', type=int, default=500, metavar='N', @@ -37,7 +36,7 @@ help='EM process type (default: celltypeEM) or EM') parser.add_argument('--alpha', type=float, default=0.5, help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') -parser.add_argument('--converge-type', type=str, default='either', +parser.add_argument('--converge-type', type=str, default='celltype', help='type of converge: celltype/graph/both/either (default: celltype) ') parser.add_argument('--converge-graphratio', type=float, default=0.01, help='ratio of cell type change in EM iteration (default: 0.01), 0-1') @@ -588,22 +587,34 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): # graph criteria if args.converge_type == 'graph': if graphChange < graphChangeThreshold: - print('Converge now!') + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # celltype criteria elif args.converge_type == 'celltype': if ari>args.converge_celltyperatio: - print('Converge now!') + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if both criteria are meets elif args.converge_type == 'both': if graphChange < graphChangeThreshold and ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # if either criteria are meets elif args.converge_type == 'either': if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: - print('Converge now!') + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult break # Update diff --git a/main_benchmark_timer.py b/main_benchmark_timer.py new file mode 100644 index 0000000..77d1a9d --- /dev/null +++ b/main_benchmark_timer.py @@ -0,0 +1,745 @@ +import time +import resource +import datetime +import argparse +import sys +import numpy as np +import pickle as pkl +import networkx as nx +import scipy.sparse as sp +import torch +from torch.utils.data import Dataset, DataLoader +from torch import nn, optim +from torch.nn import functional as F +from sklearn.decomposition import PCA +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.cluster import KMeans,SpectralClustering,AffinityPropagation,AgglomerativeClustering,Birch,DBSCAN,FeatureAgglomeration,MeanShift,OPTICS +from model import AE, VAE, VAE2d +from util_function import * +from graph_function import * +from benchmark_util import * +from gae_embedding import GAEembedding,measure_clustering_results,test_clustering_benchmark_results +# from LTMG_R import * +import pandas as pd + +# Benchmark for both celltype identification and imputation, needs 
Preprocessing_main.py first, then proceed by this script. +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--batch-size', type=int, default=12800, metavar='N', + help='input batch size for training (default: 12800)') +parser.add_argument('--epochs', type=int, default=500, metavar='N', + help='number of epochs to train in Regulatory Autoencoder (default: 500)') +parser.add_argument('--EM-epochs', type=int, default=200, metavar='N', + help='number of epochs to train in iteration EM (default: 200)') +parser.add_argument('--EM-iteration', type=int, default=10, metavar='N', + help='number of epochs in EM iteration (default: 10)') +parser.add_argument('--EMtype', type=str, default='EM', + help='EM process type (default: celltypeEM) or EM') +parser.add_argument('--alpha', type=float, default=0.5, + help='iteration alpha (default: 0.5) to control the converge rate, should be a number between 0~1') +parser.add_argument('--converge-type', type=str, default='celltype', + help='type of converge: celltype/graph/both/either (default: celltype) ') +parser.add_argument('--converge-graphratio', type=float, default=0.01, + help='ratio of cell type change in EM iteration (default: 0.01), 0-1') +parser.add_argument('--converge-celltyperatio', type=float, default=0.95, + help='ratio of cell type change in EM iteration (default: 0.99), 0-1') +parser.add_argument('--cluster-epochs', type=int, default=200, metavar='N', + help='number of epochs in cluster autoencoder training (default: 200)') +parser.add_argument('--no-cuda', action='store_true', default=False, + help='enables CUDA training') +parser.add_argument('--seed', type=int, default=1, metavar='S', + help='random seed (default: 1)') +parser.add_argument('--regulized-type', type=str, default='LTMG', + help='regulized type (default: LTMG) in EM, otherwise: noregu/LTMG/LTMG01') +parser.add_argument('--reduction', type=str, default='sum', + help='reduction type: mean/sum, default(sum)') +parser.add_argument('--model', type=str, default='AE', + help='VAE/AE (default: AE)') +parser.add_argument('--gammaPara', type=float, default=0.1, + help='regulized parameter (default: 0.1)') +parser.add_argument('--alphaRegularizePara', type=float, default=0.9, + help='regulized parameter (default: 0.9)') + +# imputation related +parser.add_argument('--EMregulized-type', type=str, default='Celltype', + help='regulized type (default: noregu) in EM, otherwise: noregu/Graph/GraphR/Celltype/CelltypeR') +# parser.add_argument('--adjtype', type=str, default='unweighted', +# help='adjtype (default: weighted) otherwise: unweighted') +# parser.add_argument('--aePara', type=str, default='start', +# help='whether use parameter of first feature autoencoder: start/end/cont') +parser.add_argument('--gammaImputePara', type=float, default=0.0, + help='regulized parameter (default: 0.0)') +parser.add_argument('--graphImputePara', type=float, default=0.3, + help='graph parameter (default: 0.3)') +parser.add_argument('--celltypeImputePara', type=float, default=0.1, + help='celltype parameter (default: 0.1)') +parser.add_argument('--L1Para', type=float, default=1.0, + help='L1 regulized parameter (default: 0.001)') +parser.add_argument('--L2Para', type=float, default=0.0, + help='L2 regulized parameter (default: 
0.0)') +parser.add_argument('--EMreguTag', action='store_true', default=False, + help='whether to use regularization in EM process') +parser.add_argument('--discreteTag', action='store_true', default=False, + help='whether input is raw or 0/1 (default: False)') +#Build cell graph +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThread', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStatsSingleThread)') +parser.add_argument('--zerofillFlag', action='store_true', default=False, + help='fill zero or not before EM process (default: False)') + +#Debug related +parser.add_argument('--precisionModel', type=str, default='Float', + help='Single precision/Double precision: Float/Double (default: Float)') +parser.add_argument('--coresUsage', type=str, default='1', + help='how many cores used: all/1/... (default: 1)') +parser.add_argument('--npyDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--log-interval', type=int, default=100, metavar='N', + help='how many batches to wait before logging training status') +parser.add_argument('--saveinternal', action='store_true', default=False, + help='whether to save internal iteration results or not') +parser.add_argument('--debuginfo', action='store_true', default=False, + help='whether to output debug info on CPU time and memory usage') + +#LTMG related +parser.add_argument('--inferLTMGTag', action='store_true', default=False, + help='whether to infer LTMG') +parser.add_argument('--LTMGDir', type=str, default='/home/jwang/data/scData/', + help='directory of LTMG files (default: /home/jwang/data/scData/)') +parser.add_argument('--expressionFile', type=str, default='Biase_expression.csv', + help='expression file in csv') +parser.add_argument('--ltmgFile', type=str, default='ltmg.csv', + help='LTMG file in csv') + +#Clustering related +parser.add_argument('--useGAEembedding', action='store_true', default=False, + help='whether to use GAE embedding for clustering (default: False)') +parser.add_argument('--useBothembedding', action='store_true', default=False, + help='whether to use both embedding and graph embedding for clustering (default: False)') +parser.add_argument('--n-clusters', default=20, type=int, help='number of clusters if predefined for KMeans/Birch') +parser.add_argument('--clustering-method', type=str, default='LouvainK', + help='Clustering method: Louvain/KMeans/SpectralClustering/AffinityPropagation/AgglomerativeClustering/AgglomerativeClusteringK/Birch/BirchN/MeanShift/OPTICS/LouvainK/LouvainB') +parser.add_argument('--maxClusterNumber', type=int, default=30, + help='maximum number of clusters for celltypeEM without a preset number of clusters (default: 30)') +parser.add_argument('--minMemberinCluster', type=int, default=5, + help='minimum number of members in each cluster for celltypeEM (default: 5)') +parser.add_argument('--resolution', type=str, default='auto', + help='resolution for Louvain clustering (default: auto, otherwise a number such as 0.5/0.8)') + + +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') + +#Agglomerative clustering related
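+# ('--linkage' below is only used when --clustering-method is AgglomerativeClustering; see the clustering step later in this script.)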
+parser.add_argument('--linkage', type=str, default='ward', + help='linkage should be: ward, average, complete, single') + +#GAE related +parser.add_argument('--GAEmodel', type=str, default='gcn_vae', help="models used") +parser.add_argument('--GAEepochs', type=int, default=200, help='Number of epochs to train.') +parser.add_argument('--GAEhidden1', type=int, default=32, help='Number of units in hidden layer 1.') +parser.add_argument('--GAEhidden2', type=int, default=16, help='Number of units in hidden layer 2.') +parser.add_argument('--GAElr', type=float, default=0.01, help='Initial learning rate.') +parser.add_argument('--GAEdropout', type=float, default=0., help='Dropout rate (1 - keep probability).') +parser.add_argument('--GAElr_dw', type=float, default=0.001, help='Initial learning rate for regularization.') + +#Start Impute or not, only used for evaluating Impute +parser.add_argument('--imputeMode', default=False, action='store_true', + help='impute or not (default: False). Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--dropoutRatio', type=float, default=0.1, + help='dropout ratio for impute (default: 0.1)') + +args = parser.parse_args() +args.cuda = not args.no_cuda and torch.cuda.is_available() + +#TODO +#As we have lots of parameters, should check args +checkargs(args) + +torch.manual_seed(args.seed) +device = torch.device("cuda" if args.cuda else "cpu") + +if not args.coresUsage == 'all': + torch.set_num_threads(int(args.coresUsage)) + +kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} +print(args) +start_time = time.time() +print ('---0:00:00---scRNA starts loading.') + +if not args.imputeMode: + # if args.discreteTag: + # scData = scBenchDataset(args.datasetName, args.discreteTag) + # else: + # scData = scBenchDataset(args.datasetName, args.discreteTag, transform=logtransform) + scData = scBenchDataset(args.datasetName, args.discreteTag) +else: + # if args.discreteTag: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio) + # else: + # scData = scDatasetDropout(args.datasetName, args.discreteTag, args.dropoutRatio, transform=logtransform) + scData = scDatasetDropout(datasetName=args.datasetName, discreteTag=args.discreteTag, ratio=args.dropoutRatio, seed=args.seed) +train_loader = DataLoader(scData, batch_size=args.batch_size, shuffle=False, **kwargs) + +if args.inferLTMGTag: + #run LTMG in R + runLTMG(args.LTMGDir+'test/'+args.expressionFile,args.LTMGDir+'test/') + ltmgFile = args.ltmgFile +else: + ltmgFile = args.datasetName+'/T2000_UsingOriginalMatrix/T2000_LTMG.txt' + +regulationMatrix = readLTMGnonsparse(args.LTMGDir, ltmgFile) +regulationMatrix = torch.from_numpy(regulationMatrix) +if args.precisionModel == 'Double': + regulationMatrix = regulationMatrix.type(torch.DoubleTensor) +elif args.precisionModel == 'Float': + regulationMatrix = regulationMatrix.type(torch.FloatTensor) + +# Original +if args.model == 'VAE': + # model = VAE(dim=scData.features.shape[1]).to(device) + model = VAE2d(dim=scData.features.shape[1]).to(device) +elif args.model == 'AE': + model = AE(dim=scData.features.shape[1]).to(device) +if args.precisionModel == 'Double': + model=model.double() +optimizer = optim.Adam(model.parameters(), lr=1e-3) + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +#t1=pd.read_csv('/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv',index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +#whether to output debuginfo in running time and memory consumption +def 
debuginfoStr(info): + if args.debuginfo: + print ('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+'---'+info) + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Mem consumption: '+str(mem)) + +debuginfoStr('scRNA has been successfully loaded') + +#TODO: have to improve save npy +def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype'): + ''' + EMFlag indicates whether in EM processes. + If in EM, use regulized-type parsed from program entrance, + Otherwise, noregu + taskType: celltype or imputation + ''' + model.train() + train_loss = 0 + # for batch_idx, (data, _) in enumerate(train_loader): + # for batch_idx, data in enumerate(train_loader): + for batch_idx, (data, dataindex) in enumerate(train_loader): + if args.precisionModel == 'Double': + data = data.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + data = data.type(torch.FloatTensor) + data = data.to(device) + regulationMatrixBatch = regulationMatrix[dataindex,:] + regulationMatrixBatch = regulationMatrixBatch.to(device) + optimizer.zero_grad() + if args.model == 'VAE': + recon_batch, mu, logvar, z = model(data) + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu, logvar, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + elif args.model == 'AE': + recon_batch, z = model(data) + mu_dummy = '' + logvar_dummy = '' + # Original + # loss = loss_function(recon_batch, data, mu, logvar) + if taskType == 'celltype': + if EMFlag and (not args.EMreguTag): + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type='noregu', reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, gammaPara=args.gammaPara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.alphaRegularizePara, modelusage=args.model, reduction=args.reduction) + elif taskType == 'imputation': + if EMFlag and 
(not args.EMreguTag): + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.EMregulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + else: + loss = loss_function_graph_celltype(recon_batch, data.view(-1, recon_batch.shape[1]), mu_dummy, logvar_dummy, graphregu=adjsample, celltyperegu=celltypesample, gammaPara=args.gammaImputePara, regulationMatrix=regulationMatrixBatch, regularizer_type=args.regulized_type, reguPara=args.graphImputePara, reguParaCelltype=args.celltypeImputePara, modelusage=args.model, reduction=args.reduction) + + # L1 and L2 regularization in imputation + # 0.0 for no regularization + if taskType == 'imputation': + l1 = 0.0 + l2 = 0.0 + for p in model.parameters(): + l1 = l1 + p.abs().sum() + l2 = l2 + p.pow(2).sum() + loss = loss + args.L1Para * l1 + args.L2Para * l2 + + loss.backward() + train_loss += loss.item() + optimizer.step() + if batch_idx % args.log_interval == 0: + print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + epoch, batch_idx * len(data), len(train_loader.dataset), + 100. * batch_idx / len(train_loader), + loss.item() / len(data))) + + # for batch + if batch_idx == 0: + recon_batch_all=recon_batch + data_all = data + z_all = z + else: + recon_batch_all=torch.cat((recon_batch_all, recon_batch), 0) + data_all = torch.cat((data_all, data), 0) + z_all = torch.cat((z_all,z),0) + + print('====> Epoch: {} Average loss: {:.4f}'.format( + epoch, train_loss / len(train_loader.dataset))) + + return recon_batch_all, data_all, z_all + +if __name__ == "__main__": + outParaTag = str(args.k)+'-'+str(args.gammaPara)+'-'+str(args.alphaRegularizePara)+'-'+str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + # outParaTag = str(args.gammaImputePara)+'-'+str(args.graphImputePara)+'-'+str(args.celltypeImputePara) + ptfileStart = args.npyDir+args.datasetName+'_'+outParaTag+'_EMtrainingStart.pt' + stateStart = { + # 'epoch': epoch, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict(), + } + ptfile = args.npyDir+args.datasetName+'_EMtraining.pt' + + # Step 1. 
celltype clustering + # store parameter + torch.save(stateStart,ptfileStart) + + # Save results only when impute + discreteStr = '' + if args.discreteTag: + discreteStr = 'D' + + if args.imputeMode: + # Does not need now + # save_sparse_matrix(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + # sp.save_npz(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_features.npz',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_features.npy',scData.features) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropi.npy',scData.i) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropj.npy',scData.j) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_dropix.npy',scData.ix) + + debuginfoStr('Start feature autoencoder training') + + for epoch in range(1, args.epochs + 1): + recon, original, z = train(epoch, EMFlag=False) + + debuginfoStr('Feature autoencoder training finished') + + zOut = z.detach().cpu().numpy() + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + # Store reconOri for imputation + reconOri = recon.clone() + reconOri = reconOri.detach().cpu().numpy() + + # Step 1. Inferring celltype + #Define resolution + #Default: auto, otherwise use user defined resolution + if args.resolution == 'auto': + if zOut.shape[0]< 2000: + resolution = 0.8 + else: + resolution = 0.5 + else: + resolution = float(args.resolution) + + debuginfoStr('Start construct cell grpah') + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding)) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr('Cell Graph constructed and pruned') + + # if args.saveinternal: + # reconOut = recon.detach().cpu().numpy() + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z.npy',zOut) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon.npy',reconOut) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z.npy',zOut) + + # Whether use GAE embedding + debuginfoStr('Start Graph Autoencoder training') + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + debuginfoStr('Graph 
Autoencoder training finished') + + # For iteration studies + G0 = nx.Graph() + G0.add_weighted_edges_from(edgeList) + nlG0=nx.normalized_laplacian_matrix(G0) + # set iteration criteria for converge + adjOld = nlG0 + # set celltype criteria for converge + listResultOld = [1 for i in range(zOut.shape[0])] + + #Fill the zeros before EM iteration + # TODO: better implementation later, now we don't filling zeros for now + if args.zerofillFlag: + for nz_index in range(len(scData.nz_i)): + # tmp = scipy.sparse.lil_matrix.todense(scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]]) + # tmp = np.asarray(tmp).reshape(-1)[0] + tmp = scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]] + reconOut[scData.nz_i[nz_index], scData.nz_j[nz_index]] = tmp + recon = reconOut + + debuginfoStr('EM Iteration started') + for bigepoch in range(0, args.EM_iteration): + iteration_time = time.time() + + # Now for both methods, we need do clustering, using clustering results to check converge + # TODO May reimplement later + # Clustering: Get cluster + clustering_time = time.time() + if args.clustering_method=='Louvain': + # Louvain: the only function has R dependent + # Seperate here for platforms without R support + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + elif args.clustering_method=='LouvainK': + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = KMeans(n_clusters=k, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='LouvainB': + from R_util import generateLouvainCluster + listResult,size = generateLouvainCluster(edgeList) + k = len(np.unique(listResult)) + print('Louvain cluster: '+str(k)) + # resolution of louvain cluster: + k = int(k*resolution) if k>3 else 2 + clustering = Birch(n_clusters=k).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='KMeans': + clustering = KMeans(n_clusters=args.n_clusters, random_state=0).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='SpectralClustering': + clustering = SpectralClustering(n_clusters=args.n_clusters, assign_labels="discretize", random_state=0).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AffinityPropagation': + clustering = AffinityPropagation().fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='AgglomerativeClustering': + clustering = AgglomerativeClustering(linkage=args.linkage).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='AgglomerativeClusteringK': + clustering = AgglomerativeClustering(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='Birch': + clustering = Birch(n_clusters=args.n_clusters).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='BirchN': + clustering = Birch(n_clusters=None).fit(zOut) + listResult = clustering.predict(zOut) + elif args.clustering_method=='MeanShift': + clustering = MeanShift().fit(zOut) + listResult = clustering.labels_.tolist() + elif args.clustering_method=='OPTICS': + clustering = OPTICS(min_samples=int(args.k/2), min_cluster_size=args.minMemberinCluster).fit(zOut) + listResult = 
clustering.labels_.tolist() + else: + print("Error: Clustering method not appropriate") + # print("---Clustering takes %s seconds ---" % (time.time() - clustering_time)) + + # If clusters more than maxclusters, then have to stop + if len(set(listResult))>args.maxClusterNumber or len(set(listResult))<=1: + print("Stopping: Number of clusters is " + str(len(set(listResult))) + ".") + # Exit + # return None + # Else: dealing with the number + listResult = trimClustering(listResult,minMemberinCluster=args.minMemberinCluster,maxClusterNumber=args.maxClusterNumber) + + #Calculate silhouette + measure_clustering_results(zOut, listResult) + print('Total Cluster Number: '+str(len(set(listResult)))) + + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training started') + #Graph regulizated EM AE with celltype AE, do the additional AE + if args.EMtype == 'celltypeEM': + # Each cluster has a autoencoder, and organize them back in iteraization + clusterIndexList = [] + for i in range(len(set(listResult))): + clusterIndexList.append([]) + for i in range(len(listResult)): + clusterIndexList[listResult[i]].append(i) + + reconNew = np.zeros((scData.features.shape[0],scData.features.shape[1])) + + # Convert to Tensor + reconNew = torch.from_numpy(reconNew) + if args.precisionModel == 'Double': + reconNew = reconNew.type(torch.DoubleTensor) + elif args.precisionModel == 'Float': + reconNew = reconNew.type(torch.FloatTensor) + reconNew = reconNew.to(device) + + # model.load_state_dict(torch.load(ptfile)) + model.load_state_dict(ptstatus) + + for clusterIndex in clusterIndexList: + reconUsage = recon[clusterIndex] + scDataInter = scDatasetInter(reconUsage) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + for epoch in range(1, args.cluster_epochs + 1): + reconCluster, originalCluster, zCluster = train(epoch, EMFlag=True) + count = 0 + for i in clusterIndex: + reconNew[i] = reconCluster[count,:] + count +=1 + # Update + recon = reconNew + # torch.save(model.state_dict(),ptfile) + ptstatus = model.state_dict() + + debuginfoStr(str(bigepoch)+'th iter: Cluster Autoencoder training succeed') + + # Use new dataloader + scDataInter = scDatasetInter(recon) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + debuginfoStr(str(bigepoch)+'th iter: Start construct cell grpah') + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True) + + zOut = z.detach().cpu().numpy() + + # Here para = 'euclidean:10' + # adj, edgeList = generateAdj(zOut, graphType='KNNgraphML', para = args.knn_distance+':'+str(args.k)) + adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k), adjTag = (args.useGAEembedding or args.useBothembedding or (bigepoch == int(args.EM_iteration)-1))) + # if args.adjtype == 'unweighted': + # adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = sp.csr_matrix.todense(adj) + # elif args.adjtype == 'weighted': + # adj, edgeList = generateAdjWeighted(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) + # adjdense = adj.toarray() + debuginfoStr(str(bigepoch)+'th iter: Cell Graph constructed and pruned') + + debuginfoStr(str(bigepoch)+'th iter: Start Graph Autoencoder training') + # Whether use GAE embedding + if args.useGAEembedding or args.useBothembedding: + zDiscret = zOut>np.mean(zOut,axis=0) + zDiscret = 1.0*zDiscret + if args.useGAEembedding: + 
zOut=GAEembedding(zDiscret, adj, args) + elif args.useBothembedding: + zEmbedding=GAEembedding(zDiscret, adj, args) + zOut=np.concatenate((zOut,zEmbedding),axis=1) + + debuginfoStr(str(bigepoch)+'th iter: Graph Autoencoder training finished') + + if args.saveinternal: + reconOut = recon.detach().cpu().numpy() + if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_recon'+str(bigepoch)+'.npy',reconOut) + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_z'+str(bigepoch)+'.npy',zOut) + + # print("---One iteration in EM process, proceeded %s seconds ---" % (time.time() - iteration_time)) + + #Iteration usage + Gc = nx.Graph() + Gc.add_weighted_edges_from(edgeList) + adjGc = nx.adjacency_matrix(Gc) + + # Update new adj + adjNew = args.alpha*nlG0 + (1-args.alpha) * adjGc/np.sum(adjGc,axis=0) + + #debug + graphChange = np.mean(abs(adjNew-adjOld)) + graphChangeThreshold = args.converge_graphratio * np.mean(abs(nlG0)) + print('adjNew:{} adjOld:{} G0:{}'.format(adjNew, adjOld, nlG0)) + print('mean:{} threshold:{}'.format(graphChange, graphChangeThreshold)) + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(listResultOld, listResult) + print(listResultOld) + print(listResult) + print('celltype similarity:'+str(ari)) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultarray=[] + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + print('All Results: ') + print(resultstr) + + if args.saveinternal: + if args.imputeMode: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + else: + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_benchmark'+str(bigepoch)+'.txt',resultarray,fmt='%s') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_graph'+str(bigepoch)+'.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_results'+str(bigepoch)+'.txt',listResult,fmt='%d') + + # graph criteria + if args.converge_type == 'graph': + if graphChange < graphChangeThreshold: + print('Graph Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # celltype criteria + elif args.converge_type == 'celltype': + if ari>args.converge_celltyperatio: + print('Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if both criteria are meets + elif args.converge_type == 'both': + if graphChange < graphChangeThreshold and ari > 
args.converge_celltyperatio: + print('Graph and Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + # if either criteria are meets + elif args.converge_type == 'either': + if graphChange < graphChangeThreshold or ari > args.converge_celltyperatio: + print('Graph or Celltype Converge now!') + # Converge, Update + adjOld = adjNew + listResultOld = listResult + break + + # Update + adjOld = adjNew + listResultOld = listResult + # torch.cuda.empty_cache() + debuginfoStr(str(bigepoch)+'th iter: Iteration finished') + + + # Output celltype related results + if args.imputeMode: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_final_edgeList.npy',edgeList) + else: + np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+outParaTag+'_final_edgeList.npy',edgeList) + + # np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_embedding.csv',zOut, delimiter=",",fmt='%10.4f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_graph.csv',edgeList,fmt='%d,%d,%2.1f') + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt',listResult,fmt='%d') + + resultarray=[] + silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) + ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) + resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) + resultarray.append(resultstr) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_benchmark.txt',resultarray,fmt='%s') + + # save internal results for imputation + # if args.imputeMode: + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy',listResult) + # else: + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy',reconOri) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',adj) + # np.save(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy',listResult) + + # Step 2. 
Imputation with best results of graph and celltype + + # if args.imputeMode: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+str(args.dropoutRatio)+'_'+args.regulized_type+'_listResult.npy') + # else: + # reconOri = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_reconOri.npy') + # adj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_adj.npy',allow_pickle=True) + # listResult = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+'_listResult.npy') + + # Use new dataloader + scDataInter = scDatasetInter(reconOri) + train_loader = DataLoader(scDataInter, batch_size=args.batch_size, shuffle=False, **kwargs) + + stateStart = torch.load(ptfileStart) + model.load_state_dict(stateStart['state_dict']) + optimizer.load_state_dict(stateStart['optimizer']) + # if args.aePara == 'start': + # model.load_state_dict(torch.load(ptfileStart)) + # elif args.aePara == 'end': + # model.load_state_dict(torch.load(ptfileEnd)) + + # generate graph regularizer from graph + # adj = adj.tolist() # Used for read/load + # adjdense = sp.csr_matrix.todense(adj) + + # generate adj from edgeList + adjdense = sp.csr_matrix.todense(adj) + adjsample = torch.from_numpy(adjdense) + if args.precisionModel == 'Float': + adjsample = adjsample.float() + elif args.precisionModel == 'Double': + adjsample = adjsample.type(torch.DoubleTensor) + adjsample = adjsample.to(device) + + # generate celltype regularizer from celltype + celltypesample = generateCelltypeRegu(listResult) + + celltypesample = torch.from_numpy(celltypesample) + if args.precisionModel == 'Float': + celltypesample = celltypesample.float() + elif args.precisionModel == 'Double': + celltypesample = celltypesample.type(torch.DoubleTensor) + celltypesample = celltypesample.to(device) + + for epoch in range(1, args.EM_epochs + 1): + recon, original, z = train(epoch, EMFlag=True, taskType='imputation') + + reconOut = recon.detach().cpu().numpy() + + # out imputation Results + if args.imputeMode: + np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.dropoutRatio)+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + else: + np.save (args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.npy',reconOut) + np.savetxt(args.npyDir+args.datasetName+'_'+args.regulized_type+'_'+outParaTag+'_recon.csv',reconOut,delimiter=",",fmt='%10.4f') + + debuginfoStr('scGNN finished') diff --git a/plot_distribution.py b/plot_distribution.py new file mode 100644 index 0000000..081f6ad --- /dev/null +++ b/plot_distribution.py @@ -0,0 +1,108 @@ +import numpy as np +import matplotlib.pyplot as plt +import argparse +from scipy.stats import chi2_contingency +from scipy.stats import nbinom + +parser = argparse.ArgumentParser(description='Infer Spatial from Expression in single cells') + +parser.add_argument('--datasetName', type=str, default='1.Biase', + help='Dataset: 1-13 benchmark: 1.Biase/2.Li/3.Treutlein/4.Yan/5.Goolam/6.Guo/7.Deng/8.Pollen/9.Chung/10.Usoskin/11.Kolodziejczyk/12.Klein/13.Zeisel') +parser.add_argument('--para', type=str, default='LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1', + help='save npy results in directory') 
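+# Example invocation (a sketch, assuming the default scGNN output layout: --para is the parameter tag
+# embedded in the saved .npy filenames, e.g. <dataset>_<para>_recon.npy under --inDir; the --outDir
+# directory is assumed to exist already):
+#   python plot_distribution.py --datasetName 12.Klein --para LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1 \
+#       --inDir npyGraphTest/ --outDir DistNpy/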
+parser.add_argument('--inDir', type=str, default='npyGraphTest/', + help='save npy results in directory') +parser.add_argument('--outDir', type=str, default='DistNpy/', + help='save npy results in directory') +args = parser.parse_args() + + +ix=np.load(args.inDir+args.datasetName+'_'+args.para+'_dropix.npy') +i =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropi.npy') +j =np.load(args.inDir+args.datasetName+'_'+args.para+'_dropj.npy') +# recon =np.load('12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_recon.npy',allow_pickle=True) +# features=np.load('/Users/juexinwang/Downloads/temp/12.Klein_LTMG_0.1_10-0.1-0.9-0.0-0.3-0.1_features.npy',allow_pickle=True) +recon =np.load(args.inDir+args.datasetName+'_'+args.para+'_recon.npy',allow_pickle=True) +features=np.load(args.inDir+args.datasetName+'_'+args.para+'_features.npy',allow_pickle=True) +features=features.tolist() +features=features.todense() + +# Directly use plt histogram +# Careful! plt.hist does not work for huge datasets + +# _ = plt.hist(features.ravel()) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +# plt.close() + +# features_log = np.log(features+1) +# _ = plt.hist(features_log.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +# plt.close() + +# _ = plt.hist(recon.ravel(),bin=100) +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +# plt.close() + +# recon_exp = np.exp(recon)-1 +# plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +# plt.close() + +# Something wrong, have to change to here: +# plt.bar(bin_edges[:-1], hist) +# plt.xlim(min(bin_edges), max(bin_edges)) + +# Use numpy histogram +hist, bin_edges = np.histogram(features.ravel(), bins = np.arange(0,np.max(features)+10,10)) +# print(hist) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.png') +plt.close() + +features_log = np.log(features+1) +hist, bin_edges = np.histogram(features_log.ravel(), bins = np.arange(0,np.max(features_log)+0.1,0.1)) +# print(hist) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_features_log.png') +plt.close() + +hist, bin_edges = np.histogram(recon.ravel(), bins = np.arange(0,np.max(recon)+0.1,0.1)) +# print(hist) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon.png') +plt.close() + +recon_exp = np.exp(recon)-1 +hist, bin_edges = np.histogram(recon_exp.ravel(), bins = np.arange(0,np.max(recon_exp)+10,10)) +print(hist) +x_pos = [i for i, _ in enumerate(hist)] +plt.bar(x_pos, hist) +plt.xticks(x_pos, bin_edges[:-1]) +plt.xticks(rotation=90) +plt.savefig(args.outDir+'/'+args.datasetName+'_'+args.para+'_recon_exp.png') +plt.close() + +#test +# find x,y in 2D matrix +# numpy.unravel_index(a.argmax(), a.shape) +# data = [[207, 282, 241], [282, 240, 234, 3]] +# chi2_contingency(data) +np.savetxt(args.outDir+'/'+args.datasetName+'_'+args.para+'_features.txt', features, fmt='%d') + +# https://stats.stackexchange.com/questions/260580/negative-binomial-distribution-with-python-scipy-stats +# https://en.wikipedia.org/wiki/Negative_binomial_distribution#Alternative_formulations +# mean = np.mean(features) 
+# var = np.var(features) +# p = (var-mean)/var +# r = mean**2/(var-mean) +# x = np.arange(nbinom.ppf(0.01, p, r),nbinom.ppf(0.99, p, r)) +# ax.plot(x, nbinom.pmf(x, p, r), 'bo', ms=8, label='nbinom pmf') + diff --git a/plot_distribution.r b/plot_distribution.r new file mode 100644 index 0000000..c09f27b --- /dev/null +++ b/plot_distribution.r @@ -0,0 +1,84 @@ +# R +# Running after plot_distribution.py + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf +# https://arxiv.org/pdf/1810.02618.pdf +# https://rdrr.io/cran/gamlss.dist/man/ZANBI.html + +#install in conda: +# https://anaconda.org/conda-forge/r-fitdistrplus +# https://anaconda.org/conda-forge/r-gamlss +# install.packages("fitdistrplus") +# install.packages("gamlss") +library(fitdistrplus) +library(gamlss) + +args = commandArgs(trailingOnly=TRUE) +if (length(args)==0) { + stop("At least four argument must be supplied (input file).n", call.=FALSE) +} + +datasetName=args[1] +para=args[2] +indir=args[3] +outdir=args[4] + +features = read.table(paste(indir,"/",datasetName,"_",para,"_features.txt",sep=''), header = FALSE, sep = " ") +features = data.matrix(features) +features = as.vector(features) +features = as.numeric(features) + +print(paste(indir,"/",datasetName,"_",para,"_features.txt",sep='')) +mu_ = mean(features) +sigma_ = (sd(features)-mean(features))/mean(features)**2 +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 219 +fit_nbi = fitdist(features, 'NBI', start = list(mu = mu_, sigma = sigma_ )) +gofstat(fit_nbi) +tiff(file= paste(outdir,"/",datasetName,"_",para,"_NBI.tiff",sep='')) +plot(fit_nbi) +dev.off() + +# http://www.gamlss.com/wp-content/uploads/2013/01/book-2010-Athens1.pdf Page 221 +fit_zinb_= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_)) +gofstat(fit_zinb_) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI_.tiff",sep='')) +plot(fit_zinb_) +dev.off() + +nu_ = 1-length(which(features!=0))/(length(features)) +fit_zinb= fitdist(features, 'ZINBI', start = list(mu = mu_, sigma = sigma_, nu = nu_)) +gofstat(fit_zinb) +tiff(file=paste(outdir,"/",datasetName,"_",para,"_ZINBI.tiff",sep='')) +plot(fit_zinb) +dev.off() + + + + +# NBI: +# Goodness-of-fit statistics +# 1-mle-NBI +# Kolmogorov-Smirnov statistic 3.671374e-01 +# Cramer-von Mises statistic 1.016737e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-NBI +# Akaike's Information Criterion 25429885 +# Bayesian Information Criterion 25429912 + + +# ZINB +# Goodness-of-fit statistics +# 1-mle-ZINBI +# Kolmogorov-Smirnov statistic 4.532250e-01 +# Cramer-von Mises statistic 1.873046e+05 +# Anderson-Darling statistic Inf + +# Goodness-of-fit criteria +# 1-mle-ZINBI +# Akaike's Information Criterion 25969108 +# Bayesian Information Criterion 25969135 + +# Can learn from * +# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.nbinom.html \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index fd0d666..9b91edd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -numpy==1.18.1 -torch==1.4.0 -networkx==2.4 -pandas==0.25.3 -rpy2==3.2.4 -matplotlib==3.1.2 -seaborn==0.9.0 -umap-learn==0.3.10 -munkres==1.1.2 +numpy +torch>=1.4.0 +networkx>=2.4 +pandas>=0.25.3 +rpy2>=3.2.4 +matplotlib>=3.1.2 +seaborn>=0.9.0 +umap-learn +munkres>=1.1.2 community -tqdm==4.48.0 \ No newline at end of file +tqdm>=4.48.0 \ No newline at end of file diff --git a/results/Klein_correlation.py b/results/Klein_correlation.py new file mode 100644 
index 0000000..05b86e3 --- /dev/null +++ b/results/Klein_correlation.py @@ -0,0 +1,82 @@ +import numpy as np +from scipy import stats +import pandas as pd +import csv + +# Get correlation from gene interactions from Klein datasets in Figure 3 of scGNN paper +# Ref: Klein, Allon M., et al. "Droplet barcoding for single-cell transcriptomics applied to embryonic stem cells." Cell 161.5 (2015): 1187-1201. + +geneList=[ + 'Krt8', #4 + 'S100a6', #19 + 'Id2', #895 + 'Id1', #602 + 'ld3', #1559 + 'Ccnd1',# not in the range + 'Ccnb1',# not in the range + 'Ccnd2',# not in the range + 'Ccna1',# not in the range + 'Sox17',# not in the range + 'Col4a1', #226 + 'Pou5f1', #150 + 'Ccnd3', #255 + 'Ccna2',# not in the range + 'Nanog', #1449 + 'Klf4',# not in the range + 'Sox2', # 601 + 'Zfp42', #527 + 'Trim28', #136 + 'Esrrb', #849 + 'Tdh', #206 +] + +geneNumList=[ + 4, + 19, + 895, + 602, + 1559, + 226, + 150, + 255, + 1449, + 601, + 527, + 136, + 849, + 206, +] + +savedir = './fig3/' +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] + +def corCal(method='magic'): + if method == 'scIGANs': + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_0.0/12.Klein/scIGANs_npyImputeG2E_1_12.Klein_LTMG_0.0_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_Klein_only_label.csv.txt',sep='\s+',index_col=0) + x = df.to_numpy() + else: + if method == 'scvinorm': + filename = '/storage/htc/joshilab/wangjue/scGNN/scvi/12.Klein_0.0_1_recon_normalized.npy' + x = np.load(filename,allow_pickle=True) + x = x.T + elif method == 'netNMFsc': + filename = '/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/0.0/12.Klein/npyImputeG2E_1_log_imputation.npy' + x = np.load(filename,allow_pickle=True) + else: + filename = '/storage/htc/joshilab/wangjue/scGNN/{}/12.Klein_0.0_1_recon.npy'.format(method) + x = np.load(filename,allow_pickle=True) + x = x.T + + corr = np.zeros((len(geneNumList),len(geneNumList))) + for i in range(len(geneNumList)): + for j in range(len(geneNumList)): + corr[i,j]=stats.pearsonr(x[geneNumList[i],:], x[geneNumList[j],:])[0] + + out_filename = savedir+method+".csv" + with open(out_filename, "w") as f: + writer = csv.writer(f) + writer.writerows(corr) + + +for method in methodList: + corCal(method=method) \ No newline at end of file diff --git a/results/Klein_correlation.sh b/results/Klein_correlation.sh new file mode 100644 index 0000000..01eb788 --- /dev/null +++ b/results/Klein_correlation.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Fig3 +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore Klein_correlation.py \ No newline at end of file diff --git a/results/louvain.py b/results/louvain.py new file mode 100644 index 0000000..967b2d6 --- /dev/null +++ b/results/louvain.py @@ -0,0 +1,39 @@ +import os, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +import numpy as np +from util_function import * +from graph_function import * +from R_util import generateLouvainCluster +import argparse + +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +parser.add_argument('--k', type=int, default=10, + help='parameter k in KNN graph (default: 10)') +parser.add_argument('--knn-distance', type=str, default='euclidean', + help='KNN graph distance type: euclidean/cosine/correlation (default: euclidean)') +parser.add_argument('--prunetype', type=str, default='KNNgraphStatsSingleThreadNoPrune', + help='prune type, KNNgraphStats/KNNgraphML/KNNgraphStatsSingleThread (default: KNNgraphStats)') +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') +parser.add_argument('--input', type=str, default='filename', + help='input filename') +parser.add_argument('--output', type=str, default='filename', + help='input filename') +args = parser.parse_args() + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + +zOut = np.load(args.input,allow_pickle=True) +zOut,re = pcaFunc(zOut, n_components=10) +adj, edgeList = generateAdj(zOut, graphType=args.prunetype, para = args.knn_distance+':'+str(args.k)) +listResult,size = generateLouvainCluster(edgeList) +silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) +ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) +resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) +print(resultstr) + +with open(args.output,'w') as fw: + fw.writelines("%s\n" % strr for strr in listResult) diff --git a/results/louvain.sh b/results/louvain.sh new file mode 100644 index 0000000..3f51bea --- /dev/null +++ b/results/louvain.sh @@ -0,0 +1,57 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Louvain +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python -W ignore louvain.py --input othermethods/magic/9.Chung_0.0_1_recon.npy --output otherresults/magic/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/magic/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/12.Klein_0.0_1_recon.npy --output otherresults/magic/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/magic/13.Zeisel_0.0_1_recon.npy --output otherresults/magic/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/dca/9.Chung_0.0_1_recon.npy --output otherresults/dca/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/dca/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/12.Klein_0.0_1_recon.npy --output otherresults/dca/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/dca/13.Zeisel_0.0_1_recon.npy --output otherresults/dca/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/deepimpute/9.Chung_0.0_1_recon.npy --output otherresults/deepimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/deepimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/12.Klein_0.0_1_recon.npy --output otherresults/deepimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/deepimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/deepimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/netNMFsc/9.Chung_0.0_1_recon.npy --output otherresults/netNMFsc/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/netNMFsc/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --output otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --output otherresults/netNMFsc/13.txt --benchmark 
/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saucie/9.Chung_0.0_1_recon.npy --output otherresults/saucie/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saucie/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/12.Klein_0.0_1_recon.npy --output otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saucie/13.Zeisel_0.0_1_recon.npy --output otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/saver/9.Chung_0.0_1_recon.npy --output otherresults/saver/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/saver/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/12.Klein_0.0_1_recon.npy --output otherresults/saver/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/saver/13.Zeisel_0.0_1_recon.npy --output otherresults/saver/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scIGANs/9.Chung_0.0_1_recon.npy --output otherresults/scIGANs/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scIGANs/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/12.Klein_0.0_1_recon.npy --output otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --output otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scimpute/9.Chung_0.0_1_recon.npy --output otherresults/scimpute/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scimpute/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/12.Klein_0.0_1_recon.npy --output otherresults/scimpute/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scimpute/13.Zeisel_0.0_1_recon.npy --output otherresults/scimpute/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore louvain.py --input othermethods/scvi/9.Chung_0.0_1_recon.npy --output otherresults/scvi/9.txt --benchmark /home/jwang/data/scData/9.Chung/Chung_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/11.Kolodziejczyk_0.0_1_recon.npy --output otherresults/scvi/11.txt --benchmark /home/jwang/data/scData/11.Kolodziejczyk/Kolodziejczyk_cell_label.csv +python -W ignore louvain.py --input 
othermethods/scvi/12.Klein_0.0_1_recon.npy --output otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore louvain.py --input othermethods/scvi/13.Zeisel_0.0_1_recon.npy --output otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv diff --git a/results/results.sh b/results/results.sh new file mode 100644 index 0000000..e1f5d8e --- /dev/null +++ b/results/results.sh @@ -0,0 +1,25 @@ +#! /bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J Louvain +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R + +python -W ignore results_tmp.py --inputOri othermethods/saucie/12.Klein_0.0_1_recon.npy --input otherresults/saucie/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore results_tmp.py --inputOri othermethods/saucie/13.Zeisel_0.0_1_recon.npy --input otherresults/saucie/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore results_tmp.py --inputOri othermethods/scvi/12.Klein_0.0_1_recon.npy --input otherresults/scvi/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore results_tmp.py --inputOri othermethods/scvi/13.Zeisel_0.0_1_recon.npy --input otherresults/scvi/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/12.Klein_0.0_1_recon.npy --input otherresults/netNMFsc/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore results_tmp.py --inputOri othermethods/netNMFsc/13.Zeisel_0.0_1_recon.npy --input otherresults/netNMFsc/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv + +python -W ignore results_tmp.py --inputOri othermethods/scIGANs/12.Klein_0.0_1_recon.npy --input otherresults/scIGANs/12.txt --benchmark /home/jwang/data/scData/12.Klein/Klein_cell_label.csv +python -W ignore results_tmp.py --inputOri othermethods/scIGANs/13.Zeisel_0.0_1_recon.npy --input otherresults/scIGANs/13.txt --benchmark /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv diff --git a/results/results_Reading_recheck.py b/results/results_Reading_recheck.py new file mode 100644 index 0000000..e64b509 --- /dev/null +++ b/results/results_Reading_recheck.py @@ -0,0 +1,312 @@ +import os +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +parser.add_argument('--methodName', type=int, default=0, + help="method used: 0-62") +parser.add_argument('--imputeMode', default=True, action='store_true', + help='impute or not (default: False). Caution: usually change npuDir if set imputeMode as true') +parser.add_argument('--runMode',action='store_true', default=False, help="Run or prepare cluster script") +parser.add_argument('--splitMode', default=False, action='store_true', + help='whether split, used for long queue') +parser.add_argument('--batchStr', type=int, default=0, + help="method used: 1-13") +args = parser.parse_args() + +# New notes: +# We used this in paper revision, will generate lots of .sh files. 
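+# For example, submitCluster_Result_Impute_recheck.sh (later in this repo) redirects the printed
+# SLURM template into one script per method/batch and then submits it, roughly:
+#   python results_Reading_recheck.py --methodName 0 --splitMode --batchStr 12 > run_Results_Impute_0-12.sh
+#   sbatch run_Results_Impute_0-12.sh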
+# This file is called by submitCluster_Result_Impute_recheck.sh, and only check .out files. +# The results can be get by cat *.out + +# Old Note: +# Generate results in python other than in shell for better organization +# We are not use runpy.run_path('main_result.py') for it is hard to pass arguments +# We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel +# So we use os.system('') here + +if args.splitMode: + #The split of batch, more batches, more parallel + + if args.batchStr == 9: + datasetList = [ + '9.Chung', + # '9.Chung --discreteTag' + ] + elif args.batchStr == 11: + datasetList = [ + '11.Kolodziejczyk', + # '11.Kolodziejczyk --discreteTag' + ] + elif args.batchStr == 12: + datasetList = [ + '12.Klein', + # '12.Klein --discreteTag' + ] + elif args.batchStr == 13: + datasetList = [ + '13.Zeisel', + # '13.Zeisel --discreteTag' + ] +else: + datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', + ] + +if args.imputeMode: + pyStr = 'results_impute_graph.py' + + npyList = [ + '../npyImputeG2E_1/ --ratio 0.1', #1 + '../npyImputeG2E_1/ --ratio 0.3', #2 + '../npyImputeG2E_1/ --ratio 0.6', #3 + '../npyImputeG2E_1/ --ratio 0.8', #4 + '../npyImputeG2EL_1/ --ratio 0.1', #5 + '../npyImputeG2EL_1/ --ratio 0.3', #6 + '../npyImputeG2EL_1/ --ratio 0.6', #7 + '../npyImputeG2EL_1/ --ratio 0.8', #8 + '../npyImputeG1E_1/ --ratio 0.1', #9 + '../npyImputeG1E_1/ --ratio 0.3', #10 + '../npyImputeG1E_1/ --ratio 0.6', #11 + '../npyImputeG1E_1/ --ratio 0.8', #12 + '../npyImputeG2F_1/ --ratio 0.1', #13 + '../npyImputeG2F_1/ --ratio 0.3', #14 + '../npyImputeG2F_1/ --ratio 0.6', #15 + '../npyImputeG2F_1/ --ratio 0.8', #16 + '../npyImputeN2E_1/ --ratio 0.1', #17 + '../npyImputeN2E_1/ --ratio 0.3', #18 + '../npyImputeN2E_1/ --ratio 0.6', #19 + '../npyImputeN2E_1/ --ratio 0.8', #20 + + '../npyImputeG2E_2/ --ratio 0.1', #21 + '../npyImputeG2E_2/ --ratio 0.3', #22 + '../npyImputeG2E_2/ --ratio 0.6', #23 + '../npyImputeG2E_2/ --ratio 0.8', #24 + '../npyImputeG2EL_2/ --ratio 0.1', #25 + '../npyImputeG2EL_2/ --ratio 0.3', #26 + '../npyImputeG2EL_2/ --ratio 0.6', #27 + '../npyImputeG2EL_2/ --ratio 0.8', #28 + '../npyImputeG1E_2/ --ratio 0.1', #29 + '../npyImputeG1E_2/ --ratio 0.3', #30 + '../npyImputeG1E_2/ --ratio 0.6', #31 + '../npyImputeG1E_2/ --ratio 0.8', #32 + '../npyImputeG2F_2/ --ratio 0.1', #33 + '../npyImputeG2F_2/ --ratio 0.3', #34 + '../npyImputeG2F_2/ --ratio 0.6', #35 + '../npyImputeG2F_2/ --ratio 0.8', #36 + '../npyImputeN2E_2/ --ratio 0.1', #37 + '../npyImputeN2E_2/ --ratio 0.3', #38 + '../npyImputeN2E_2/ --ratio 0.6', #39 + '../npyImputeN2E_2/ --ratio 0.8', #40 + + '../npyImputeG2E_3/ --ratio 0.1', #41 + '../npyImputeG2E_3/ --ratio 0.3', #42 + '../npyImputeG2E_3/ --ratio 0.6', #43 + '../npyImputeG2E_3/ --ratio 0.8', #44 + '../npyImputeG2EL_3/ --ratio 0.1', #45 + '../npyImputeG2EL_3/ --ratio 0.3', #46 + '../npyImputeG2EL_3/ --ratio 0.6', #47 + '../npyImputeG2EL_3/ --ratio 0.8', #48 + '../npyImputeG1E_3/ --ratio 0.1', #49 + '../npyImputeG1E_3/ --ratio 0.3', #50 + '../npyImputeG1E_3/ --ratio 0.6', #51 + '../npyImputeG1E_3/ --ratio 0.8', #52 + '../npyImputeG2F_3/ --ratio 0.1', #53 + '../npyImputeG2F_3/ --ratio 0.3', #54 + '../npyImputeG2F_3/ --ratio 0.6', #55 + '../npyImputeG2F_3/ --ratio 0.8', #56 + '../npyImputeN2E_3/ --ratio 0.1', #57 + '../npyImputeN2E_3/ --ratio 0.3', #58 + '../npyImputeN2E_3/ --ratio 0.6', #59 + '../npyImputeN2E_3/ --ratio 0.8', #60 + + ] + +else: + pyStr = 'results_celltype.py' + + npyList = [ + '../npyG1B/', #0 
+ '../npyG1E/', #1 + '../npyG1F/', #2 + '../npyR1B/', #3 + '../npyR1E/', #4 + '../npyR1F/', #5 + '../npyN1B/', #6 + '../npyN1E/', #7 + '../npyN1F/', #8 + '../npyG2B/', #9 + '../npyG2E/', #10 + '../npyG2F/', #11 + '../npyR2B/', #12 + '../npyR2E/', #13 + '../npyR2F/', #14 + '../npyN2B/', #15 + '../npyN2E/', #16 + '../npyN2F/', #17 + + '../npyG1B_LK/', #18 + '../npyG1E_LK/', #19 + '../npyG1F_LK/', #20 + '../npyR1B_LK/', #21 + '../npyR1E_LK/', #22 + '../npyR1F_LK/', #23 + '../npyN1B_LK/', #24 + '../npyN1E_LK/', #25 + '../npyN1F_LK/', #26 + '../npyG2B_LK/', #27 + '../npyG2E_LK/', #28 + '../npyG2F_LK/', #29 + '../npyR2B_LK/', #30 + '../npyR2E_LK/', #31 + '../npyR2F_LK/', #32 + '../npyN2B_LK/', #33 + '../npyN2E_LK/', #34 + '../npyN2F_LK/', #35 + + '../npyG1B_LB/', #36 + '../npyG1E_LB/', #37 + '../npyG1F_LB/', #38 + '../npyR1B_LB/', #39 + '../npyR1E_LB/', #40 + '../npyR1F_LB/', #41 + '../npyN1B_LB/', #42 + '../npyN1E_LB/', #43 + '../npyN1F_LB/', #44 + '../npyG2B_LB/', #45 + '../npyG2E_LB/', #46 + '../npyG2F_LB/', #47 + '../npyR2B_LB/', #48 + '../npyR2E_LB/', #49 + '../npyR2F_LB/', #50 + '../npyN2B_LB/', #51 + '../npyN2E_LB/', #52 + '../npyN2F_LB/', #53 + ] + +reguDict={} + +for i in range(0,16): + reguDict[i]='LTMG' +for i in range(16,20): + reguDict[i]='noregu' +for i in range(20,36): + reguDict[i]='LTMG' +for i in range(36,40): + reguDict[i]='noregu' +for i in range(40,56): + reguDict[i]='LTMG' +for i in range(56,60): + reguDict[i]='noregu' + +reguStr='' +if args.methodName in reguDict: + reguStr=' --regulized-type ' + reguDict[args.methodName] + ' ' + +npyStr = npyList[args.methodName] + +benchmarkStr = '' + +if args.runMode: + labelFileDir = '/home/wangjue/biodata/scData/allBench/' +else: + labelFileDir = '/home/jwang/data/scData/' + +def getBenchmarkStr(count): + benchmarkStr = '' + if args.batchStr == 1: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '1.Biase/Biase_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 2: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '2.Li/Li_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 3: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '3.Treutlein/Treutlein_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 4: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '4.Yan/Yan_cell_label.csv '\ + '--n-clusters 7 ' + elif args.batchStr == 5: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '5.Goolam/Goolam_cell_label.csv '\ + '--n-clusters 5 ' + elif args.batchStr == 6: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '6.Guo/Guo_cell_label.csv '\ + '--n-clusters 9 ' + elif args.batchStr == 7: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '7.Deng/Deng_cell_label.csv '\ + '--n-clusters 10 ' + elif args.batchStr == 8: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '8.Pollen/Pollen_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 9: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '9.Chung/Chung_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 10: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '10.Usoskin/Usoskin_cell_label.csv '\ + '--n-clusters 11 ' + elif args.batchStr == 11: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '11.Kolodziejczyk/Kolodziejczyk_cell_label.csv '\ + '--n-clusters 3 ' + elif args.batchStr == 12: + benchmarkStr = ' 
--benchmark '\ + '--labelFilename ' + labelFileDir + '12.Klein/Klein_cell_label.csv '\ + '--n-clusters 4 ' + elif args.batchStr == 13: + benchmarkStr = ' --benchmark '\ + '--labelFilename ' + labelFileDir + '13.Zeisel/Zeisel_cell_label.csv '\ + '--n-clusters 7 ' + + return benchmarkStr + + +if not args.runMode: + if args.imputeMode: + imputeStr = 'I' + else: + imputeStr = 'C' + splitStr = '' + if args.splitMode: + splitStr = '_'+str(args.batchStr) + templateStr = "#! /bin/bash\n"\ + "######################### Batch Headers #########################\n"\ + "#SBATCH -A xulab\n"\ + "#SBATCH -p Lewis,BioCompute # use the BioCompute partition\n"\ + "#SBATCH -J R" + imputeStr + '_' + str(args.methodName) + splitStr + " \n"\ + "#SBATCH -o results-%j.out # give the job output a custom name\n"\ + "#SBATCH -t 2-00:00 # two days time limit\n"\ + "#SBATCH -N 1 # number of nodes\n"\ + "#SBATCH -n 1 # number of cores (AKA tasks)\n"\ + "#SBATCH --mem=128G\n"\ + "#################################################################\n"\ + "module load miniconda3\n"\ + "source activate conda_R\n" + print(templateStr) + +count = 0 +for datasetStr in datasetList: + commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --npyDir ' + npyStr + if args.runMode: + os.system(commandStr) + else: + print(commandStr) + # for i in range(10): + # commandStr = 'python -W ignore ' + pyStr + ' --datasetName ' + datasetStr + reguStr + getBenchmarkStr(count) + ' --reconstr '+ str(i) + ' --npyDir ' + npyStr + # if args.runMode: + # os.system(commandStr) + # else: + # print(commandStr) + count += 1 + + diff --git a/results/results_impute_graph.py b/results/results_impute_graph.py index 4145dd1..3f33f3c 100644 --- a/results/results_impute_graph.py +++ b/results/results_impute_graph.py @@ -63,20 +63,19 @@ # dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropi.npy') # dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropj.npy') # dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_dropix.npy') -dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropi.npy') -dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropj.npy') -dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_dropix.npy') - - +dropi = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropi.npy') +dropj = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropj.npy') +dropix = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_dropix.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_'+args.regupara+'_recon'+args.reconstr+'.npy') -featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-'+args.regupara+'_recon'+args.reconstr+'.npy') +featuresImpute = 
np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') # featuresImpute = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_0.0-0.3-0.1_recon'+args.reconstr+'.npy') # featuresImpute = pd.read_csv(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.regupara+'_0.0_0.0_recon'+args.reconstr+'.csv') # featuresImpute = featuresImpute.to_numpy() -l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) -print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='') +l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) +print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse), end='') def imputeResult(inputData): ''' diff --git a/results/results_impute_others_all.py b/results/results_impute_others_all.py new file mode 100644 index 0000000..c2b6d9f --- /dev/null +++ b/results/results_impute_others_all.py @@ -0,0 +1,76 @@ +import os +import numpy as np +import pandas as pd +import argparse +import scipy.sparse +import sys +sys.path.append('../') +from util_function import * +from benchmark_util import * +from R_util import generateLouvainCluster +from sklearn.cluster import KMeans +import argparse +parser = argparse.ArgumentParser(description='Read Results in different methods') +args = parser.parse_args() + +# Notes: +# In HPC, call by sbatch submit_Impute_others.sh + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + +oridirStr = '../npyImputeG2E' +medirStr = '../' + +seedList = ['1','2','3'] +ratioList = ['0.1','0.3','0.6','0.8'] + +# sophisticated, not using +# methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANslog','scIGANs','netNMFsclog','netNMFsc'] + +# We should use only log(x+1) if the method permitted +methodList = ['magic','saucie','saver','scimpute','scvi','scvinorm','dca','deepimpute','scIGANs','netNMFsc'] + +def outResults(datasetName,seed,ratio,method): + featuresOriginal = load_data(datasetName, discreteTag=False) + + features = None + dropi = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropi.npy') + dropj = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropj.npy') + dropix = np.load(oridirStr+'_'+seed+'/'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_dropix.npy') + + # scGNN results + # featuresImpute = np.load(npyDir+datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_10-0.1-0.9-0.0-0.3-'+args.regupara+'_recon'+args.reconstr+'.npy') + if method == 'scvinorm': + featuresImpute = np.load(medirStr+'scvi/'+datasetName+'_'+ratio+'_'+seed+'_recon_normalized.npy') + # not using now + elif method == 'scIGANs': + df = pd.read_csv('/storage/htc/joshilab/jghhd/singlecellTest/scIGAN/Result_200_'+ratio+'/'+datasetName+'/scIGANs_npyImputeG2E_'+seed+'_'+datasetName+'_LTMG_'+ratio+'_10-0.1-0.9-0.0-0.3-0.1_features_log.csv_'+datasetName.split('.')[1]+'_only_label.csv.txt',sep='\s+',index_col=0) + tmp = df.to_numpy() + featuresImpute = tmp.T + elif method == 'netNMFsc': + featuresImpute = 
np.load('/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/result_mi_100000/'+ratio+'/'+datasetName+'/npyImputeG2E_'+seed+'_log_imputation.npy') + featuresImpute = featuresImpute.T + else: + featuresImpute = np.load(medirStr+method+'/'+datasetName+'_'+ratio+'_'+seed+'_recon.npy') + + # No log + if method=='dca' or method=='deepimpute': + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + # log + else: + l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + cosine = imputation_cosine_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix) + print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, cosine, rmse)) + + +for method in methodList: + for datasetName in datasetList: + for seed in seedList: + for ratio in ratioList: + outResults(datasetName=datasetName, seed=seed, ratio=ratio, method=method) \ No newline at end of file diff --git a/results/results_tmp.py b/results/results_tmp.py new file mode 100644 index 0000000..97aab4d --- /dev/null +++ b/results/results_tmp.py @@ -0,0 +1,31 @@ +import os, sys +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) +# sys.path.append('../') +import numpy as np +from util_function import * +from graph_function import * +import argparse + +parser = argparse.ArgumentParser(description='main benchmark for scRNA with timer and mem') +#Benchmark related +parser.add_argument('--benchmark', type=str, default='/home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv', + help='the benchmark file of celltype (default: /home/jwang/data/scData/13.Zeisel/Zeisel_cell_label.csv)') +parser.add_argument('--input', type=str, default='filename', + help='input filename') +parser.add_argument('--inputOri', type=str, default='filename', + help='input filename') +args = parser.parse_args() + +#Benchmark +bench_pd=pd.read_csv(args.benchmark,index_col=0) +bench_celltype=bench_pd.iloc[:,0].to_numpy() + + +#'saucie/13.txt' +z_pd = pd.read_csv(args.input,header=None) +listResult = z_pd.iloc[:,0].to_numpy() +zOut = np.load(args.inputOri,allow_pickle=True) +silhouette, chs, dbs = measureClusteringNoLabel(zOut, listResult) +ari, ami, nmi, cs, fms, vms, hs = measureClusteringTrueLabel(bench_celltype, listResult) +resultstr = str(silhouette)+' '+str(chs)+' '+str(dbs)+' '+str(ari)+' '+str(ami)+' '+str(nmi)+' '+str(cs)+' '+str(fms)+' '+str(vms)+' '+str(hs) +print(resultstr) diff --git a/results/submitCluster_Result_Impute_recheck.sh b/results/submitCluster_Result_Impute_recheck.sh new file mode 100644 index 0000000..157350a --- /dev/null +++ b/results/submitCluster_Result_Impute_recheck.sh @@ -0,0 +1,17 @@ +for i in {0..59} +do +for j in {9,11,12,13} +do +python results_Reading_recheck.py --methodName $i --splitMode --batchStr $j > run_Results_Impute_$i-$j.sh +done +done + +# submit +for j in {9,11,12,13} +do +for i in {0..59} +do +sbatch run_Results_Impute_$i-$j.sh +sleep 1 +done +done \ No newline at end of file diff --git a/results/submit_Impute_others.sh b/results/submit_Impute_others.sh new file mode 100644 index 0000000..55e89f4 --- /dev/null +++ b/results/submit_Impute_others.sh @@ -0,0 +1,14 @@ +#! 
/bin/bash +######################### Batch Headers ######################### +#SBATCH -A xulab +#SBATCH -p Lewis,BioCompute # use the BioCompute partition Lewis,BioCompute +#SBATCH -J OthersResults +#SBATCH -o results-%j.out # give the job output a custom name +#SBATCH -t 2-00:00 # two days time limit +#SBATCH -N 1 # number of nodes +#SBATCH -n 1 # number of cores (AKA tasks) +#SBATCH --mem=128G +################################################################# +module load miniconda3 +source activate conda_R +python3 -W ignore results_impute_others_all.py \ No newline at end of file diff --git a/results/zeroPercentage.py b/results/zeroPercentage.py new file mode 100644 index 0000000..cef85e8 --- /dev/null +++ b/results/zeroPercentage.py @@ -0,0 +1,24 @@ +#Calculate Zero percentage in each of the datasets +import numpy as np + +def calcu(dataset='9.Chung',ratio=0.0): + t=np.load('npyImputeG2E_1/{}_LTMG_{}_10-0.1-0.9-0.0-0.3-0.1_features.npy'.format(dataset,ratio),allow_pickle=True) + t=t.tolist() + t=t.todense() + zeroNum = np.where(t==0)[0].shape[0] + allNum = t.shape[0]*t.shape[1] + percent = zeroNum/allNum + print('{} {} {}'.format(zeroNum,allNum,percent)) + +datasetList = [ + '9.Chung', + '11.Kolodziejczyk', + '12.Klein', + '13.Zeisel', +] + +ratioList = ['0.0','0.1','0.3','0.6','0.8'] + +for dataset in datasetList: + for ratio in ratioList: + calcu(dataset, ratio) \ No newline at end of file diff --git a/scGNN.py b/scGNN.py index 6f685a3..b4d5e7a 100644 --- a/scGNN.py +++ b/scGNN.py @@ -755,4 +755,6 @@ def train(epoch, train_loader=train_loader, EMFlag=False, taskType='celltype', s results_df = pd.DataFrame(listResult,index=celllist,columns=["Celltype"]) results_df.to_csv(args.outputDir+args.datasetName+'_'+args.regulized_type+'_'+str(args.alphaRegularizePara)+'_'+str(args.L1Para)+'_'+str(args.L2Para)+'_results.txt') + mem=resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print('Mem consumption: '+str(mem)) print('---'+str(datetime.timedelta(seconds=int(time.time()-start_time)))+"---scGNN finished") diff --git a/scripts/choose_louvain.py b/scripts/choose_louvain.py new file mode 100644 index 0000000..42cbe0c --- /dev/null +++ b/scripts/choose_louvain.py @@ -0,0 +1,104 @@ +# Script to test efficiency of louvain + +# Option 1: Original version, use r version of louvain, it takes time to link R, and need install rpy2. 
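+# (A possible conda-based setup, assuming the conda-forge channel provides both packages:
+#  conda install -c conda-forge rpy2 r-igraph)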
+# Not use anymore +# Clustering is different between Case one and two +import pandas as pd +import rpy2.robjects as ro +from rpy2.robjects.packages import importr +from rpy2.robjects import r, pandas2ri +pandas2ri.activate() + +# case one: +edgeList = [] +edgeList.append((0,2,1.0)) +edgeList.append((1,2,1.0)) +edgeList.append((2,3,1.0)) +edgeList.append((3,4,1.0)) +edgeList.append((4,5,1.0)) +edgeList.append((4,6,1.0)) + +# case two: +edgeList.append((0,2,1.0)) +edgeList.append((1,2,1.0)) +edgeList.append((2,3,0.1)) +edgeList.append((3,4,1.0)) +edgeList.append((4,5,1.0)) +edgeList.append((4,6,1.0)) + +fromVec = [] +toVec = [] +weightVec = [] +for edge in edgeList: + fromVec.append(edge[0]) + toVec.append(edge[1]) + weightVec.append(edge[2]) + +igraph = importr('igraph') +base = importr('base') +fromV = ro.FloatVector(fromVec) +toV = ro.FloatVector(toVec) +# weightV= ro.FloatVector([0.1,1.0,1.0,0.1,1.0]) +weightV= ro.FloatVector(weightVec) +links = ro.DataFrame({'from':fromV,'to':toV,'weight':weightV}) +g = igraph.graph_from_data_frame(links,directed = False) +cl = igraph.cluster_louvain(g) + +def as_dict(vector): + """Convert an RPy2 ListVector to a Python dict""" + result = {} + for i, name in enumerate(vector.names): + if isinstance(vector[i], ro.ListVector): + result[name] = as_dict(vector[i]) + elif len(vector[i]) == 1: + result[name] = vector[i][0] + else: + result[name] = vector[i] + return result + +cl_dict = as_dict(cl) +df = pd.DataFrame() +# df['Cluster']=cl_dict['membership'] +size = float(len(set(cl_dict['membership']))) + +listResult=[] +count = 0 +for i in range(len(cl_dict['membership'])): + listResult.append(int(cl_dict['membership'][i])-1) + count += 1 + +# Option 2: use package python-louvain, but does not work +# Clustering is identical between Case one and two, so we cannot use it +import networkx as nx +import community as community_louvain +G = nx.Graph() +G.add_weighted_edges_from(edgeList) +partition = community_louvain.best_partition(G,weight='weight') + + +# Option 3: use igraph, pure python and looks right +# Clustering is identical between Case one and two, so we cannot use it +import numpy as np +from igraph import * +#Case 1: +W=np.zeros((7,7)) +W[0,2]=1.0 +W[1,2]=1.0 +W[2,3]=1.0 +W[3,4]=1.0 +W[4,5]=1.0 +W[4,6]=1.0 + +#Case 2: +W=np.zeros((7,7)) +W[0,2]=1.0 +W[1,2]=1.0 +W[2,3]=0.1 +W[3,4]=1.0 +W[4,5]=1.0 +W[4,6]=1.0 + +graph = Graph.Weighted_Adjacency(W.tolist(), mode=ADJ_UNDIRECTED, attr="weight", loops=False) +louvain_partition = graph.community_multilevel(weights=graph.es['weight'], return_levels=False) +print(louvain_partition) + diff --git a/submitCluster_distribution.sh b/submitCluster_distribution.sh new file mode 100644 index 0000000..e36b7ec --- /dev/null +++ b/submitCluster_distribution.sh @@ -0,0 +1,32 @@ +#submit plotting + +for i in {0.1,0.3,0.6,0.8} +do +sbatch plot_G2E_$i\_9.sh +sbatch plot_G2E_$i\_11.sh +sbatch plot_G2E_$i\_12.sh +sbatch plot_G2E_$i\_13.sh +done + +# for i in {0.1,0.3,0.6,0.8} +# do +# sbatch plot_G2EL_$i\_9.sh +# sbatch plot_G1E_$i\_9.sh +# sbatch plot_G2F_$i\_9.sh +# sbatch plot_N2E_$i\_9.sh + +# sbatch plot_G2EL_$i\_11.sh +# sbatch plot_G1E_$i\_11.sh +# sbatch plot_G2F_$i\_11.sh +# sbatch plot_N2E_$i\_11.sh + +# sbatch plot_G2EL_$i\_12.sh +# sbatch plot_G1E_$i\_12.sh +# sbatch plot_G2F_$i\_12.sh +# sbatch plot_N2E_$i\_12.sh + +# sbatch plot_G2EL_$i\_13.sh +# sbatch plot_G1E_$i\_13.sh +# sbatch plot_G2F_$i\_13.sh +# sbatch plot_N2E_$i\_13.sh +# done \ No newline at end of file diff --git a/submitCluster_imputation_0.0.sh 
b/submitCluster_imputation_0.0.sh new file mode 100644 index 0000000..5dcd876 --- /dev/null +++ b/submitCluster_imputation_0.0.sh @@ -0,0 +1,4 @@ +sbatch run_experimentImpute_2_g_e_1_9_0.0.sh +sbatch run_experimentImpute_2_g_e_1_11_0.0.sh +sbatch run_experimentImpute_2_g_e_1_12_0.0.sh +sbatch run_experimentImpute_2_g_e_1_13_0.0.sh diff --git a/submitCluster_imputation_0.1-0.8-ablation.sh b/submitCluster_imputation_0.1-0.8-ablation.sh new file mode 100644 index 0000000..a822d20 --- /dev/null +++ b/submitCluster_imputation_0.1-0.8-ablation.sh @@ -0,0 +1,47 @@ +mkdir npyImputeG2E_1 +mkdir npyImputeG2EL_1 +mkdir npyImputeG2F_1 +mkdir npyImputeN2E_1 +mkdir npyImputeG1E_1 + +mkdir npyImputeG2E_2 +mkdir npyImputeG2EL_2 +mkdir npyImputeG2F_2 +mkdir npyImputeN2E_2 +mkdir npyImputeG1E_2 + +mkdir npyImputeG2E_3 +mkdir npyImputeG2EL_3 +mkdir npyImputeG2F_3 +mkdir npyImputeN2E_3 +mkdir npyImputeG1E_3 + +for i in {1..3} +do +for j in {0.1,0.3,0.6,0.8} +do +sbatch run_experimentImpute_1_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_9_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_9_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_9_$j\.sh + +sbatch run_experimentImpute_1_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_11_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_11_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_11_$j\.sh + +sbatch run_experimentImpute_1_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_12_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_12_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_12_$j\.sh + +sbatch run_experimentImpute_1_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_e_L_$i\_13_$j\.sh +sbatch run_experimentImpute_2_g_f_$i\_13_$j\.sh +sbatch run_experimentImpute_2_n_e_$i\_13_$j\.sh +done +done \ No newline at end of file diff --git a/util_function.py b/util_function.py index 103b585..f159d2a 100644 --- a/util_function.py +++ b/util_function.py @@ -65,13 +65,23 @@ def load_data(datasetName, discreteTag): names = ['x', 'tx', 'allx'] objects = [] for i in range(len(names)): - with open(dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]), 'rb') as f: + #windows + if os.name=='nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.{}".format(datasetName, datasetName, names[i]) + else: + filename = dir_path+"/data/sc/{}/ind.{}.{}".format(datasetName, datasetName, names[i]) + with open(filename, 'rb') as f: if sys.version_info > (3, 0): objects.append(pkl.load(f, encoding='latin1')) else: objects.append(pkl.load(f)) x, tx, allx = tuple(objects) - test_idx_reorder = parse_index_file(dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName)) + #windows + if os.name == 'nt': + filename = dir_path+"\\data\\sc\\{}\\ind.{}.test.index".format(datasetName, datasetName) + else: + filename = dir_path+"/data/sc/{}/ind.{}.test.index".format(datasetName, datasetName) + test_idx_reorder = parse_index_file(filename) test_idx_range = np.sort(test_idx_reorder) if datasetName == 'citeseer': @@ -199,6 +209,48 @@ def __getitem__(self, idx): return sample,idx +class scDatasetDropoutSparse(Dataset): + def __init__(self, data=None, discreteTag=False, ratio=0.1, seed=1, transform=None): + """ + Args: + Sparse + datasetName (String): TGFb, etc. 
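+            data (sparse matrix): expression matrix; transposed internally so rows are cells
+            discreteTag (bool): if False, samples are log(x+1)-transformed in __getitem__
+            ratio (float): dropout rate passed to impute_dropout for the imputation benchmark
+            seed (int): random seed for the simulated dropout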
+ transform (callable, optional): + """ + + self.featuresOriginal = data.transpose() + self.ratio = ratio + # Random seed + # np.random.uniform(1, 2) + self.features, self.i, self.j, self.ix = impute_dropout(self.featuresOriginal, seed=seed, rate=self.ratio) + # Now lines are cells, and cols are genes + # self.features = self.features.transpose() + self.transform = transform + # check whether log or not + self.discreteTag = discreteTag + + def __len__(self): + return self.features.shape[0] + + def __getitem__(self, idx): + if torch.is_tensor(idx): + idx = idx.tolist() + + sample = self.features[idx,:] + if type(sample)==sp.lil_matrix: + sample = torch.from_numpy(sample.toarray()) + else: + sample = torch.from_numpy(sample) + + # transform after get the data + if self.transform: + sample = self.transform(sample) + + if not self.discreteTag: + sample = torch.log(sample+1) + + return sample,idx + class scDataset(Dataset): def __init__(self, data=None, transform=None): """