juexinwang · juexinwang · Nov 13, 2020 · Nov 13, 2020 · Nov 15, 2020 · Nov 15, 2020
diff --git a/bak/npy2csv_script.py b/bak/npy2csv_script.py
@@ -0,0 +1,50 @@
+import numpy as np
+import pandas as pd
+
+def convert(method='dca'):
+    """Convert the four benchmark ``*_recon.npy`` files of ``method`` to CSV.
+
+    Each matrix is read from ``<method>/<dataset>_0.0_1_recon.npy`` and
+    written without header or index to ``<method>_<id>.csv`` in the current
+    directory, where ``<id>`` is the numeric prefix of the dataset name.
+    """
+    import os  # local import: the script's import block only has np/pd
+
+    datasets = ('9.Chung', '11.Kolodziejczyk', '12.Klein', '13.Zeisel')
+    for dataset in datasets:
+        # os.path.join instead of a hard-coded '\\' so the script also
+        # works outside Windows.
+        recon = np.load(os.path.join(method, dataset + '_0.0_1_recon.npy'))
+        out_name = method + '_' + dataset.split('.')[0] + '.csv'
+        pd.DataFrame(recon).to_csv(out_name, header=None, index=False)
+
+# Convert the .npy reconstructions of each method directory, one at a time,
+# in the order listed.
+for method_name in (
+    'dca', 'deepimpute', 'magic', 'netNMFsc',
+    'saucie', 'saver', 'scimpute', 'scvi',
+):
+    convert(method_name)
+del method_name  # do not leave the loop variable in the module namespace
+
+
+def convertCSV(method='scIGANs'):
+    """Convert the four ``*_recon.csv.txt`` tables of ``method`` to plain CSV.
+
+    Each whitespace-separated table is read with its first column as the
+    index, transposed, and written without header or index to
+    ``<method>_<id>.csv`` in the current directory.
+    """
+    import os  # local import: the script's import block only has np/pd
+
+    for dataset in ('9.Chung', '11.Kolodziejczyk', '12.Klein', '13.Zeisel'):
+        # os.path.join replaces the hard-coded Windows '\\' separator.
+        src = os.path.join(method, dataset + '_0.0_1_recon.csv.txt')
+        # Raw string: '\s' in a plain literal is an invalid escape sequence.
+        df = pd.read_csv(src, sep=r'\s+', index_col=0).T
+        out_name = method + '_' + dataset.split('.')[0] + '.csv'
+        df.to_csv(out_name, header=None, index=False)
+
+convertCSV('scIGANs')  # scIGANs results are '.csv.txt' text tables, not .npy, so they go through convertCSV
+
+
diff --git a/otherresults/BAK_MAGIC.py → bak/otherresults/BAK_MAGIC.py b/otherresults/BAK_MAGIC.py → bak/otherresults/BAK_MAGIC.py
diff --git a/otherresults/MAGIC_analysis.sh → bak/otherresults/MAGIC_analysis.sh b/otherresults/MAGIC_analysis.sh → bak/otherresults/MAGIC_analysis.sh
diff --git a/otherresults/MAGIC_analysis_usage.sh → bak/otherresults/MAGIC_analysis_usage.sh b/otherresults/MAGIC_analysis_usage.sh → bak/otherresults/MAGIC_analysis_usage.sh
diff --git a/otherresults/MAGIC_impute.py → bak/otherresults/MAGIC_impute.py b/otherresults/MAGIC_impute.py → bak/otherresults/MAGIC_impute.py
diff --git a/otherresults/MAGIC_impute_usage.py → bak/otherresults/MAGIC_impute_usage.py b/otherresults/MAGIC_impute_usage.py → bak/otherresults/MAGIC_impute_usage.py
diff --git a/otherresults/Other_Results_Evaluation.sh → bak/otherresults/Other_Results_Evaluation.sh b/otherresults/Other_Results_Evaluation.sh → bak/otherresults/Other_Results_Evaluation.sh
diff --git a/otherresults/Other_results_Reading.py → bak/otherresults/Other_results_Reading.py b/otherresults/Other_results_Reading.py → bak/otherresults/Other_results_Reading.py
diff --git a/otherresults/Other_results_celltype.py → bak/otherresults/Other_results_celltype.py b/otherresults/Other_results_celltype.py → bak/otherresults/Other_results_celltype.py
diff --git a/otherresults/Other_results_impute.py → bak/otherresults/Other_results_impute.py b/otherresults/Other_results_impute.py → bak/otherresults/Other_results_impute.py
diff --git a/otherresults/README.md → bak/otherresults/README.md b/otherresults/README.md → bak/otherresults/README.md
diff --git a/otherresults/SAUCIE_analysis.sh → bak/otherresults/SAUCIE_analysis.sh b/otherresults/SAUCIE_analysis.sh → bak/otherresults/SAUCIE_analysis.sh
diff --git a/otherresults/SAUCIE_celltype.py → bak/otherresults/SAUCIE_celltype.py b/otherresults/SAUCIE_celltype.py → bak/otherresults/SAUCIE_celltype.py
diff --git a/otherresults/SAUCIE_impute.py → bak/otherresults/SAUCIE_impute.py b/otherresults/SAUCIE_impute.py → bak/otherresults/SAUCIE_impute.py
diff --git a/otherresults/SAVER_impute.R → bak/otherresults/SAVER_impute.R b/otherresults/SAVER_impute.R → bak/otherresults/SAVER_impute.R
diff --git a/otherresults/SCIMPUTE_impute.R → bak/otherresults/SCIMPUTE_impute.R b/otherresults/SCIMPUTE_impute.R → bak/otherresults/SCIMPUTE_impute.R
diff --git a/otherresults/dca_impute.py → bak/otherresults/dca_impute.py b/otherresults/dca_impute.py → bak/otherresults/dca_impute.py
diff --git a/otherresults/scVi_impute.py → bak/otherresults/scVi_impute.py b/otherresults/scVi_impute.py → bak/otherresults/scVi_impute.py
diff --git a/otherresults/simulation_generator.R → bak/otherresults/simulation_generator.R b/otherresults/simulation_generator.R → bak/otherresults/simulation_generator.R
diff --git a/results/calculateROGUE.R → bak/results/calculateROGUE.R b/results/calculateROGUE.R → bak/results/calculateROGUE.R
diff --git a/results/compare_varID.py → bak/results/compare_varID.py b/results/compare_varID.py → bak/results/compare_varID.py
diff --git a/results/jobinfo_imp_23dropout.txt → bak/results/jobinfo_imp_23dropout.txt b/results/jobinfo_imp_23dropout.txt → bak/results/jobinfo_imp_23dropout.txt
diff --git a/results/jobinfo_imp_explore.txt → bak/results/jobinfo_imp_explore.txt b/results/jobinfo_imp_explore.txt → bak/results/jobinfo_imp_explore.txt
diff --git a/results/jobinfo_imp_louvain_2.txt → bak/results/jobinfo_imp_louvain_2.txt b/results/jobinfo_imp_louvain_2.txt → bak/results/jobinfo_imp_louvain_2.txt
diff --git a/results/results_ROGUE.py → bak/results/results_ROGUE.py b/results/results_ROGUE.py → bak/results/results_ROGUE.py
diff --git a/results/results_Reading.py → bak/results/results_Reading.py b/results/results_Reading.py → bak/results/results_Reading.py
@@ -13,6 +13,7 @@
 args = parser.parse_args()
 
 # Note:
+# Main Check results
 # Generate results in python other than in shell for better organization
 # We are not use runpy.run_path('main_result.py') for it is hard to pass arguments
 # We are not use subprocess.call("python main_result.py", shell=True) for it runs scripts parallel

diff --git a/results/results_Reading_23.py → bak/results/results_Reading_23.py b/results/results_Reading_23.py → bak/results/results_Reading_23.py
diff --git a/results/results_Reading_23dropout.py → bak/results/results_Reading_23dropout.py b/results/results_Reading_23dropout.py → bak/results/results_Reading_23dropout.py
diff --git a/results/results_Reading_explore.py → bak/results/results_Reading_explore.py b/results/results_Reading_explore.py → bak/results/results_Reading_explore.py
diff --git a/results/results_Reading_graph.py → bak/results/results_Reading_graph.py b/results/results_Reading_graph.py → bak/results/results_Reading_graph.py
diff --git a/results/results_imputation.sh → bak/results/results_imputation.sh b/results/results_imputation.sh → bak/results/results_imputation.sh
diff --git a/results/results_imputation_0.3.sh → bak/results/results_imputation_0.3.sh b/results/results_imputation_0.3.sh → bak/results/results_imputation_0.3.sh
diff --git a/results/results_imputation_grid.sh → bak/results/results_imputation_grid.sh b/results/results_imputation_grid.sh → bak/results/results_imputation_grid.sh
diff --git a/results/results_impute.py → bak/results/results_impute.py b/results/results_impute.py → bak/results/results_impute.py
@@ -56,8 +56,8 @@
 dropix           = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_dropix.npy')
 
 featuresImpute   = np.load(args.npyDir+args.datasetName+'_'+args.regulized_type+discreteStr+'_'+args.ratio+'_recon'+args.reconstr+'.npy')
-l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix)
-print('{:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax), end='')
+l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse = imputation_error_log(featuresImpute, featuresOriginal, features, dropi, dropj, dropix)
+print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} '.format(l1ErrorMean, l1ErrorMedian, l1ErrorMin, l1ErrorMax, rmse), end='')
 
 def imputeResult(inputData):
     '''

diff --git a/results/results_impute_graph_ROC.py → bak/results/results_impute_graph_ROC.py b/results/results_impute_graph_ROC.py → bak/results/results_impute_graph_ROC.py
diff --git a/results/results_impute_graph_ROC.sh → bak/results/results_impute_graph_ROC.sh b/results/results_impute_graph_ROC.sh → bak/results/results_impute_graph_ROC.sh
diff --git a/results/submitCluster_Result_Celltype.sh → bak/results/submitCluster_Result_Celltype.sh b/results/submitCluster_Result_Celltype.sh → bak/results/submitCluster_Result_Celltype.sh
diff --git a/results/submitCluster_Result_Impute.sh → bak/results/submitCluster_Result_Impute.sh b/results/submitCluster_Result_Impute.sh → bak/results/submitCluster_Result_Impute.sh
diff --git a/results/submitCluster_Result_Impute_23.sh → ...results/submitCluster_Result_Impute_23.sh b/results/submitCluster_Result_Impute_23.sh → ...results/submitCluster_Result_Impute_23.sh
diff --git a/.../submitCluster_Result_Impute_23dropout.sh → .../submitCluster_Result_Impute_23dropout.sh b/.../submitCluster_Result_Impute_23dropout.sh → .../submitCluster_Result_Impute_23dropout.sh
diff --git a/...ts/submitCluster_Result_Impute_explore.sh → ...ts/submitCluster_Result_Impute_explore.sh b/...ts/submitCluster_Result_Impute_explore.sh → ...ts/submitCluster_Result_Impute_explore.sh
diff --git a/results/submitCluster_Result_Impute_graph.sh → ...ults/submitCluster_Result_Impute_graph.sh b/results/submitCluster_Result_Impute_graph.sh → ...ults/submitCluster_Result_Impute_graph.sh
diff --git a/results/summary.sh → bak/results/summary.sh b/results/summary.sh → bak/results/summary.sh
diff --git a/results/summary_cmd.py → bak/results/summary_cmd.py b/results/summary_cmd.py → bak/results/summary_cmd.py
diff --git a/benchmark_util.py b/benchmark_util.py
@@ -530,6 +530,7 @@ def imputation_error(X_mean, X, X_zero, i, j, ix):
         all_index = i[ix], j[ix]
         x, y = X_mean[all_index], X[all_index]
         result = np.abs(x - y)
+        rmse = ((x - y)**2/len(result))**0.5
     # If the input is a sparse matrix
     else:
         all_index = i[ix], j[ix]
@@ -538,8 +539,9 @@ def imputation_error(X_mean, X, X_zero, i, j, ix):
         yuse = scipy.sparse.lil_matrix.todense(y)
         yuse = np.asarray(yuse).reshape(-1)
         result = np.abs(x - yuse)
+        rmse = ((x - yuse)**2/len(result))**0.5
     # return np.median(np.abs(x - yuse))
-    return np.mean(result), np.median(result), np.min(result), np.max(result)
+    return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse)
 
 
 # IMPUTATION METRICS
@@ -562,6 +564,7 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix):
         all_index = i[ix], j[ix]
         x, y = X_mean[all_index], X[all_index]
         result = np.abs(x - np.log(y+1))
+        rmse = ((x - np.log(y+1))**2/len(result))**0.5
     # If the input is a sparse matrix
     else:
         all_index = i[ix], j[ix]
@@ -570,10 +573,11 @@ def imputation_error_log(X_mean, X, X_zero, i, j, ix):
         yuse = scipy.sparse.lil_matrix.todense(y)
         yuse = np.asarray(yuse).reshape(-1)
         result = np.abs(x - np.log(yuse+1))
+        rmse = ((x - np.log(yuse+1))**2/len(result))**0.5
     # return np.median(np.abs(x - yuse))
-    return np.mean(result), np.median(result), np.min(result), np.max(result)
+    return np.mean(result), np.median(result), np.min(result), np.max(result), np.mean(rmse)
 
-# cosine similarity
+# cosine similarity with log
 def imputation_cosine_log(X_mean, X, X_zero, i, j, ix):
     """
     X_mean: imputed dataset

diff --git a/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py b/codesfromJGandYJ/codeForCellcluster/Run_netNMF_celltype.py
@@ -0,0 +1,71 @@
+# This code has not been cleaned up yet
+# run netNMF-sc from command line and save outputs to specified directory
+from __future__ import print_function
+import numpy as np
+from warnings import warn
+from joblib import Parallel, delayed
+import copy,argparse,os,math,random,time
+from scipy import sparse, io,linalg
+from scipy.sparse import csr_matrix
+import warnings,os
+from netNMFsc import plot
+warnings.simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
+def main(args):
+    """Run netNMF-sc on the expression CSV and save W, H and cluster labels.
+
+    ``args`` is the argparse namespace built under ``__main__``. Outputs go to
+    ``args.direc``, which is created first if needed (the current directory
+    when empty). Raises ValueError unless ``args.method`` is 'GD' or 'MU'.
+    """
+    if args.method == 'GD':
+        from netNMFsc import netNMFGD as netNMF
+    elif args.method == 'MU':
+        from netNMFsc import netNMFMU as netNMF
+    else:
+        # Fail here instead of hitting an unbound `operator` further down.
+        raise ValueError("method must be 'GD' or 'MU', got %r" % (args.method,))
+    operator = netNMF(d=args.dimensions, alpha=args.alpha, n_inits=1,
+                      tol=args.tol, max_iter=args.max_iters, n_jobs=1)
+    expr = pd.read_csv(args.filename, header=0, index_col=0, sep=',')
+    operator.X = expr.values
+    # Upper-case the row labels and drop everything after the first '.'.
+    operator.genes = np.asarray(
+        [gene.upper().split('.')[0] for gene in expr.index.values])
+    operator.load_network(net=args.network, genenames=args.netgenes, sparsity=args.sparsity)
+    dictW = operator.fit_transform()
+    W, H = dictW['W'], dictW['H']
+    k, clusters = plot.select_clusters(H, max_clusters=20)
+    # Make the directory before tSNE/np.save need it; no shell call required.
+    if args.direc and not os.path.isdir(args.direc):
+        os.makedirs(args.direc)
+    plot.tSNE(H, clusters, fname=os.path.join(args.direc, 'netNMFsc_tsne'))
+    np.save(os.path.join(args.direc, 'W.npy'), W)
+    np.save(os.path.join(args.direc, 'H.npy'), H)
+    np.save(os.path.join(args.direc, 'cluster.npy'), clusters)
+#/storage/htc/joshilab/jghhd/singlecellTest/netNMFsc/netNMF-sc/netNMFsc/refdata/
+
+if __name__ == "__main__":  # command-line entry point: parse the options below, then call main(args)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m","--method",help="either 'GD for gradient descent or MU for multiplicative update",type=str,default='GD')
+    parser.add_argument("-f","--filename", help="path to data file (.npy or .mtx)",type=str,default='matrix.mtx')  # NOTE(review): main() reads this with pd.read_csv(sep=','), so a CSV is expected despite the help text
+    parser.add_argument("-g","--gene_names", help="path to file containing gene names (.npy or .tsv)",type=str,default='gene_names.tsv')  # not read by main()
+    parser.add_argument("-net","--network", help="path to network file (.npy or .mtx)",type=str,default='')
+    parser.add_argument("-netgenes","--netgenes", help="path to file containing gene names for network (.npy or .tsv)",type=str,default='')
+    parser.add_argument("-org","--organism", help="mouse or human",type=str,default='human')  # not read by main()
+    parser.add_argument("-id","--idtype", help="ensemble, symbol, or entrez",type=str,default='ensemble')  # not read by main()
+    parser.add_argument("-netid","--netidtype", help="ensemble, symbol, or entrez",type=str,default='entrez')  # not read by main()
+    parser.add_argument("-n","--normalize", help="normalize data? 1 = yes, 0 = no",type=int,default=0)  # not read by main()
+    parser.add_argument("-sparse","--sparsity", help="sparsity for network",type=float,default=0.99)
+    parser.add_argument("-mi","--max_iters", help="max iters for netNMF-sc",type=int,default=1500)
+    parser.add_argument("-t","--tol", help="tolerence for netNMF-sc",type=float,default=1e-2)
+    parser.add_argument("-d","--direc", help="directory to save files",default='')
+    parser.add_argument("-D","--dimensions", help="number of dimensions to apply shift",type=int,default = 10)  # passed to the netNMF constructor as d=
+    parser.add_argument("-a","--alpha", help="lambda param for netNMF-sc",type=float,default = 1.0)
+    parser.add_argument("-x","--tenXdir", help="data is from 10X. Only required to provide directory containing matrix.mtx, genes.tsv, barcodes.tsv files",type=str,default = '')  # only referenced by the commented-out load_10X call in main()
+    args = parser.parse_args()
+    main(args)
+
+
+#'/storage/htc/joshilab/jghhd/singlecellTest/Data/11.Kolodziejczyk/Use_expression.csv'
diff --git a/codesfromJGandYJ/impute code/MAGIC_impute.py b/codesfromJGandYJ/impute code/MAGIC_impute.py
diff --git a/codesfromJGandYJ/impute code/SAVER_impute.py b/codesfromJGandYJ/impute code/SAVER_impute.py
diff --git a/codesfromJGandYJ/impute code/SCIMPUTE.py b/codesfromJGandYJ/impute code/SCIMPUTE.py