diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..6c720e9
Binary files /dev/null and b/.DS_Store differ
diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..33cc41e
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,5 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.Rproj.user/4F102347/cpp-compilation-config b/.Rproj.user/4F102347/cpp-compilation-config
new file mode 100644
index 0000000..8779cd8
--- /dev/null
+++ b/.Rproj.user/4F102347/cpp-compilation-config
@@ -0,0 +1,20 @@
+{
+    "args": [
+        "-isystem",
+        "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include",
+        "-I/Library/Developer/CommandLineTools/usr/lib/clang/11.0.0/include",
+        "-I/usr/local/Cellar/llvm/10.0.0_3/bin/../include/c++/v1",
+        "-I/usr/local/Cellar/llvm/10.0.0_3/lib/clang/10.0.0/include",
+        "-I/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include",
+        "-I/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include",
+        "-I/Library/Frameworks/R.framework/Resources/include",
+        "-DNDEBUG",
+        "-isysroot",
+        "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk",
+        "-I/usr/local/include",
+        "-fPIC"
+    ],
+    "pch": "Rcpp",
+    "is_cpp": true,
+    "hash": "15931292361600965640"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/cpp-definition-cache b/.Rproj.user/4F102347/cpp-definition-cache
new file mode 100644
index 0000000..74d5909
--- /dev/null
+++ b/.Rproj.user/4F102347/cpp-definition-cache
@@ -0,0 +1,17 @@
+[
+    {
+        "file": "/rdx/projects/dbsinglecell/src/rcpp_hello_world.cpp",
+        "file_last_write": 1600965640.0,
+        "definitions": [
+            {
+                "usr": "c:@F@rcpp_hello_world#",
+                "kind": 6,
+                "parent_name": "",
+                "name": "rcpp_hello_world",
+                "file": "/rdx/projects/dbsinglecell/src/rcpp_hello_world.cpp",
+                "line": 6,
+                "column": 6
+            }
+        ]
+    }
+]
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/debug-breakpoints.pper b/.Rproj.user/4F102347/pcs/debug-breakpoints.pper
new file mode 100644
index 0000000..4893a8a
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/debug-breakpoints.pper
@@ -0,0 +1,5 @@
+{
+    "debugBreakpointsState": {
+        "breakpoints": []
+    }
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/files-pane.pper b/.Rproj.user/4F102347/pcs/files-pane.pper
new file mode 100644
index 0000000..de8e32a
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/files-pane.pper
@@ -0,0 +1,9 @@
+{
+    "sortOrder": [
+        {
+            "columnIndex": 2,
+            "ascending": true
+        }
+    ],
+    "path": "/rdx/projects/dbsinglecell"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/source-pane.pper b/.Rproj.user/4F102347/pcs/source-pane.pper
new file mode 100644
index 0000000..b71dede
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/source-pane.pper
@@ -0,0 +1,5 @@
+{
+    "activeTab": 0,
+    "activeTabSourceWindow1": 0,
+    "activeTabSourceWindow2": 0
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/source-window.pper b/.Rproj.user/4F102347/pcs/source-window.pper
new file mode 100644
index 0000000..71a5c65
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/source-window.pper
@@ -0,0 +1,11 @@
+{
+    "sourceWindowGeometry": {
+        "w18o69s9ak1jd": {
+            "ordinal": 1,
+            "x": 240,
+            "y": 0,
+            "width": 800,
+            "height": 778
+        }
+    }
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/windowlayoutstate.pper b/.Rproj.user/4F102347/pcs/windowlayoutstate.pper
new file mode 100644
index 0000000..7e3fed9
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/windowlayoutstate.pper
@@ -0,0 +1,14 @@
+{
+    "left": {
+        "splitterpos": 319,
+        "topwindowstate": "NORMAL",
+        "panelheight": 762,
+        "windowheight": 800
+    },
+    "right": {
+        "splitterpos": 485,
+        "topwindowstate": "NORMAL",
+        "panelheight": 762,
+        "windowheight": 800
+    }
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/pcs/workbench-pane.pper b/.Rproj.user/4F102347/pcs/workbench-pane.pper
new file mode 100644
index 0000000..7ed1b52
--- /dev/null
+++ b/.Rproj.user/4F102347/pcs/workbench-pane.pper
@@ -0,0 +1,5 @@
+{
+    "TabSet1": 2,
+    "TabSet2": 2,
+    "TabZoom": {}
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/persistent-state b/.Rproj.user/4F102347/persistent-state
new file mode 100644
index 0000000..2d203af
--- /dev/null
+++ b/.Rproj.user/4F102347/persistent-state
@@ -0,0 +1,8 @@
+build-last-errors="[]"
+build-last-errors-base-dir="/rdx/projects/dbsinglecell/"
+build-last-outputs="[{\"type\":0,\"output\":\"==> Rcpp::compileAttributes()\\n\\n\"},{\"type\":1,\"output\":\"* Updated R/RcppExports.R\\n\"},{\"type\":1,\"output\":\"\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --preclean --no-multiarch --with-keep.source dbsinglecell\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/private/var/folders/sd/l2qc9w6513l7zmfnj_v27wmc0000gn/T/RtmpKJnFOE/temp_libpath11b424050aaa’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘dbsinglecell’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"** libs\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -I\\\"/Library/Frameworks/R.framework/Resources/include\\\" -DNDEBUG  -I\\\"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include\\\" -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include  -fPIC  -Wall -g -O2  -c RcppExports.cpp -o RcppExports.o\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -I\\\"/Library/Frameworks/R.framework/Resources/include\\\" -DNDEBUG  -I\\\"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include\\\" -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include  -fPIC  -Wall -g -O2  -c rcpp_hello_world.cpp -o rcpp_hello_world.o\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/usr/local/Cellar/llvm/10.0.0_3/lib -o dbsinglecell.so RcppExports.o rcpp_hello_world.o -F/Library/Frameworks/R.framework/.. 
-framework R -Wl,-framework -Wl,CoreFoundation\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"installing to /private/var/folders/sd/l2qc9w6513l7zmfnj_v27wmc0000gn/T/RtmpKJnFOE/temp_libpath11b424050aaa/00LOCK-dbsinglecell/00new/dbsinglecell/libs\\n\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** checking absolute paths in shared objects and dynamic libraries\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (dbsinglecell)\\n\"},{\"type\":1,\"output\":\"\"}]"
+compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
+files.monitored-path=""
+find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
+imageDirtyState="1"
+saveActionState="0"
diff --git a/.Rproj.user/4F102347/rmd-outputs b/.Rproj.user/4F102347/rmd-outputs
new file mode 100644
index 0000000..3f2ff2d
--- /dev/null
+++ b/.Rproj.user/4F102347/rmd-outputs
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/.Rproj.user/4F102347/saved_source_markers b/.Rproj.user/4F102347/saved_source_markers
new file mode 100644
index 0000000..2b1bef1
--- /dev/null
+++ b/.Rproj.user/4F102347/saved_source_markers
@@ -0,0 +1 @@
+{"active_set":"","sets":[]}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/051E5C13 b/.Rproj.user/4F102347/sources/prop/051E5C13
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/051E5C13
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/133D5E34 b/.Rproj.user/4F102347/sources/prop/133D5E34
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/133D5E34
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/1443B4E2 b/.Rproj.user/4F102347/sources/prop/1443B4E2
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/1443B4E2
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/14D99F79 b/.Rproj.user/4F102347/sources/prop/14D99F79
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/14D99F79
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/1DD97DDE b/.Rproj.user/4F102347/sources/prop/1DD97DDE
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/1DD97DDE
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/2EA126F7 b/.Rproj.user/4F102347/sources/prop/2EA126F7
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/2EA126F7
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/37189DC7 b/.Rproj.user/4F102347/sources/prop/37189DC7
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/37189DC7
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/377C7BA5 b/.Rproj.user/4F102347/sources/prop/377C7BA5
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/377C7BA5
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/38FF06E6 b/.Rproj.user/4F102347/sources/prop/38FF06E6
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/38FF06E6
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/3C93B1F8 b/.Rproj.user/4F102347/sources/prop/3C93B1F8
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/3C93B1F8
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/3FB33404 b/.Rproj.user/4F102347/sources/prop/3FB33404
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/3FB33404
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/4581755F b/.Rproj.user/4F102347/sources/prop/4581755F
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/4581755F
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/4F76852F b/.Rproj.user/4F102347/sources/prop/4F76852F
new file mode 100644
index 0000000..8a4e166
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/4F76852F
@@ -0,0 +1,3 @@
+{
+    "source_window_id": "w18o69s9ak1jd"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/59DFEF3E b/.Rproj.user/4F102347/sources/prop/59DFEF3E
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/59DFEF3E
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/64FDE598 b/.Rproj.user/4F102347/sources/prop/64FDE598
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/64FDE598
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/69252159 b/.Rproj.user/4F102347/sources/prop/69252159
new file mode 100644
index 0000000..62762cb
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/69252159
@@ -0,0 +1,3 @@
+{
+    "source_window_id": "wy4ve70vn7zpp"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/741250F7 b/.Rproj.user/4F102347/sources/prop/741250F7
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/741250F7
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/75394C5A b/.Rproj.user/4F102347/sources/prop/75394C5A
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/75394C5A
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/7A8FA062 b/.Rproj.user/4F102347/sources/prop/7A8FA062
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/7A8FA062
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/8695D2EE b/.Rproj.user/4F102347/sources/prop/8695D2EE
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/8695D2EE
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/896AF6F0 b/.Rproj.user/4F102347/sources/prop/896AF6F0
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/896AF6F0
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/8F8E8389 b/.Rproj.user/4F102347/sources/prop/8F8E8389
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/8F8E8389
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/9122B8A0 b/.Rproj.user/4F102347/sources/prop/9122B8A0
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/9122B8A0
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/AB64079C b/.Rproj.user/4F102347/sources/prop/AB64079C
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/AB64079C
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/ADDF398F b/.Rproj.user/4F102347/sources/prop/ADDF398F
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/ADDF398F
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/B343C981 b/.Rproj.user/4F102347/sources/prop/B343C981
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/B343C981
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/B497A198 b/.Rproj.user/4F102347/sources/prop/B497A198
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/B497A198
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/B5AFB4CD b/.Rproj.user/4F102347/sources/prop/B5AFB4CD
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/B5AFB4CD
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/B7BAAE89 b/.Rproj.user/4F102347/sources/prop/B7BAAE89
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/B7BAAE89
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/C046BAAF b/.Rproj.user/4F102347/sources/prop/C046BAAF
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/C046BAAF
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/C230E935 b/.Rproj.user/4F102347/sources/prop/C230E935
new file mode 100644
index 0000000..72a3a92
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/C230E935
@@ -0,0 +1,3 @@
+{
+    "tempName": "Untitled1"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/CD8192A0 b/.Rproj.user/4F102347/sources/prop/CD8192A0
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/CD8192A0
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/DC147442 b/.Rproj.user/4F102347/sources/prop/DC147442
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/DC147442
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/E0576C72 b/.Rproj.user/4F102347/sources/prop/E0576C72
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/E0576C72
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/E059CE59 b/.Rproj.user/4F102347/sources/prop/E059CE59
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/E059CE59
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/E44D9DA6 b/.Rproj.user/4F102347/sources/prop/E44D9DA6
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/E44D9DA6
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/ED610C92 b/.Rproj.user/4F102347/sources/prop/ED610C92
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/ED610C92
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/ED69A4CA b/.Rproj.user/4F102347/sources/prop/ED69A4CA
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/ED69A4CA
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/F731FB4B b/.Rproj.user/4F102347/sources/prop/F731FB4B
new file mode 100644
index 0000000..72a3a92
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/F731FB4B
@@ -0,0 +1,3 @@
+{
+    "tempName": "Untitled1"
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/F934CB88 b/.Rproj.user/4F102347/sources/prop/F934CB88
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/F934CB88
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/prop/INDEX b/.Rproj.user/4F102347/sources/prop/INDEX
new file mode 100644
index 0000000..5b12250
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/prop/INDEX
@@ -0,0 +1,40 @@
+%2Frdx%2Fprojects%2FGeCKO%2F.travis.yml="ED610C92"
+%2Frdx%2Fprojects%2FGeCKO%2FDESCRIPTION="1DD97DDE"
+%2Frdx%2Fprojects%2FGeCKO%2FNAMESPACE="7A8FA062"
+%2Frdx%2Fprojects%2FGeCKO%2F_pkgdown.yml="E0576C72"
+%2Frdx%2Fprojects%2FPerNiche%2FDESCRIPTION="14D99F79"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2FNewMeta.R="E059CE59"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fbetter.R="69252159"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fcreate_merge.R="38FF06E6"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fdb_read10x.R="C046BAAF"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fhdbscan.R="4F76852F"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fplot_umap.R="B7BAAE89"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2FscRNA_helpers.R="377C7BA5"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fumap-learn.R="133D5E34"
+%2Frdx%2Fprojects%2FPerNiche%2FR%2Fumap.R="CD8192A0"
+%2Frdx%2Fprojects%2FQE%2FR%2FCellPhoneDB.R="64FDE598"
+%2Frdx%2Fprojects%2FQE%2FR%2FPerNiche%2F02_dimReduction.R="E44D9DA6"
+%2Frdx%2Fprojects%2FQE%2FR%2FPerNiche%2Fcomplete2.R="AB64079C"
+%2Frdx%2Fprojects%2FQE%2Fdocs%2FPerNiche.Rmd="2EA126F7"
+%2Frdx%2Fprojects%2Fdb_singlecell%2FDESCRIPTION="37189DC7"
+%2Frdx%2Fprojects%2Fdb_singlecell%2FNAMESPACE="ED69A4CA"
+%2Frdx%2Fprojects%2Fdb_singlecell%2FR%2Fcellphonedb_utilities.R="C230E935"
+%2Frdx%2Fprojects%2Fdb_singlecell%2FRead-and-delete-me="8F8E8389"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FDESCRIPTION="B343C981"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FNAMESPACE="741250F7"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2FNewMeta.R="DC147442"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fbetter.R="1443B4E2"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fcellphonedb_utilities.R="9122B8A0"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fcreate_merge.R="8695D2EE"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fdb_read10x.R="4581755F"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fhdbscan.R="F934CB88"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Forganize_10x.R="B5AFB4CD"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fplot_umap.R="B497A198"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Freticulate_helpers.R="F731FB4B"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2FscRNA_helpers.R="3C93B1F8"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fumap-learn.R="051E5C13"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fumap.R="75394C5A"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FREADME.md="3FB33404"
+%2Frdx%2Fprojects%2Fdbsinglecell%2FRead-and-delete-me="59DFEF3E"
+%2Frdx%2Fprojects%2Fdbsinglecell%2F_pkgdown.yml="ADDF398F"
+%2Frdx%2Fprojects%2Fdbsinglecell%2Fman%2Fcellphonedb_summary.Rd="896AF6F0"
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents
new file mode 100644
index 0000000..7671da6
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents
@@ -0,0 +1,19 @@
+# Generated by roxygen2: do not edit by hand
+
+export(HDBSCAN)
+export(HDBSCAN.Seurat)
+export(NewMeta)
+export(cellphonedb_summary)
+export(clustUMAP)
+export(create_seurat)
+export(db_read10x)
+export(gg_umap)
+export(hcl_umap)
+export(hue_umap)
+export(pal_umap)
+export(pre_processing)
+export(rbw_umap)
+export(read10x)
+export(read10x_atlas)
+export(umap)
+export(visUMAP)
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents
new file mode 100644
index 0000000..711ea0d
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents
@@ -0,0 +1,176 @@
+#' Custom Palette for UMAP
+#'
+#' @param object object whose `meta.data` slot holds the grouping column
+#' @param group_col name of the `meta.data` column that defines the groups
+#' @param base_col base color handed to `colortools::setColors()`
+#' @param jitter randomize the colors (interleave them with a stride of 5)
+#'
+#' @return character vector of `n` colors (`n` = number of unique groups);
+#'   'gray' comes first when a "-1" group is present
+#' @export
+#' @importFrom colortools setColors
+#' @import viridis
+#'
+#' @examples
+pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # a "-1" group gets a neutral gray ahead of the generated colors
+  pal <- if("-1" %in% col_levels) c('gray') else c()
+
+  # default to the natural order so `jitter = FALSE` no longer fails on an
+  # undefined `new_order`
+  new_order <- seq_len(n)
+  if(jitter){
+    # cap the number of passes at n: seq(x, n, 5) errors when x > n
+    new_order <- unlist(lapply(seq_len(min(5, n)), function(x) seq(x, n, 5)))
+  }
+  pal <- c(pal, colortools::setColors(base_col,n))[new_order]
+  return(pal)
+}
+
+#' UMAP Palette using HCL presets
+#'
+#' @param object object whose `meta.data` slot holds the grouping column
+#' @param group_col name of the `meta.data` column that defines the groups
+#' @param hcl_pal palette name passed to `hcl.colors()`
+#' @param jitter if TRUE, interleave the colors instead of keeping palette order
+#' @param comp integer; colors are interleaved with a stride of floor(n / comp)
+#'
+#' @return character vector: 'gray' (only when a "-1" group exists) followed
+#'   by one color per unique group
+#' @export
+#'
+#' @examples
+hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  pal <- if("-1" %in% col_levels) c('gray') else c()
+  # stride of the interleave; at least 1 so that n < comp cannot produce a
+  # zero step (seq() rejects by = 0)
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in jn interleaved passes: x, x + jn, x + 2*jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order])
+  return(pal)
+}
+
+#' UMAP palette using rainbow colors
+#'
+#' @param object object whose `meta.data` slot holds the grouping column
+#' @param group_col name of the `meta.data` column that defines the groups
+#' @param jitter if TRUE, interleave the colors instead of keeping palette order
+#' @param comp integer; colors are interleaved with a stride of floor(n / comp)
+#'
+#' @return character vector: 'gray' (only when a "-1" group exists) followed
+#'   by one rainbow color per unique group
+#' @export
+#' @examples
+rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  pal <- if("-1" %in% col_levels) c('gray') else c()
+  # stride of the interleave; at least 1 so that n < comp cannot produce a
+  # zero step (seq() rejects by = 0)
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  # size the rainbow by n (was a fixed 57) so the hues span the full wheel and
+  # more than 57 groups no longer yield NA colors
+  pal <- c(pal,rainbow(n,s = 0.7,v = 0.8,alpha = 0.95)[new_order])
+  return(pal)
+}
+
+#' UMAP Palette using soft hues
+#'
+#' @param object object whose `meta.data` slot holds the grouping column
+#' @param group_col name of the `meta.data` column that defines the groups
+#' @param jitter if TRUE, interleave the colors instead of keeping palette order
+#' @param comp integer; colors are interleaved with a stride of floor(n / comp)
+#'
+#' @return character vector: 'gray' (only when a "-1" group exists) followed
+#'   by one color per unique group
+#' @export
+#'
+#' @import colorspace
+#'
+#' @examples
+hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  pal <- if("-1" %in% col_levels) c('gray') else c()
+  # stride of the interleave; at least 1 so that n < comp cannot produce a
+  # zero step (seq() rejects by = 0)
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in jn interleaved passes: x, x + jn, x + 2*jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order])
+  return(pal)
+}
+# n evenly spaced hues from 15 degrees; seq_len() keeps n = 0 empty (1:0 is c(1, 0)).
+gg_color_hue <- function(n) {
+  hues <- seq(15, 375, length.out = n + 1)  # last hue (375 = 15 + 360) is dropped below
+  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
+}
+
+#' UMAP Palette using ggplot2 colors
+#'
+#' @param object object whose `meta.data` slot holds the grouping column
+#' @param group_col name of the `meta.data` column that defines the groups
+#' @param jitter if TRUE, interleave the colors instead of keeping palette order
+#' @param comp integer; colors are interleaved with a stride of floor(n / comp)
+#'
+#' @return character vector: 'gray' (only when a "-1" group exists) followed
+#'   by one color per unique group
+#' @export
+#'
+#' @examples
+gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  pal <- if("-1" %in% col_levels) c('gray') else c()
+  # stride of the interleave; at least 1 so that n < comp cannot produce a
+  # zero step (seq() rejects by = 0)
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in jn interleaved passes: x, x + jn, x + 2*jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, gg_color_hue(n)[new_order])
+  return(pal)
+}
+
+# Convert a number in [0, 256) to a two-digit uppercase hex string ("00".."FF").
+# Fractional input is truncated toward zero.
+hex_convert <- function(x){
+  # negatives used to slip through and silently yield malformed output
+  if(is.na(x) || x < 0 || x >= 256) stop("`x` must be in [0, 256)")
+  digits <- c(0:9, LETTERS[1:6])
+  x <- floor(x)
+  res <- paste0(digits[x %/% 16 + 1], digits[x %% 16 + 1])
+  return(res)
+}
+
+# element-wise wrapper so callers can pass a vector
+hex_convert <- Vectorize(hex_convert)
+
+# Palette with increasing alpha: `base` at the minimum opacity, followed by n
+# plasma colors whose alpha suffix rises from min/100*255 to 255.
+incA <- function(n, min = 0, base = '#E1E1E1'){
+  floor_alpha <- min/100*255
+  alpha_steps <- hex_convert(seq(floor_alpha,255, length.out = n))
+  ramp <- paste0(substring(viridis::plasma(n), 1, 7), alpha_steps)
+  res <- c(paste0(base, hex_convert(floor_alpha)), ramp)
+  return(res)
+}
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents
new file mode 100644
index 0000000..ac2ced4
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents
@@ -0,0 +1,11 @@
+# Package-level handle to the Python scipy module; stays NULL until .onLoad() runs.
+scipy <- NULL
+
+.onLoad <- function(libname, pkgname) {
+  # `<<-` rebinds the `scipy` defined above; delay_load = TRUE asks reticulate to defer the import
+  scipy <<- reticulate::import("scipy", delay_load = TRUE)
+}
+# Install Python dependencies via reticulate. NOTE(review): the PyPI name for UMAP is likely 'umap-learn', not 'umap' - confirm.
+install_python_packages <- function(method = "auto", conda = "auto") {
+  reticulate::py_install(c("hdbscan",'umap'), method = method, conda = conda)  # "hdbscan" was misspelled "hdscan"
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents
new file mode 100644
index 0000000..6277ef3
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents
@@ -0,0 +1,17 @@
+#' Easy Add Meta data to Seurat Object
+#'
+#' @param object Seurat object to annotate
+#' @param meta vector indexed by identity label (the values of `Idents(object)`)
+#' @param col.name name of the meta data column to create
+#'
+#' @return `object` with the looked-up values of `meta` added as `col.name`
+#' @export
+#' @import Seurat
+#' @examples
+NewMeta <- function(object, meta, col.name){
+  # look up one entry of `meta` per identity, then name them by colnames(object)
+  ident_labels <- as.character(Seurat::Idents(object))
+  per_cell <- meta[ident_labels]
+  names(per_cell) <- colnames(object)
+  return(Seurat::AddMetaData(object, per_cell, col.name))
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents
new file mode 100644
index 0000000..3c7df6f
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents
@@ -0,0 +1,89 @@
+# Export normalized RNA data from the Seurat object stored in `rds` as
+# 'PerNiche_int/meta.csv' and 'PerNiche_int/counts.csv'. `rds` used to be
+# ignored; the old hard-coded path is kept as the fallback when it is omitted.
+prep_cellphonedb <- function(rds){
+  require(data.table)
+  require(Seurat)
+  require(Matrix)
+  setDTthreads(20)
+  if(missing(rds)) rds <- '../../Peritoneal_Niche.rds'
+  pn <- readRDS(rds)
+
+  # exporting normalized data
+  mat <- pn@assays$RNA@data[, Cells(pn)]
+
+  # creating i,j,x format
+  mm.sum <- summary(mat)
+
+  # creating workable dataset of count data
+  res <- data.table::data.table(Genes = rownames(mat)[mm.sum$i], Cell = colnames(mat)[mm.sum$j], Count = mm.sum$x)
+  # create vector with the values to use for cell_type
+  new.meta <- pn@meta.data[,'cell_subset']
+  names(new.meta) <- rownames(pn@meta.data)
+
+  # add cell_types to res
+  res[,cell_subset:=new.meta[res$Cell]]
+  setkey(res, Genes, cell_subset)
+
+  # generate summary information to be used for filtering uninformative genes
+  test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes','cell_subset')]
+  test[,total:=sum(N),Genes]
+  test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+
+  # create vector with leftover genes
+  gl <- unique(test$Genes)
+
+  # subset count dataset
+  res <- res[Genes %in% gl]
+
+  # create counts file
+  counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+  colnames(counts)[1] <- 'Gene'
+  setkey(counts, Gene)
+
+  # map gene names via the `mouse` -> `Ensembl_gene_id` columns of the lookup table
+  m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+  mz_genes <- m2h$Ensembl_gene_id
+  names(mz_genes) <- m2h$mouse
+  new_genes <- mz_genes[counts$Gene]
+  names(new_genes) <- counts$Gene
+  new_genes <- new_genes[!is.na(new_genes)]
+  # drop genes without a mapping, then swap in the mapped ids
+  counts <- counts[Gene  %in% names(new_genes)]
+  counts[,Gene:=new_genes[Gene]]
+
+  # create meta file
+  meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+  fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+  fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+}
+
+# Flatten a matrix into a long data.table with one row per entry reported by
+# summary(): columns Genes (row name), Cell (column name) and Count (value).
+sparse2DT <- function(matrix){
+  require(data.table)
+  require(Seurat)
+  require(Matrix)
+
+  # (i, j, x) triplets: row index, column index, value
+  triplets <- summary(matrix)
+  row_names <- rownames(matrix)[triplets$i]
+  col_names <- colnames(matrix)[triplets$j]
+  return(data.table::data.table(Genes = row_names, Cell = col_names, Count = triplets$x))
+}
+
+
+
+# Long-format table of the RNA assay's `data` slot for every cell in `object`
+# (the conversion itself is delegated to sparse2DT()).
+sparse2DT.Seurat <- function(object){
+  require(data.table)
+  require(Seurat)
+  require(Matrix)
+
+  # exporting normalized data
+  cell_ids <- Cells(object)
+  rna_data <- object@assays$RNA@data
+  long_table <- sparse2DT(rna_data[, cell_ids])
+  return(long_table)
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents
new file mode 100644
index 0000000..51355f8
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents
@@ -0,0 +1,156 @@
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x data to cluster; handed unchanged to the Python `fit()` method
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#'
+#' @return list with `labels` (factor, noise as NA), `probabilities`, `cluster_persistance`, `exemplars`, `outlier_scores`
+#' @export
+#'
+#' @import reticulate
+#'
+#' @examples
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+  # Python module handle; delay_load defers the import until first use
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # size/count arguments are coerced because plain R numerics (e.g. 40) are
+  # doubles and would otherwise reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  return(result)
+}
+
+
+
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param object Seurat object whose embeddings are clustered
+#' @param reduction name of the reduction read with `Seurat::Embeddings()`
+#' @param dims columns of the embedding to use; NULL (default) uses all of them
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param return_seurat  logical to return the result within the original object or as the raw HDBSCAN result
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#'
+#' @return the Seurat object with the result in `misc$hdbscan` and the labels in `cl`, or the result list when `return_seurat` is FALSE
+#' @export
+#'
+#' @examples
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+  # cell embeddings of the chosen reduction; drop = FALSE keeps a matrix
+  # even when `dims` selects a single column
+  x <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    x <- x[, dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # integer-valued arguments are coerced: plain R doubles reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  if(return_seurat){
+    object@misc$hdbscan <- result
+    # the `cl` metadata column keeps the raw labels (noise stays "-1")
+    object$cl <- factor(clusterer$labels_)
+    return(object)
+  } else {
+    return(result)
+  }
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents
new file mode 100644
index 0000000..44533d2
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents
@@ -0,0 +1,15 @@
+# global references to the Python modules (will be initialized in .onLoad)
+scipy <- NULL
+hdbscan <- NULL
+umap <- NULL
+
+.onLoad <- function(libname, pkgname) {
+  # use superassignment to update the global references to scipy, hdbscan and umap
+  scipy <<- reticulate::import("scipy", delay_load = TRUE)
+  hdbscan <<- reticulate::import('hdbscan', delay_load = TRUE)
+  umap <<- reticulate::import('umap', delay_load = TRUE)
+}
+
+install_python_packages <- function(method = "auto", conda = "auto") {
+  reticulate::py_install(c("hdbscan", "umap-learn"), method = method, conda = conda) # the `umap` module ships as `umap-learn`
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents
new file mode 100644
index 0000000..1768c36
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents
@@ -0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath directory with the 10X output, passed to `Seurat::Read10X()`
+#' @param sample label prefixed to the cell barcodes and used as project name; defaults to `basename(filepath)`
+#' @param meta optional meta data passed to `Seurat::CreateSeuratObject()` as `meta.data`
+#' @return Seurat object
+#' @export
+#' @import Seurat
+#'
+#' @examples
+create_seurat <- function(filepath, sample = NULL, meta = NULL){
+  # default sample label: name of the 10X directory
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # prefix barcodes with the sample label to create unique cell ids
+  colnames(x) <- paste0(sample, '_', colnames(x))
+
+  # `meta` used to be read from an undefined variable; it is now an explicit,
+  # optional argument (NULL means no extra meta data)
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample)
+  })
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' @param object Seurat object
+#' @param species 'Homo sapiens' selects the '^MT-' mitochondrial pattern, anything else '^mt-'; 'Mus musculus' also title-cases the cell cycle genes
+#' @param nfeatures number of variable features passed to `Seurat::FindVariableFeatures()`
+#' @param npcs number of principal components passed to `Seurat::RunPCA()`
+#'
+#' @return the filtered, normalized, scaled Seurat object with a PCA reduction
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+#' @examples
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  # mitochondrial gene prefix: upper case for human, lower case otherwise
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                 pattern = mt_pattern,
+                                 col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # keep cells with percent.mt < 20 and nFeature_RNA strictly between 500 and 4100
+  object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object<- Seurat::NormalizeData(object, verbose = TRUE)
+
+  # Variable Features
+  message_section(paste('Finding',nfeatures,'most variable fatures'))
+  object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes)
+    ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes)
+  } else { # use built-in gene names
+    ccss <- cc.genes.updated.2019$s.genes
+    ccg2m <- cc.genes.updated.2019$g2m.genes
+  }
+
+  # scoring function
+  object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <-object$S.Score -object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA ---- restricted to the variable features found above
+  object<- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+}
+
+message_section <- function(text){
+  # Prints a dashed rule followed by a bold, time-stamped headline.
+  # rule length: 75% of the console width, capped at 120 characters
+  rule_len <- min(ceiling(getOption('width') * 0.75), 120)
+  cat("\n", strrep('-', rule_len), "\n", sep = '')
+  stamp <- crayon::bold(crayon::yellow(paste0('[', Sys.time(), ']')))
+  cat(stamp, crayon::bold(crayon::green(text)), '\n')
+}
+
+message_task <- function(text){
+  # Prints a yellow time stamp followed by `text` on one line.
+  # width budget: 75% of the console width, capped at 120 characters
+  budget <- min(ceiling(getOption('width') * 0.75), 120)
+  stamp <- crayon::yellow(paste0('[', Sys.time(), ']'))
+  # NOTE(review): text longer than budget - 22 is not printed at all, only
+  # the time stamp is -- confirm that this is intended
+  if(nchar(text) <= budget - 22){
+    cat(stamp, text, '\n')
+  } else {
+    cat(stamp, '\n')
+  }
+}
+
+message_append <- function(text){
+  # Prints `text` as a silver, indented "- " item; prints nothing when the
+  # text is longer than the width budget minus 22 characters.
+  # width budget: 75% of the console width, capped at 120 characters
+  budget <- min(ceiling(getOption('width') * 0.75), 120)
+
+  if(nchar(text) > budget - 22){
+    return(invisible())
+  }
+  indent <- strrep(' ', 23)
+  cat(indent, crayon::silver('- '), crayon::silver(text), '\n', sep = '')
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents
new file mode 100644
index 0000000..4016df9
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cellphonedb_utilities.R
+\name{cellphonedb_summary}
+\alias{cellphonedb_summary}
+\title{CellPhoneDB Summary File}
+\usage{
+cellphonedb_summary(path, pvalue = "all")
+}
+\arguments{
+\item{pvalue}{}
+}
+\value{
+
+}
+\description{
+CellPhoneDB Summary File
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents
new file mode 100644
index 0000000..4164644
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents
@@ -0,0 +1,154 @@
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x data to cluster; handed unchanged to the Python `fit()` method
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#' @return list with `labels` (factor, noise as NA), `probabilities`, `cluster_persistance`, `exemplars`, `outlier_scores`
+#' @export
+#'
+#' @import reticulate
+#'
+#' @examples
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+  # Python module handle; delay_load defers the import until first use
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # size/count arguments are coerced because plain R numerics (e.g. 40) are
+  # doubles and would otherwise reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  return(result)
+}
+
+
+
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param object Seurat object whose embeddings are clustered
+#' @param reduction name of the reduction read with `Seurat::Embeddings()`
+#' @param dims columns of the embedding to use; NULL (default) uses all of them
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param return_seurat logical to return the result within the original object or as the raw HDBSCAN result
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#' @return the Seurat object with the result in `misc$hdbscan` and the labels in `cl`, or the result list when `return_seurat` is FALSE
+#' @export
+#'
+#' @examples
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+  # cell embeddings of the chosen reduction; drop = FALSE keeps a matrix
+  # even when `dims` selects a single column
+  x <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    x <- x[, dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # integer-valued arguments are coerced: plain R doubles reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  if(return_seurat){
+    object@misc$hdbscan <- result
+    # the `cl` metadata column keeps the raw labels (noise stays "-1")
+    object$cl <- factor(clusterer$labels_)
+    return(object)
+  } else {
+    return(result)
+  }
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents
new file mode 100644
index 0000000..848aa5c
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents
@@ -0,0 +1,145 @@
+#' CellPhoneDB Summary File
+#'
+#' @param path the directory containing the CellPhoneDB Output
+#' @param pvalue 'all' (default) keeps every row, 'significant' keeps rows with p < 0.05, a number keeps rows with p below it
+#'
+#' @return data.table with one row per interaction and cell pair (columns include `mean` and `pvalue`)
+#' @export
+#'
+#' @examples
+#' @import data.table
+#' @import Matrix
+#' @import crayon
+#'
+cellphonedb_summary <- function(path, pvalue = 'all'){
+  means <- data.table::fread(file.path(path,'means.txt'))
+  pvalues <- data.table::fread(file.path(path, 'pvalues.txt'))
+  # the first 11 columns are treated as interaction annotation; every later
+  # column is melted into one row per cell pair
+  id.vars <- colnames(means)[1:11]
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files'))))
+  means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean')
+  pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue')
+
+  data.table::setkeyv(means, c('cell_pair',id.vars))
+  data.table::setkeyv(pvalues, c('cell_pair',id.vars))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets'))))
+  result <- data.table::as.data.table(data.table::merge.data.table(means, pvalues))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs'))))
+  int_pairs <- strcapture('(.+)\\_(.+)', result$interacting_pair,
+                          data.table::data.table(gA = character(), gB = character()))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs'))))
+  cell_pair <- strcapture('(.+)\\|(.+)', result$cell_pair,
+                          data.table::data.table(cell_a = character(), cell_b = character()))
+
+  result <- data.table::data.table(cell_pair, int_pairs, result)
+  # 'significant' keeps p < 0.05, a number is used as the cutoff itself,
+  # anything else (e.g. 'all') keeps every row
+  cutoff <- if(is.numeric(pvalue)) pvalue else if(identical(pvalue, 'significant')) 0.05
+  if(!is.null(cutoff)){
+    result <- result[result$pvalue < cutoff]
+  }
+  cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished'))))
+  return(result)
+}
+
+
+# prep_cellphonedb <- function(rds, meta_column, path){
+#   require(data.table)
+#   require(Seurat)
+#
+#   object <- readRDS(rds)
+#
+#   res <- sparse2DT.Seurat(object)
+#
+#   new.meta <- object@meta.data[,meta_column]
+#   names(new.meta) <- rownames(object@meta.data)
+#
+#   # add cell_types to res
+#   res[,cell_subset:=new.meta[res$Cell]]
+#   data.table::setkey(res, Genes, cell_subset)
+#
+#   # generate summary information to be used for filtering uninformative genes
+#   test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)]
+#   test[,total:=sum(N),Genes]
+#   test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+#
+#   # create vector with leftover genes
+#   gl <- unique(test$Genes)
+#
+#   # subset count dataset
+#   res <- res[Genes %in% gl]
+#
+#   # create counts file
+#   counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+#   colnames(counts)[1] <- 'Gene'
+#   setkey(counts, Gene)
+#
+#   m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+#   mz_genes <- m2h$Ensembl_gene_id
+#   names(mz_genes) <- m2h$mouse
+#   new_genes <- mz_genes[counts$Gene]
+#   names(new_genes) <- counts$Gene
+#   new_genes <- new_genes[!is.na(new_genes)]
+#
+#   dim(counts)
+#   counts <- counts[Gene  %in% names(new_genes)]
+#   counts[,Gene:=new_genes[Gene]]
+#
+#   # create meta file
+#   meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+#   meta <- meta[Cell %in% colnames(counts)[-1]]
+#   fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+#   fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' @param matrix  sparse matrix to be used
+#'
+#' @return data.table with the columns `Genes`, `Cell` and `Count`
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+#'
+#'
+sparse2DT <- function(matrix){
+  # triplet (i, j, x) view of the stored entries
+  triplets <- Matrix::summary(matrix)
+  genes <- rownames(matrix)[triplets$i]
+  cells <- colnames(matrix)[triplets$j]
+
+  # long table: one row per stored entry
+  data.table::data.table(Genes = genes, Cell = cells, Count = triplets$x)
+}
+
+
+
+#' Convert the RNA data of a Seurat object to a data.table
+#'
+#' @param object Seurat object
+#'
+#' @return data.table with the columns `Genes`, `Cell` and `Count`
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+sparse2DT.Seurat <- function(object){
+
+  # `data` slot of the RNA assay, columns ordered as Cells(object)
+  expr <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  # one row per stored entry, see sparse2DT()
+  sparse2DT(expr)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents
new file mode 100644
index 0000000..8f94962
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents
@@ -0,0 +1,10 @@
+* Edit the help file skeletons in 'man', possibly combining help
+  files for multiple functions.
+* Edit the exports in 'NAMESPACE', and add necessary imports.
+* Put any C/C++/Fortran code in 'src'.
+* If you have compiled code, add a useDynLib() directive to
+  'NAMESPACE'.
+* Run R CMD build to build the package tarball.
+* Run R CMD check to check the package tarball.
+
+Read "Writing R Extensions" for more information.
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents
new file mode 100644
index 0000000..8165dea
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents
@@ -0,0 +1,53 @@
+#' Reorganize another person's mess into a usable 10X dataset
+#'
+#' @param x directory with the `GSM*` tsv.gz / mtx.gz files; one sub-folder per accession is created in it
+#'
+#' @return NULL, invisibly; called for its effect on the file system
+#' @export
+#' @import data.table
+#'
+#' @examples
+organize_10x <- function(x ){
+  file_list <- dir(path = x, full.names = TRUE)
+  file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)]
+
+  # split each file name into its GSM accession and its 10X file type
+  res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list),
+                               proto = data.table::data.table(accession_id = character(),
+                                                  file_type = character())))
+  res$old_path <- file_list
+  res$old_name<- basename(file_list)
+
+  # files whose name does not match the pattern have no target; leave them alone
+  res <- res[!is.na(res$accession_id) & !is.na(res$file_type)]
+  if(nrow(res) == 0){
+    return(invisible(NULL))
+  }
+
+  # := is a no-op when no row matches
+  res[file_type == 'genes.tsv.gz', file_type := 'features.tsv.gz']
+  res$new_folder <- file.path(x, res$accession_id)
+  res$new_path <- file.path(res$new_folder, res$file_type)
+
+  pb <- progress::progress_bar$new(
+    format = "  [:bar] :percent eta: :eta",
+    clear = FALSE, total = nrow(res), width = 80)
+
+  for(i in unique(res$new_folder)){
+    if(!dir.exists(i)){
+      dir.create(i)
+    }
+  }
+
+  for(j in seq_len(nrow(res))){
+    pb$tick()
+    # delete the original only after a successful copy, so a failed copy
+    # never loses data
+    if(file.copy(res$old_path[j], res$new_path[j])){
+      file.remove(res$old_path[j])
+    }
+  }
+  invisible(NULL)
+}
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents
new file mode 100644
index 0000000..aca7d14
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents
@@ -0,0 +1,156 @@
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x data to cluster; handed unchanged to the Python `fit()` method
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#'
+#' @return list with `labels` (factor, noise as NA), `probabilities`, `cluster_persistance`, `exemplars`, `outlier_scores`
+#' @export
+#'
+#' @import reticulate
+#'
+#' @examples
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+  # Python module handle; delay_load defers the import until first use
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # size/count arguments are coerced because plain R numerics (e.g. 40) are
+  # doubles and would otherwise reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  return(result)
+}
+
+
+
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param object Seurat object whose embeddings are clustered
+#' @param reduction name of the reduction read with `Seurat::Embeddings()`
+#' @param dims columns of the embedding to use; NULL (default) uses all of them
+#' @param algorithm passed to `hdbscan.HDBSCAN` as `algorithm`
+#' @param alpha passed to `hdbscan.HDBSCAN` as `alpha`
+#' @param approx_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param gen_min_span_tree passed to `hdbscan.HDBSCAN` under the same name
+#' @param leaf_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param metric passed to `hdbscan.HDBSCAN` as `metric`
+#' @param min_cluster_size passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param min_samples passed to `hdbscan.HDBSCAN` after coercion to integer
+#' @param cluster_selection_epsilon passed to `hdbscan.HDBSCAN` under the same name
+#' @param cluster_selection_method passed to `hdbscan.HDBSCAN` under the same name
+#' @param nThreads passed to `hdbscan.HDBSCAN` as `core_dist_n_jobs` (integer)
+#' @param return_seurat  logical to return the result within the original object or as the raw HDBSCAN result
+#' @param prediction_data passed to `hdbscan.HDBSCAN` under the same name
+#'
+#' @return the Seurat object with the result in `misc$hdbscan` and the labels in `cl`, or the result list when `return_seurat` is FALSE
+#' @export
+#'
+#' @examples
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+  # cell embeddings of the chosen reduction; drop = FALSE keeps a matrix
+  # even when `dims` selects a single column
+  x <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    x <- x[, dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # integer-valued arguments are coerced: plain R doubles reach Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # hdbscan labels noise as -1; turn only that level into NA, so the first
+  # real cluster is not discarded when no point was labelled as noise
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  if(return_seurat){
+    object@misc$hdbscan <- result
+    # the `cl` metadata column keeps the raw labels (noise stays "-1")
+    object$cl <- factor(clusterer$labels_)
+    return(object)
+  } else {
+    return(result)
+  }
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents
new file mode 100644
index 0000000..3a20887
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the reduction whose embeddings are the UMAP input
+#' @param spread passed to `uwot::umap()`
+#' @param n_components passed to `uwot::umap()`
+#' @param min_dist passed to `uwot::umap()`
+#' @param metric passed to `uwot::umap()`
+#' @param n_neighbors passed to `uwot::umap()`
+#' @param set_op_mix_ratio passed to `uwot::umap()`
+#' @param local_connectivity passed to `uwot::umap()`
+#' @param repulsion_strength passed to `uwot::umap()`
+#' @param negative_sample_rate passed to `uwot::umap()`
+#' @param n_threads passed to `uwot::umap()`
+#' @param reduction_name name under which the result is stored, i.e. `object[[reduction_name]]`
+#' @param return_seurat TRUE returns the Seurat object with the new reduction, FALSE the embedding matrix
+#' @param verbose passed to `uwot::umap()`
+#'
+#' @return Seurat object or embedding matrix, see `return_seurat`
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+#' @examples
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads =  parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  umap_res <-   uwot::umap(embds,
+                           spread = spread, # was hard-coded to 1
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           metric = metric,
+                           n_threads = n_threads,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose)
+  # cell names as row names and key-prefixed column names for the DimReduc object
+  dimnames(umap_res) <- list(rownames(embds), paste0('UMAP_', seq_len(ncol(umap_res))))
+  if(!return_seurat){
+    return(umap_res)
+  }
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA')
+  return(object)
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the reduction whose embeddings are the UMAP input
+#' @param spread passed to `uwot::umap()`
+#' @param n_components passed to `uwot::umap()`; NULL (default) uses the number of columns of the input embedding
+#' @param min_dist passed to `uwot::umap()`
+#' @param metric passed to `uwot::umap()`
+#' @param n_neighbors passed to `uwot::umap()`
+#' @param set_op_mix_ratio passed to `uwot::umap()`
+#' @param local_connectivity passed to `uwot::umap()`
+#' @param repulsion_strength passed to `uwot::umap()`
+#' @param negative_sample_rate passed to `uwot::umap()`
+#' @param n_threads passed to `uwot::umap()`
+#' @param reduction_name name under which the result is stored, i.e. `object[[reduction_name]]`
+#' @param return_seurat TRUE returns the Seurat object with the new reduction, FALSE the embedding matrix
+#' @param verbose passed to `uwot::umap()`
+#'
+#' @return Seurat object or embedding matrix, see `return_seurat`
+#' @export
+#'
+#' @examples
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  # by default keep as many UMAP dimensions as the input reduction has
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           n_threads = n_threads,
+                           metric = metric,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res))
+
+  if(return_seurat){
+    # reductions are stored through `[[<-`
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                             key = 'clustUMAP_',
+                                                             assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents
new file mode 100644
index 0000000..d6c923b
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents
@@ -0,0 +1,181 @@
+#' Read10x v1
+#'
+#' Reads the matrix, barcode and gene/feature files found in a 10x style
+#' directory and assembles them into a sparse count matrix.
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and
+#'   `genes*.tsv`/`features*.tsv` files
+#' @param return.sce if `TRUE` (default) the counts are wrapped in a
+#'   `SingleCellExperiment`; if `FALSE` the sparse matrix itself is returned
+#'
+#' @return a `SingleCellExperiment` with a `counts` assay, or a sparse matrix
+#'   with gene names as row names and barcodes as column names
+#' @export
+#'
+#' @examples
+read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+# reads in matrix file ----------------------------------------------------
+  # skip = 3 presumes a three line header before the triplets - TODO confirm
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+# imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+# imports gene ------------------------------------------------------------
+  # the second column of the genes/features file supplies the row names
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V2
+
+# duplicate gene names for row names --------------------------------------
+  # repeated names receive a numeric suffix (name.1, name.2, ...)
+  gene <- make.unique(gene)
+
+  # dims are stated explicitly so genes/barcodes without any count are kept
+  # and the dimnames always match the matrix shape
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene, barcode))
+
+  if(return.sce){
+    # no metadata is attached: this function has no `meta` argument
+    return(SingleCellExperiment::SingleCellExperiment(list(counts = res)))
+  }
+  return(res)
+}
+
+#' Read10x v2
+#'
+#' Reads several 10x directories in parallel with `db_read10x()` and column
+#' binds the per-directory results.
+#'
+#' @param filepaths character vector of directories, one per sample
+#' @param project currently unused (its handling is not implemented)
+#' @param meta currently unused (its handling is not implemented)
+#'
+#' @return the `cbind` of the objects returned by `db_read10x()` for every
+#'   entry of `filepaths`
+#' @export
+#'
+#' @examples
+read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){
+  if(length(filepaths) == 0){
+    stop('filepaths must contain at least one directory')
+  }
+  int_list <- seq_along(filepaths)
+  iterations <- length(int_list)                               # used for the foreach loop
+
+# creating cluster and registering doSNOW ---------------------------------
+  # detectCores() can return NA or 1, so always keep at least one worker and
+  # never start more workers than there are directories to read
+  numCores <- parallel::detectCores() - 1
+  if(is.na(numCores) || numCores < 1){
+    numCores <- 1
+  }
+  numCores <- min(numCores, iterations)
+  cl <- snow::makeCluster(numCores)
+  on.exit(snow::stopCluster(cl), add = TRUE)
+  doSNOW::registerDoSNOW(cl)
+
+# progress bar ------------------------------------------------------------
+  pb <- progress::progress_bar$new(
+    format = ":percent item = :item [:bar] :elapsed | eta: :eta",
+    total = iterations,
+    width = floor(getOption('width')*0.9),
+    clear = TRUE
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+  progress <- function(n) {
+    pb$tick(tokens = list(item = int_list[n]))     # report the int_list item
+  }
+
+  opts <- list(progress = progress)  # used in the foreach loop
+
+  # bind the operator locally instead of attaching foreach with require()
+  `%dopar%` <- foreach::`%dopar%`
+  result <- foreach::foreach(i = int_list,
+                             .options.snow = opts,
+                             .export = 'db_read10x',
+                             .combine = 'cbind',
+                             .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% {
+                               db_read10x(path = filepaths[i])
+                             }
+
+  return(result)
+}
+
+
+#' Read10x v3
+#'
+#' Reads the matrix, barcode and gene/feature files of one 10x style
+#' directory. Row names come from the first column of the genes/features file.
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and
+#'   `genes*.tsv`/`features*.tsv` files
+#' @param return.sce if `TRUE` (default) the counts are wrapped in a
+#'   `SingleCellExperiment`; if `FALSE` the sparse matrix itself is returned
+#'
+#' @return a `SingleCellExperiment` with a `counts` assay, or the sparse
+#'   count matrix
+#' @export
+#'
+#' @examples
+db_read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # skip = 3 presumes a three line header before the triplets - TODO confirm
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V1
+
+  # duplicate gene names for row names --------------------------------------
+  # repeated names receive a numeric suffix (name.1, name.2, ...)
+  gene <- make.unique(as.character(gene))
+
+  # every listed gene and barcode gets a row/column, even without counts, so
+  # results from different directories keep a consistent number of rows
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene, barcode))
+
+  if(return.sce){
+    return(SingleCellExperiment::SingleCellExperiment(list(counts = res)))
+  }
+  # return the count matrix (the gene vector was returned here by mistake)
+  return(res)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents
new file mode 100644
index 0000000..3d824a5
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding
+#' @param a
+#' @param angular_rp_forest
+#' @param b
+#' @param force_approximation_algorithm
+#' @param init
+#' @param learning_rate
+#' @param local_connectivity
+#' @param low_memory
+#' @param metric
+#' @param metric_kwds
+#' @param min_dist
+#' @param n_components
+#' @param n_epochs
+#' @param n_neighbors
+#' @param negative_sample_rate
+#' @param output_metric
+#' @param output_metric_kwds
+#' @param random_state
+#' @param repulsion_strength
+#' @param set_op_mix_ratio
+#' @param spread
+#' @param target_metric
+#' @param target_metric_kwds
+#' @param target_n_neighbors
+#' @param target_weight
+#' @param transform_queue_size
+#' @param transform_seed
+#' @param unique
+#' @param verbose
+#' @param nThreads
+#'
+#' @return
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#'
+#' @examples
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  # Builds a Python `umap.UMAP` reducer through reticulate and returns the
+  # result of `fit_transform(embedding)`. Every argument except `embedding`
+  # and `nThreads` is forwarded to the `UMAP` constructor; the count-like
+  # ones are cast to integer first.
+
+  # nThreads is exported as OMP_NUM_THREADS for the current R session
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap_module <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap_module$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    # was misspelled `as.intger`, which made every call fail
+    local_connectivity=as.integer(local_connectivity),
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+  # Runs the Python `umap.UMAP` reducer (via reticulate) on the cell
+  # embeddings of `reduction`. With `return_seurat = TRUE` the result is
+  # stored in `object` under `reduction_name`, otherwise the raw
+  # `fit_transform()` output is returned.
+
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # restrict to the requested dimensions by subsetting the columns
+    embedding <- embedding[, dims, drop = FALSE]
+  }
+
+  # nThreads is exported as OMP_NUM_THREADS for the current R session
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap_module <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap_module$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    # cast like the other count-like arguments (matches `umap()`)
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+
+  if(!return_seurat){
+    return(result)
+  }
+
+  # the Python result carries no dimnames; label rows with the cell names and
+  # columns with the key so the reduction can be matched to the object's cells
+  labelled <- result
+  rownames(labelled) <- rownames(embedding)
+  colnames(labelled) <- paste0('umap_', seq_len(ncol(labelled)))
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = labelled, key = 'umap_', assay = 'RNA')
+  return(object)
+}
+
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents
new file mode 100644
index 0000000..9411853
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents
@@ -0,0 +1,53 @@
+#' Reorganize another person's mess into a usable 10X dataset
+#'
+#' Moves every `*tsv.gz` / `*mtx.gz` file in `x` whose name starts with a GSM
+#' accession into a sub-folder named after that accession, renaming
+#' `genes.tsv.gz` to `features.tsv.gz`.
+#'
+#' @param x  path containing the unorganized disaster
+#'
+#' @return `NULL`, invisibly; called for its side effect on the file system
+#' @export
+#' @import data.table
+#'
+#' @examples
+organize_10x <- function(x ){
+  path_main <- x
+  file_list <- dir(path = x, full.names = TRUE)
+  file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)]
+
+  res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list),
+                               proto = data.table::data.table(accession_id = character(),
+                                                  file_type = character())))
+  res$old_path <- file_list
+  res$old_name <- basename(file_list)
+
+  # names that do not match the pattern yield NA fields; leave those files
+  # where they are instead of moving them into an "NA" folder
+  res <- res[!is.na(res$accession_id) & !is.na(res$file_type)]
+  if(nrow(res) == 0){
+    return(invisible(NULL))
+  }
+
+  res$file_type[res$file_type == 'genes.tsv.gz'] <- 'features.tsv.gz'
+  res$new_folder <- file.path(path_main, paste0(res$accession_id))
+  res$new_path <- file.path(res$new_folder, res$file_type)
+
+  new_dirs <- unique(res$new_folder)
+
+  pb <- progress::progress_bar$new(
+    format = "  [:bar] :percent eta: :eta",
+    clear = FALSE, total = nrow(res), width = 80)
+
+  for(i in new_dirs){
+
+    if(!dir.exists(i)){
+      dir.create(i)
+    }
+
+    f2m <- res[res$new_folder == i]
+
+    for(j in seq_len(nrow(f2m))){
+      pb$tick()
+      copied <- file.copy(f2m$old_path[j], f2m$new_path[j])
+      if(copied){
+        # the source is only deleted once its copy is known to exist
+        file.remove(f2m$old_path[j])
+      } else {
+        warning('could not copy ', f2m$old_path[j], ' to ', f2m$new_path[j],
+                '; the original file was kept')
+      }
+    }
+
+  }
+
+  invisible(NULL)
+}
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents
new file mode 100644
index 0000000..1768c36
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents
@@ -0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath directory passed to `Seurat::Read10X()`
+#' @param sample sample name; prefixed to every cell barcode and used as the
+#'   project name. Defaults to the base name of `filepath`
+#' @param meta optional cell level meta data handed to
+#'   `Seurat::CreateSeuratObject()`; `NULL` (default) adds none
+#'
+#' @return a Seurat object
+#' @export
+#' @import Seurat
+#'
+#' @examples
+create_seurat <- function(filepath, sample = NULL, meta = NULL){
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # create unique cell ids
+  colnames(x) <- paste0(sample, '_', colnames(x))
+
+  # create Seurat Object and include meta data
+  # (`meta` used to be read from the calling environment; it is now an argument)
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample)
+  })
+
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' Runs, in order: mitochondrial percentage, cell filtering, normalization,
+#' variable feature selection, cell cycle scoring, scaling and PCA.
+#'
+#' @param object Seurat object
+#' @param species 'Homo sapiens' selects the '^MT-' mitochondrial pattern, any
+#'   other value '^mt-'; 'Mus musculus' additionally converts the cell cycle
+#'   gene names to title case
+#' @param nfeatures number of variable features to select
+#' @param npcs number of principal components to compute
+#'
+#' @return the processed Seurat object
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+#' @examples
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                 pattern = mt_pattern,
+                                 col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # Removing low quality cells and doublets
+  object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object <- Seurat::NormalizeData(object, verbose = TRUE)
+
+  # Variable Features
+  message_section(paste('Finding',nfeatures,'most variable fatures'))
+  object <- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  # cell cycle gene sets, referenced through the Seurat namespace so they are
+  # found even when Seurat is not attached
+  cc_genes <- Seurat::cc.genes.updated.2019
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc_genes$s.genes)
+    ccg2m <- stringr::str_to_title(cc_genes$g2m.genes)
+  } else { # use built in gene names
+    ccss <- cc_genes$s.genes
+    ccg2m <- cc_genes$g2m.genes
+  }
+
+  # scoring function
+  object <- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <- object$S.Score - object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object <- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA ----
+  # the variable features are read with the accessor; the `var.genes` slot
+  # used before belongs to the older object layout
+  object <- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+
+}
+
+message_section <- function(text){
+  # Prints a dashed rule followed by a bold, time-stamped section title.
+  # The rule spans 75% of the console width, capped at 120 characters.
+  rule_width <- min(ceiling(getOption('width') * 0.75), 120)
+  rule <- paste(rep('-', rule_width), collapse = '')
+  cat("\n", rule, "\n", sep = '')
+
+  stamp <- crayon::bold(crayon::yellow(paste0('[', Sys.time(), ']')))
+  title <- crayon::bold(crayon::green(text))
+  cat(stamp, title, '\n')
+}
+
+message_task <- function(text){
+  # Prints a time stamp followed by `text`. Text longer than the available
+  # width (75% of the console, capped at 120, minus 22) is left out and only
+  # the time stamp is printed.
+  limit <- min(ceiling(getOption('width') * 0.75), 120)
+  stamp <- crayon::yellow(paste0('[', Sys.time(), ']'))
+
+  if(nchar(text) > limit - 22){
+    cat(stamp, '\n')
+    return(invisible(NULL))
+  }
+  cat(stamp, text, '\n')
+}
+
+message_append <- function(text){
+  # Prints `text` as an indented, silver "- " bullet. Text longer than the
+  # available width (75% of the console, capped at 120, minus 22) is skipped.
+  limit <- min(ceiling(getOption('width') * 0.75), 120)
+  if(nchar(text) > limit - 22){
+    return(invisible())
+  }
+
+  indent <- rep(' ', 23)
+  cat(indent, crayon::silver('- '), crayon::silver(text), '\n', sep = '')
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents
new file mode 100644
index 0000000..2f9f0fc
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object
+#' @param reduction
+#' @param spread
+#' @param n_components
+#' @param min_dist
+#' @param metric
+#' @param n_neighbors
+#' @param set_op_mix_ratio
+#' @param local_connectivity
+#' @param repulsion_strength
+#' @param negative_sample_rate
+#' @param n_threads
+#' @param reduction_name
+#' @param return_seurat
+#' @param verbose whether to print function messages
+#'
+#' @return
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+#' @examples
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads =  parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  # Runs uwot::umap() on the cell embeddings of `reduction`. With
+  # `return_seurat = TRUE` the result is stored in `object` under
+  # `reduction_name`; otherwise the embedding matrix is returned.
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  umap_res <- uwot::umap(embds,
+                         spread = spread,       # was fixed at 1, ignoring the argument
+                         n_components = n_components,
+                         min_dist = min_dist,
+                         metric = metric,
+                         n_threads = n_threads,
+                         n_neighbors = n_neighbors,
+                         set_op_mix_ratio = set_op_mix_ratio,
+                         local_connectivity = local_connectivity,
+                         repulsion_strength = repulsion_strength,
+                         negative_sample_rate = negative_sample_rate,
+                         verbose = verbose      # was accepted but never forwarded
+  )
+
+  if(!return_seurat){
+    return(umap_res)
+  }
+
+  # label a copy with cell names and key-based column names before storing it
+  labelled <- umap_res
+  rownames(labelled) <- rownames(embds)
+  colnames(labelled) <- paste0('UMAP_', seq_len(ncol(labelled)))
+  # reductions are stored with `[[<-`, as umap.Seurat() does
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = labelled, key = 'UMAP_', assay = 'RNA')
+  return(object)
+
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object
+#' @param reduction
+#' @param spread
+#' @param n_components
+#' @param min_dist
+#' @param metric
+#' @param n_neighbors
+#' @param set_op_mix_ratio
+#' @param local_connectivity
+#' @param repulsion_strength
+#' @param negative_sample_rate
+#' @param n_threads
+#' @param reduction_name
+#' @param return_seurat
+#' @param verbose   whether to print function messages
+#'
+#' @return
+#' @export
+#'
+#' @examples
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  # Runs uwot::umap() on the cell embeddings of `reduction`. When
+  # `n_components` is NULL the output keeps as many dimensions as the input
+  # reduction has columns.
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <- uwot::umap(embds,
+                         spread = spread,
+                         n_components = n_components,
+                         min_dist = min_dist,
+                         n_threads = n_threads,
+                         metric = metric,
+                         n_neighbors = n_neighbors,
+                         set_op_mix_ratio = set_op_mix_ratio,
+                         local_connectivity = local_connectivity,
+                         repulsion_strength = repulsion_strength,
+                         negative_sample_rate = negative_sample_rate,
+                         verbose = verbose
+  )
+
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', seq_len(ncol(umap_res)))
+
+  if(!return_seurat){
+    return(umap_res)
+  }
+
+  # reductions are stored with `[[<-`, as umap.Seurat() does
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                           key = 'clustUMAP_',
+                                                           assay = 'RNA')
+  return(object)
+
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents
new file mode 100644
index 0000000..1998197
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents
@@ -0,0 +1,176 @@
+#' Custom Palette for UMAP
+#'
+#' @param object object with a `meta.data` slot
+#' @param group_col name of the `meta.data` column that holds the groups
+#' @param base_col colour handed to `colortools::setColors()`
+#' @param jitter if `TRUE` the palette is re-ordered by stepping through it
+#'   in strides of five; if `FALSE` the original order is kept
+#'
+#' @return character vector of colours, one per unique group; 'gray' is put
+#'   in front of the generated colours when a "-1" group is present
+#' @export
+#' @importFrom colortools setColors
+#' @import viridis
+#'
+#' @examples
+#'
+pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+
+  # natural order unless jitter is requested (`new_order` used to be
+  # undefined for jitter = FALSE)
+  new_order <- seq_len(n)
+  if(jitter){
+    # start points beyond n are dropped so fewer than five groups no longer
+    # make seq() fail
+    starts <- seq_len(min(5, n))
+    new_order <- unlist(lapply(starts, function(x) seq(x, n, 5)))
+  }
+  pal <- c(pal, colortools::setColors(base_col,n))[new_order]
+  return(pal)
+}
+
+#' UMAP Palette using HCL presets
+#'
+#' @param object object with a `meta.data` slot
+#' @param group_col name of the `meta.data` column that holds the groups
+#' @param hcl_pal palette name passed to `hcl.colors()`
+#' @param jitter if `TRUE` the colours are interleaved with a stride of
+#'   `floor(n / comp)`, where `n` is the number of unique groups
+#' @param comp divisor used to derive the interleaving stride
+#'
+#' @return character vector of colours, preceded by 'gray' when a "-1" group
+#'   is present
+#' @export
+#'
+#' @examples
+hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # a stride of at least 1 keeps seq() valid when there are fewer groups than `comp`
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+  }
+  pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order])
+  return(pal)
+}
+
+#' UMAP palette using rainbow colors
+#'
+#' @param object object with a `meta.data` slot
+#' @param group_col name of the `meta.data` column that holds the groups
+#' @param jitter if `TRUE` the colours are interleaved with a stride of
+#'   `floor(n / comp)`, where `n` is the number of unique groups
+#' @param comp divisor used to derive the interleaving stride
+#'
+#' @return character vector of colours, preceded by 'gray' when a "-1" group
+#'   is present
+#' @export
+#'
+#' @examples
+rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # a stride of at least 1 keeps seq() valid when there are fewer groups than `comp`
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+  }
+  # the rainbow has 57 steps as before, and grows only when more groups are
+  # present so that no index falls outside the palette
+  n_steps <- max(57, n)
+  pal <- c(pal,rainbow(n_steps,s = 0.7,v = 0.8,alpha = 0.95)[new_order])
+  return(pal)
+}
+
+#' UMAP Palette using soft hues
+#'
+#' @param object object with a `meta.data` slot
+#' @param group_col name of the `meta.data` column that holds the groups
+#' @param jitter if `TRUE` the colours are interleaved with a stride of
+#'   `floor(n / comp)`, where `n` is the number of unique groups
+#' @param comp divisor used to derive the interleaving stride
+#'
+#' @return character vector of colours, preceded by 'gray' when a "-1" group
+#'   is present
+#' @export
+#'
+#' @importFrom colorspace sequential_hcl
+#'
+#' @examples
+hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # a stride of at least 1 keeps seq() valid when there are fewer groups than `comp`
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+  }
+  pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order])
+  return(pal)
+}
+
+gg_color_hue <- function(n) {
+  # n + 1 evenly spaced hue angles between 15 and 375; the first n are turned
+  # into colours with fixed chroma 100 and luminance 65
+  hue_angles <- seq(from = 15, to = 375, length.out = n + 1)
+  cols <- hcl(h = hue_angles, c = 100, l = 65)
+  cols[1:n]
+}
+
+#' UMAP Palette using ggplot2 colors
+#'
+#' @param object object with a `meta.data` slot
+#' @param group_col name of the `meta.data` column that holds the groups
+#' @param jitter if `TRUE` the colours are interleaved with a stride of
+#'   `floor(n / comp)`, where `n` is the number of unique groups
+#' @param comp divisor used to derive the interleaving stride
+#'
+#' @return character vector of colours from `gg_color_hue()`, preceded by
+#'   'gray' when a "-1" group is present
+#' @export
+#'
+#' @examples
+gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # a stride of at least 1 keeps seq() valid when there are fewer groups than `comp`
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+  }
+  pal <- c(pal, gg_color_hue(n)[new_order])
+  return(pal)
+}
+
+hex_convert <- function(x){
+  # Converts numbers in [0, 256) to two-digit upper-case hexadecimal strings.
+  # Fractional values are rounded down; the function is vectorised over `x`.
+  if(anyNA(x) || any(x < 0 | x >= 256)){
+    stop('hex_convert() expects values in the range [0, 256)')
+  }
+  sprintf('%02X', as.integer(floor(x)))
+}
+
+incA <- function(n, min = 0, base = '#E1E1E1'){
+  # Returns n + 1 colours: `base` with the lowest alpha, followed by the
+  # viridis plasma palette with alpha rising from that level to 255.
+  # `min` is given in percent and rescaled to the 0-255 range.
+  floor_alpha <- min/100*255
+  first_colour <- paste0(base, hex_convert(floor_alpha))
+
+  ramp_rgb <- substring(viridis::plasma(n), 1, 7)
+  ramp_alpha <- hex_convert(seq(floor_alpha, 255, length.out = n))
+
+  c(first_colour, paste0(ramp_rgb, ramp_alpha))
+}
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents
new file mode 100644
index 0000000..0577bd5
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents
@@ -0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath directory passed to `Seurat::Read10X()`
+#' @param sample  sample name to use; prefixed to every cell barcode and used
+#'   as the project name. Defaults to the base name of `filepath`
+#' @param meta optional cell level meta data handed to
+#'   `Seurat::CreateSeuratObject()`; `NULL` (default) adds none
+#'
+#' @return a Seurat object
+#' @export
+#' @import Seurat
+#'
+#' @examples
+create_seurat <- function(filepath, sample = NULL, meta = NULL){
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # create unique cell ids
+  colnames(x) <- paste0(sample, '_', colnames(x))
+
+  # create Seurat Object and include meta data
+  # (`meta` used to be read from the calling environment; it is now an argument)
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample)
+  })
+
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' Runs, in order: mitochondrial percentage, cell filtering, normalization,
+#' variable feature selection, cell cycle scoring, scaling and PCA.
+#'
+#' @param object Seurat object
+#' @param species 'Homo sapiens' selects the '^MT-' mitochondrial pattern, any
+#'   other value '^mt-'; 'Mus musculus' additionally converts the cell cycle
+#'   gene names to title case
+#' @param nfeatures number of variable features to select
+#' @param npcs  number of principal component dimensions to calculate
+#'
+#' @return the processed Seurat object
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+#' @examples
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                 pattern = mt_pattern,
+                                 col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # Removing low quality cells and doublets
+  object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object <- Seurat::NormalizeData(object, verbose = TRUE)
+
+  # Variable Features
+  message_section(paste('Finding',nfeatures,'most variable fatures'))
+  object <- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  # cell cycle gene sets, referenced through the Seurat namespace so they are
+  # found even when Seurat is not attached
+  cc_genes <- Seurat::cc.genes.updated.2019
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc_genes$s.genes)
+    ccg2m <- stringr::str_to_title(cc_genes$g2m.genes)
+  } else { # use built in gene names
+    ccss <- cc_genes$s.genes
+    ccg2m <- cc_genes$g2m.genes
+  }
+
+  # scoring function
+  object <- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <- object$S.Score - object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object <- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA ----
+  # the variable features are read with the accessor; the `var.genes` slot
+  # used before belongs to the older object layout
+  object <- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+
+}
+
+message_section <- function(text){
+  # Prints a dashed rule followed by a bold, time-stamped section title.
+  # The rule spans 75% of the console width, capped at 120 characters.
+  rule_width <- min(ceiling(getOption('width') * 0.75), 120)
+  rule <- paste(rep('-', rule_width), collapse = '')
+  cat("\n", rule, "\n", sep = '')
+
+  stamp <- crayon::bold(crayon::yellow(paste0('[', Sys.time(), ']')))
+  title <- crayon::bold(crayon::green(text))
+  cat(stamp, title, '\n')
+}
+
+message_task <- function(text){
+  # Prints a time stamp followed by `text`. Text longer than the available
+  # width (75% of the console, capped at 120, minus 22) is left out and only
+  # the time stamp is printed.
+  limit <- min(ceiling(getOption('width') * 0.75), 120)
+  stamp <- crayon::yellow(paste0('[', Sys.time(), ']'))
+
+  if(nchar(text) > limit - 22){
+    cat(stamp, '\n')
+    return(invisible(NULL))
+  }
+  cat(stamp, text, '\n')
+}
+
+message_append <- function(text){
+  # Prints `text` as an indented, silver "- " bullet. Text longer than the
+  # available width (75% of the console, capped at 120, minus 22) is skipped.
+  limit <- min(ceiling(getOption('width') * 0.75), 120)
+  if(nchar(text) > limit - 22){
+    return(invisible())
+  }
+
+  indent <- rep(' ', 23)
+  cat(indent, crayon::silver('- '), crayon::silver(text), '\n', sep = '')
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents
new file mode 100644
index 0000000..32499c2
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents
@@ -0,0 +1,145 @@
+#' CellPhoneDB Summary File
+#'
+#' Reads `means.txt` and `pvalues.txt` from `path`, melts both to long format,
+#' merges them and splits the interacting pair and the cell pair into two columns each.
+#'
+#' @param path the directory containing the CellPhoneDB Output
+#' @param pvalue `'all'` (default) keeps every row, `'significant'` keeps rows with
+#'   pvalue < 0.05 and a number keeps rows with pvalue below that number
+#'
+#' @return a data.table with cell_a, cell_b, gA, gB followed by the merged columns
+#' @export
+#'
+#' @import data.table
+#' @import Matrix
+#' @import crayon
+#'
+cellphonedb_summary <- function(path, pvalue = 'all'){
+  # progress line: green timestamp followed by a white label
+  step <- function(label) cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(label))))
+  # resolve the row filter once; NULL means keep every row
+  cutoff <- if(is.numeric(pvalue)) pvalue[1] else if(identical(pvalue, 'significant')) 0.05 else NULL
+
+  step(' Reading CellPhoneDB files')  # announced before the files are read
+  means <- data.table::fread(file.path(path,'means.txt'))
+  pvalues <- data.table::fread(file.path(path, 'pvalues.txt'))
+  # NOTE(review): treats the first 11 columns as identifier columns in both files - confirm for the CellPhoneDB version in use
+  id.vars <- colnames(means)[1:11]
+  means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean')
+  pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue')
+  data.table::setkeyv(means, c('cell_pair',id.vars))
+  data.table::setkeyv(pvalues, c('cell_pair',id.vars))
+
+  step(' Merging datasets')  # merge defaults to the shared key columns set above
+  result <- data.table::merge.data.table(means, pvalues)
+
+  step(' Capturing Gene Pairs')  # the greedy pattern splits at the last underscore
+  int_pairs <- strcapture('(.+)\\_(.+)', result$interacting_pair,
+                          data.table::data.table(gA = character(), gB = character()))
+
+  step(' Capturing Cell Pairs')  # the greedy pattern splits at the last '|'
+  cell_pair <- strcapture('(.+)\\|(.+)', result$cell_pair,
+                          data.table::data.table(cell_a = character(), cell_b = character()))
+
+  result <- data.table::data.table(cell_pair, int_pairs, result)
+  if(!is.null(cutoff)) result <- result[result$pvalue < cutoff]
+  cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished'))))
+  return(result)
+}
+
+
+# prep_cellphonedb <- function(rds, meta_column, path){
+#   require(data.table)
+#   require(Seurat)
+#
+#   object <- readRDS(rds)
+#
+#   res <- sparse2DT.Seurat(object)
+#
+#   new.meta <- object@meta.data[,meta_column]
+#   names(new.meta) <- rownames(object@meta.data)
+#
+#   # add cell_types to res
+#   res[,cell_subset:=new.meta[res$Cell]]
+#   data.table::setkey(res, Genes, cell_subset)
+#
+#   # generate summary information to be used for filtering uninformative genes
+#   test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)]
+#   test[,total:=sum(N),Genes]
+#   test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+#
+#   # create vector with leftover genes
+#   gl <- unique(test$Genes)
+#
+#   # subset count dataset
+#   res <- res[Genes %in% gl]
+#
+#   # create counts file
+#   counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+#   colnames(counts)[1] <- 'Gene'
+#   setkey(counts, Gene)
+#
+#   m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+#   mz_genes <- m2h$Ensembl_gene_id
+#   names(mz_genes) <- m2h$mouse
+#   new_genes <- mz_genes[counts$Gene]
+#   names(new_genes) <- counts$Gene
+#   new_genes <- new_genes[!is.na(new_genes)]
+#
+#   dim(counts)
+#   counts <- counts[Gene  %in% names(new_genes)]
+#   counts[,Gene:=new_genes[Gene]]
+#
+#   # create meta file
+#   meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+#   meta <- meta[Cell %in% colnames(counts)[-1]]
+#   fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+#   fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' Produces one row per entry reported by `Matrix::summary()`, with the row and
+#' column indices translated to the matrix's row and column names.
+#'
+#' @param matrix a matrix accepted by `Matrix::summary()` that carries row and column names
+#'
+#' @return a data.table with columns `Genes` (row name), `Cell` (column name) and `Count` (value)
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+sparse2DT <- function(matrix){
+  # triplet view of the matrix: row index i, column index j, value x
+  triplets <- Matrix::summary(matrix)
+
+  data.table::data.table(
+    Genes = rownames(matrix)[triplets$i],
+    Cell = colnames(matrix)[triplets$j],
+    Count = triplets$x)
+}
+
+
+
+#' Convert the RNA data of a Seurat object to a data.table
+#'
+#' @param object a Seurat object with an `RNA` assay
+#'
+#' @return the data.table produced by `sparse2DT()` for the RNA assay's `data` slot
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @seealso [sparse2DT()]
+sparse2DT.Seurat <- function(object){
+
+  # `data` slot of the RNA assay (normalized data), columns ordered as Cells(object)
+  mat <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  result <- sparse2DT(mat)
+  return(result)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents
new file mode 100644
index 0000000..ce41ba9
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding data handed to the Python reducer's `fit_transform()`
+#' @param a forwarded to `umap.UMAP`
+#' @param angular_rp_forest forwarded to `umap.UMAP`
+#' @param b forwarded to `umap.UMAP`
+#' @param force_approximation_algorithm forwarded to `umap.UMAP`
+#' @param init forwarded to `umap.UMAP`
+#' @param learning_rate forwarded to `umap.UMAP`
+#' @param local_connectivity coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param low_memory forwarded to `umap.UMAP`
+#' @param metric forwarded to `umap.UMAP`
+#' @param metric_kwds forwarded to `umap.UMAP`
+#' @param min_dist forwarded to `umap.UMAP`
+#' @param n_components coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param n_epochs coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param n_neighbors coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param negative_sample_rate forwarded to `umap.UMAP`
+#' @param output_metric forwarded to `umap.UMAP`
+#' @param output_metric_kwds forwarded to `umap.UMAP`
+#' @param random_state coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param repulsion_strength forwarded to `umap.UMAP`
+#' @param set_op_mix_ratio forwarded to `umap.UMAP`
+#' @param spread forwarded to `umap.UMAP`
+#' @param target_metric forwarded to `umap.UMAP`
+#' @param target_metric_kwds forwarded to `umap.UMAP`
+#' @param target_n_neighbors coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param target_weight forwarded to `umap.UMAP`
+#' @param transform_queue_size forwarded to `umap.UMAP`
+#' @param transform_seed coerced with `as.integer()` and forwarded to `umap.UMAP`
+#' @param unique forwarded to `umap.UMAP`
+#' @param verbose forwarded to `umap.UMAP`
+#' @param nThreads  number of parallel threads to be used (written to the `OMP_NUM_THREADS` environment variable)
+#'
+#' @return the value of the Python reducer's `fit_transform(embedding)`
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#' @details The Python module `umap` is imported through reticulate and a `umap.UMAP` reducer is built
+#'   from the arguments above; `OMP_NUM_THREADS` is set before the reducer is created.
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap_module <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap_module$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=as.integer(local_connectivity),
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+  # fit the reducer on `embedding` and keep the transformed coordinates
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+
+# Run Python UMAP (module `umap`, loaded through reticulate) on a reduction stored in a Seurat object.
+#   object, reduction : Seurat object and the name of the reduction whose embeddings are used
+#   dims              : optional columns of those embeddings to keep (NULL keeps all of them)
+#   reduction_name    : name under which the result is stored when return_seurat = TRUE (otherwise the matrix is returned)
+#   remaining args    : forwarded to umap.UMAP; nThreads is written to OMP_NUM_THREADS
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+  # Embeddings() does not subset by `dims`, so the column selection is applied here
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)) embedding <- embedding[, dims, drop = FALSE]
+
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+  rownames(result) <- rownames(embedding)  # carry the input row (cell) names over to the coordinates
+
+  if(!return_seurat) return(result)
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA')
+  return(object)
+}
+
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents
new file mode 100644
index 0000000..2f4b795
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents
@@ -0,0 +1,145 @@
+#' CellPhoneDB Summary File
+#'
+#' Reads `means.txt` and `pvalues.txt` from `path`, melts both to long format,
+#' merges them and splits the interacting pair and the cell pair into two columns each.
+#'
+#' @param path the directory containing the CellPhoneDB Output
+#' @param pvalue `'all'` (default) keeps every row, `'significant'` keeps rows with
+#'   pvalue < 0.05 and a number keeps rows with pvalue below that number
+#'
+#' @return a data.table with cell_a, cell_b, gA, gB followed by the merged columns
+#' @export
+#'
+#' @import data.table
+#' @import Matrix
+#' @import crayon
+#'
+cellphonedb_summary <- function(path, pvalue = 'all'){
+  # progress line: green timestamp followed by a white label
+  step <- function(label) cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(label))))
+  # resolve the row filter once; NULL means keep every row
+  cutoff <- if(is.numeric(pvalue)) pvalue[1] else if(identical(pvalue, 'significant')) 0.05 else NULL
+
+  step(' Reading CellPhoneDB files')  # announced before the files are read
+  means <- data.table::fread(file.path(path,'means.txt'))
+  pvalues <- data.table::fread(file.path(path, 'pvalues.txt'))
+  # NOTE(review): treats the first 11 columns as identifier columns in both files - confirm for the CellPhoneDB version in use
+  id.vars <- colnames(means)[1:11]
+  means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean')
+  pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue')
+  data.table::setkeyv(means, c('cell_pair',id.vars))
+  data.table::setkeyv(pvalues, c('cell_pair',id.vars))
+
+  step(' Merging datasets')  # merge defaults to the shared key columns set above
+  result <- data.table::merge.data.table(means, pvalues)
+
+  step(' Capturing Gene Pairs')  # the greedy pattern splits at the last underscore
+  int_pairs <- strcapture('(.+)\\_(.+)', result$interacting_pair,
+                          data.table::data.table(gA = character(), gB = character()))
+
+  step(' Capturing Cell Pairs')  # the greedy pattern splits at the last '|'
+  cell_pair <- strcapture('(.+)\\|(.+)', result$cell_pair,
+                          data.table::data.table(cell_a = character(), cell_b = character()))
+
+  result <- data.table::data.table(cell_pair, int_pairs, result)
+  if(!is.null(cutoff)) result <- result[result$pvalue < cutoff]
+  cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished'))))
+  return(result)
+}
+
+
+# prep_cellphonedb <- function(rds, meta_column, path){
+#   require(data.table)
+#   require(Seurat)
+#
+#   object <- readRDS(rds)
+#
+#   res <- sparse2DT.Seurat(object)
+#
+#   new.meta <- object@meta.data[,meta_column]
+#   names(new.meta) <- rownames(object@meta.data)
+#
+#   # add cell_types to res
+#   res[,cell_subset:=new.meta[res$Cell]]
+#   data.table::setkey(res, Genes, cell_subset)
+#
+#   # generate summary information to be used for filtering uninformative genes
+#   test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)]
+#   test[,total:=sum(N),Genes]
+#   test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+#
+#   # create vector with leftover genes
+#   gl <- unique(test$Genes)
+#
+#   # subset count dataset
+#   res <- res[Genes %in% gl]
+#
+#   # create counts file
+#   counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+#   colnames(counts)[1] <- 'Gene'
+#   setkey(counts, Gene)
+#
+#   m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+#   mz_genes <- m2h$Ensembl_gene_id
+#   names(mz_genes) <- m2h$mouse
+#   new_genes <- mz_genes[counts$Gene]
+#   names(new_genes) <- counts$Gene
+#   new_genes <- new_genes[!is.na(new_genes)]
+#
+#   dim(counts)
+#   counts <- counts[Gene  %in% names(new_genes)]
+#   counts[,Gene:=new_genes[Gene]]
+#
+#   # create meta file
+#   meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+#   meta <- meta[Cell %in% colnames(counts)[-1]]
+#   fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+#   fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' Produces one row per entry reported by `Matrix::summary()`, with the row and
+#' column indices translated to the matrix's row and column names.
+#'
+#' @param matrix a matrix accepted by `Matrix::summary()` that carries row and column names
+#'
+#' @return a data.table with columns `Genes` (row name), `Cell` (column name) and `Count` (value)
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+sparse2DT <- function(matrix){
+  # triplet view of the matrix: row index i, column index j, value x
+  triplets <- Matrix::summary(matrix)
+
+  data.table::data.table(
+    Genes = rownames(matrix)[triplets$i],
+    Cell = colnames(matrix)[triplets$j],
+    Count = triplets$x)
+}
+
+
+
+#' Convert the RNA data of a Seurat object to a data.table
+#'
+#' @param object a Seurat object with an `RNA` assay
+#'
+#' @return the data.table produced by `sparse2DT()` for the RNA assay's `data` slot
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @seealso [sparse2DT()]
+sparse2DT.Seurat <- function(object){
+
+  # `data` slot of the RNA assay (normalized data), columns ordered as Cells(object)
+  mat <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  result <- sparse2DT(mat)
+  return(result)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents
new file mode 100644
index 0000000..442d352
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents
@@ -0,0 +1,186 @@
+#' Read10x v1
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and `genes*.tsv*`/`features*.tsv*` files
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return a SingleCellExperiment with a `counts` assay, or the sparse count matrix when `return.sce = FALSE`
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @details Row names are the second column of the gene/feature file, made unique with `make.unique()`.
+read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # skip = 3 steps over the header lines so only the i/j/value triplets are parsed
+  # NOTE(review): assumes exactly three header lines before the triplets - confirm for all inputs
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  # the pattern matches both `genes*.tsv` and `features*.tsv`
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V2
+
+  # duplicate gene names for row names --------------------------------------
+  # repeats become name.1, name.2, ... while the first occurrence keeps its name
+  gene <- make.unique(gene)
+
+  # size the matrix from the name vectors: trailing genes or barcodes without
+  # any count would otherwise leave the dimnames longer than the matrix
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    # `meta` is not defined in this function, so it is no longer passed along
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    return(res)
+  }
+}
+
+#' Read10x v2
+#'
+#' @param filepaths character vector of directories, each readable by `db_read10x()`
+#' @param project currently unused
+#' @param meta meta data to include with the various datasets (currently unused)
+#'
+#' @return the per-directory `db_read10x()` results combined with `cbind`
+#' @export
+#' @import doParallel
+#' @import foreach
+#' @import doSNOW
+#' @import snow
+#' @import progress
+#'
+#' @details Directories are read in parallel on a snow cluster that is stopped when the function exits.
+read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){
+  if(length(filepaths) == 0) stop('filepaths must contain at least one directory')
+  int_list <- seq_along(filepaths)
+# checking meta data ------------------------------------------------------
+  #  if(is.null(meta)){
+  #   meta = list()
+  # } else if(nrow(meta)!=length(filepaths)){
+  #   stop('meta data needs to be the same length as filepaths')
+  # } else {
+  #   meta <- as.list(meta)
+  # }
+
+# setting project vector --------------------------------------------------
+  # if(length(project)!=length(filepaths)){
+  #   if( length(project) == 1){
+  #     project <- rep(project, times = length(filepaths))
+  #   } else {
+  #     stop('supply either one project or a vector the same length as filepaths')
+  #   }
+  # }
+
+# creating cluster and registering doSNOW ---------------------------------
+  # at least one worker and never more workers than directories; detectCores() can return NA
+  numCores <- max(1L, min(parallel::detectCores() - 1L, length(filepaths)), na.rm = TRUE)
+  cl <- snow::makeCluster(numCores)
+  on.exit(snow::stopCluster(cl))  # registered right after creation so the workers are always released
+  doSNOW::registerDoSNOW(cl)
+
+# progress bar ------------------------------------------------------------
+  iterations <- length(int_list)                               # used for the foreach loop
+
+  pb <- progress::progress_bar$new(
+    format = ":percent item = :item [:bar] :elapsed | eta: :eta",
+    total = iterations,
+    width = floor(options()$width*0.9),
+    clear = TRUE
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+  progress <- function(n) {
+    pb$tick(tokens = list(item = int_list[n]))     # report the int_list item
+  }
+
+  opts <- list(progress = progress)  # used in the foreach loop
+
+  # one db_read10x() call per directory on the workers; results are combined column-wise with cbind
+  # NOTE(review): `project` and `meta` are accepted but not used while the blocks above stay commented out
+  result <- foreach::foreach( i = seq_len(iterations),
+                       .options.snow = opts,
+                       .export = 'db_read10x',
+                       .combine = 'cbind',
+                       .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% {
+                         db_read10x(path = filepaths[i])
+                       }
+
+  return(result)
+}
+
+
+#' Read10x v3
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and `genes*.tsv*`/`features*.tsv*` files
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return a SingleCellExperiment with a `counts` assay, or the sparse count matrix when `return.sce = FALSE`
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @details Row names are the first column of the gene/feature file, made unique with `make.unique()`.
+db_read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # NOTE(review): skip = 3 assumes exactly three header lines before the i/j/value triplets - confirm for all inputs
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  # the pattern matches both `genes*.tsv` and `features*.tsv`
+  # row names come from the first column of that file
+  gene<- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V1
+
+  # duplicate gene names for row names --------------------------------------
+  # repeats become name.1, name.2, ... while the first occurrence keeps its name
+  gene <- make.unique(gene)
+
+  # size the matrix from the full name vectors: truncating the genes to max(mat$i)
+  # could give different inputs different row counts and left the barcodes unaligned
+  res <-  Matrix::sparseMatrix(
+    i =  mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    # hand back the count matrix (the gene vector was returned here before)
+    return(res)
+  }
+
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents
new file mode 100644
index 0000000..a9f80ff
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents
@@ -0,0 +1,186 @@
+#' Read10x v1
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and `genes*.tsv*`/`features*.tsv*` files
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return a SingleCellExperiment with a `counts` assay, or the sparse count matrix when `return.sce = FALSE`
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @details Row names are the second column of the gene/feature file, made unique with `make.unique()`.
+read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # skip = 3 steps over the header lines so only the i/j/value triplets are parsed
+  # NOTE(review): assumes exactly three header lines before the triplets - confirm for all inputs
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  # the pattern matches both `genes*.tsv` and `features*.tsv`
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V2
+
+  # duplicate gene names for row names --------------------------------------
+  # repeats become name.1, name.2, ... while the first occurrence keeps its name
+  gene <- make.unique(gene)
+
+  # size the matrix from the name vectors: trailing genes or barcodes without
+  # any count would otherwise leave the dimnames longer than the matrix
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    # `meta` is not defined in this function, so it is no longer passed along
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    return(res)
+  }
+}
+
+#' Read10x v2
+#'
+#' @param filepaths character vector of directories, each readable by `db_read10x()`
+#' @param project currently unused
+#' @param meta  meta data to include with the various datasets (currently unused)
+#'
+#' @return the per-directory `db_read10x()` results combined with `cbind`
+#' @export
+#' @import doParallel
+#' @import foreach
+#' @import doSNOW
+#' @import snow
+#' @import progress
+#'
+#' @details Directories are read in parallel on a snow cluster that is stopped when the function exits.
+read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){
+  if(length(filepaths) == 0) stop('filepaths must contain at least one directory')
+  int_list <- seq_along(filepaths)
+# checking meta data ------------------------------------------------------
+  #  if(is.null(meta)){
+  #   meta = list()
+  # } else if(nrow(meta)!=length(filepaths)){
+  #   stop('meta data needs to be the same length as filepaths')
+  # } else {
+  #   meta <- as.list(meta)
+  # }
+
+# setting project vector --------------------------------------------------
+  # if(length(project)!=length(filepaths)){
+  #   if( length(project) == 1){
+  #     project <- rep(project, times = length(filepaths))
+  #   } else {
+  #     stop('supply either one project or a vector the same length as filepaths')
+  #   }
+  # }
+
+# creating cluster and registering doSNOW ---------------------------------
+  # at least one worker and never more workers than directories; detectCores() can return NA
+  numCores <- max(1L, min(parallel::detectCores() - 1L, length(filepaths)), na.rm = TRUE)
+  cl <- snow::makeCluster(numCores)
+  on.exit(snow::stopCluster(cl))  # registered right after creation so the workers are always released
+  doSNOW::registerDoSNOW(cl)
+
+# progress bar ------------------------------------------------------------
+  iterations <- length(int_list)                               # used for the foreach loop
+
+  pb <- progress::progress_bar$new(
+    format = ":percent item = :item [:bar] :elapsed | eta: :eta",
+    total = iterations,
+    width = floor(options()$width*0.9),
+    clear = TRUE
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+  progress <- function(n) {
+    pb$tick(tokens = list(item = int_list[n]))     # report the int_list item
+  }
+
+  opts <- list(progress = progress)  # used in the foreach loop
+
+  # one db_read10x() call per directory on the workers; results are combined column-wise with cbind
+  # NOTE(review): `project` and `meta` are accepted but not used while the blocks above stay commented out
+  result <- foreach::foreach( i = seq_len(iterations),
+                       .options.snow = opts,
+                       .export = 'db_read10x',
+                       .combine = 'cbind',
+                       .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% {
+                         db_read10x(path = filepaths[i])
+                       }
+
+  return(result)
+}
+
+
+#' Read10x v3
+#'
+#' @param path directory holding the `matrix.mtx*`, `barcodes.tsv*` and `genes*.tsv*`/`features*.tsv*` files
+#' @param return.sce  return result as SingleCellExperiment object
+#'
+#' @return a SingleCellExperiment with a `counts` assay, or the sparse count matrix when `return.sce = FALSE`
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @details Row names are the first column of the gene/feature file, made unique with `make.unique()`.
+db_read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # NOTE(review): skip = 3 assumes exactly three header lines before the i/j/value triplets - confirm for all inputs
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  # the pattern matches both `genes*.tsv` and `features*.tsv`
+  # row names come from the first column of that file
+  gene<- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V1
+
+  # duplicate gene names for row names --------------------------------------
+  # repeats become name.1, name.2, ... while the first occurrence keeps its name
+  gene <- make.unique(gene)
+
+  # size the matrix from the full name vectors: truncating the genes to max(mat$i)
+  # could give different inputs different row counts and left the barcodes unaligned
+  res <-  Matrix::sparseMatrix(
+    i =  mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    # hand back the count matrix (the gene vector was returned here before)
+    return(res)
+  }
+
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents
new file mode 100644
index 0000000..cdcf33e
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents
@@ -0,0 +1,358 @@
+library(logger)  # logging helpers; the commented calls below are sample calls, one per logger level
+# log_info('Starting the script...')
+# log_debug('This is the second log line')
+# log_trace('Note that the 2nd line is being placed right after the 1st one.')
+# log_success('Doing pretty well so far!')
+# log_warn('But beware, as some errors might come :/')
+# log_error('This is a problem')
+# log_debug('Note that getting an error is usually bad')
+# log_error('This is another problem')
+# log_fatal('The last problem')
+# use the coloured glue layout and set the logging threshold to TRACE
+log_layout(layout_glue_colors)
+log_threshold(TRACE)
+
+log_info('Start of Script.')
+
+log_info('Creating Seurat Objects for Each Tissue')
+# Creating Seurat Objects for Each Tissue ----
+library(Seurat)
+# Builds a Seurat object labelled `project` from the 10X directory `y`.
+f <- function(y, project){
+  counts <- Read10X(y)
+  CreateSeuratObject(counts, project = project)
+}
+
+log_trace('..... Omental Macrophages')
+# Omental Macrophages: one 10X directory per replicate (exactly four are merged below)
+path.OmMac <- list.dirs('/rdx/db/E-MTAB/E-MTAB-8593.processed.1',
+                        recursive = F)
+
+tmp <- lapply(path.OmMac, function(x) f(x, project = 'OmMac'))
+OmMac <- merge(x = tmp[[1]],
+               y =  c(tmp[[2]], tmp[[3]], tmp[[4]]), 
+               add.cell.ids = paste0('rep',1:4), 
+               project = "OmMac")
+log_trace('..... Omental Stroma')
+# Omental Stroma (read from a single directory)
+path.Stroma <- '/rdx/db/scRNA/GSE136636_RAW/'
+Stroma <- CreateSeuratObject(counts = Read10X(path.Stroma),
+                             project = 'Stroma')
+
+log_trace('..... Peritoneal Cavity')
+
+# Peritoneal Cavity: four sample directories from two series, in the same order as PerC_sample.list
+path.PerC <-  c("/rdx/db/scRNA/GSE121521_RAW/GSM3438655_Beclin_lysM",
+                '/rdx/db/scRNA/GSE121521_RAW/GSM3438654_Beclin_flox',
+                '/rdx/db/scRNA/GSE124562_RAW/GSM3536577_wt',
+                '/rdx/db/scRNA/GSE124562_RAW/GSM3536578_ko'
+)
+
+PerC_series.list <- c('GSE121521','GSE124562')
+PerC_sample.list <- c('GSM3438655','GSM3438654','GSM3536577','GSM3536578')
+
+tmp <- lapply(path.PerC, function(x) f(x, project = 'PerC'))
+PerC <- merge( x= tmp[[1]],
+               y = tmp[-1],
+               add.cell.ids = PerC_sample.list,
+               project = 'PerC'
+)
+
+# column count of each PerC sample; used further down to build the per-series `dataset` labels
+PerC_lengths <- sapply(tmp, ncol)
+
+log_trace('..... Inflammatory Fibroblasts')
+# Inflammatory Fibroblasts (single directory)
+path.fibro <- '/rdx/db/scRNA/GSE129087/'
+Fibroblasts <- CreateSeuratObject(counts = Read10X(path.fibro),
+                                  project = 'Fibroblasts')
+
+log_trace('..... Inflammatory Endothelium')
+# Inflammatory Endothelium (single directory)
+path.Endo <-  c("/rdx/db/E-MTAB/E-MTAB-7149/result/outs/filtered_feature_bc_matrix/")
+Endothelium <- CreateSeuratObject(counts = Read10X(path.Endo),
+                                  project = 'Endothelium')
+
+log_trace('..... Stromal Vascular Cells from adipose tissue')
+# Stromal Vascular Cells from adipose tissue: three directories, in the same order as SVC_sample.list
+path.SVC <-  c("/rdx/db/scRNA/GSE128890_RAW/GSM3717977_SCmurinep12/",
+               '/rdx/db/scRNA/GSE128890_RAW/GSM3717978_SCmurineAdult/',
+               '/rdx/db/scRNA/E-MTAB-6677/data/'
+)
+
+SVC_series.list <- c('GSE128890','E.MTAB.6677')
+SVC_sample.list <- c('GSM3717977','GSM3717978','E-MTAB-6677')
+
+
+
+
+tmp <- lapply(path.SVC, function(x) f(x, project = 'SVC'))
+SVC <- merge( x= tmp[[1]],
+              y = tmp[-1],
+              add.cell.ids = SVC_sample.list,
+              project = 'SVC'
+)
+
+# column count of each SVC sample; used further down to build the per-series `dataset` labels
+SVC_lengths <- sapply(tmp, ncol)
+
+log_success('Created Seurat Objects')
+
+log_info('Merging all into one dataset')
+# Merge into One Dataset ----
+# add.cell.ids is positional: one prefix for `x`, then one per element of `y`, in order
+PerNiche <- merge(
+  x = OmMac,
+  y = c(Stroma, PerC, Endothelium, Fibroblasts,SVC),
+  add.cell.ids = c('OmMac', 'Stroma', 'PerC', 'Endothelium','Fibroblasts','SVC'),
+  project = 'PerNiche'
+)
+
+log_success('PerNiche object created')
+
+log_info('Adding Meta data')
+# Add Meta-data (labels follow the merge order: OmMac, Stroma, PerC, Endothelium, Fibroblasts, SVC) ----
+PerNiche@meta.data$tissue <- c(
+  rep('Omentum', ncol(OmMac)+ncol(Stroma)),
+  rep('Peritoneal Cavity', ncol(PerC)),
+  rep('Aorta Endothelium', ncol(Endothelium)),
+  rep('Synovial Stroma', ncol(Fibroblasts)),
+  rep('Stromal Vascular Cells', ncol(SVC))
+)
+# cell type label per source object
+PerNiche@meta.data$type <- c(
+  rep('Macrophage', ncol(OmMac)),
+  rep('Stromal', ncol(Stroma)),
+  rep('PerC-Cell', ncol(PerC)),
+  rep('Endothelium', ncol(Endothelium)),
+  rep('Stromal', ncol(Fibroblasts)),
+  rep('SVC', ncol(SVC))
+)
+
+# PerC and SVC span several series: each series id is repeated for the summed size of its samples
+PerNiche@meta.data$dataset <- c(
+  rep('E.MTAB.8593', ncol(OmMac)),
+  rep('GSE136636', ncol(Stroma)),
+  rep(PerC_series.list, times = c(sum(PerC_lengths[1:2]), sum(PerC_lengths[3:4]))),
+  rep('E.MTAB.7149', ncol(Endothelium)),
+  rep('GSE129087', ncol(Fibroblasts)),
+  rep(SVC_series.list, times = c(sum(SVC_lengths[1:2]), sum(SVC_lengths[3])))
+)
+
+
+# Cleanup ----
+rm(list = c('OmMac','Stroma','PerC', 'Endothelium','Fibroblasts',
+            'path.PerC','path.OmMac','path.Stroma','path.fibro','path.Endo',
+            'tmp', 'SVC','path.SVC' ))
+
+
+log_info('Seurat Preprocessing')
+# Seurat Preprocessing ----
+
+# per-cell percentages for features matching "^mt-" and "^Rp[sl]", stored as percent.mt / percent.ribo
+PerNiche <- PercentageFeatureSet(PerNiche,
+                                 pattern = "^mt-",
+                                 col.name = "percent.mt")
+
+PerNiche <- PercentageFeatureSet(PerNiche,
+                                 pattern = "^Rp[sl]",
+                                 col.name = "percent.ribo")
+
+# cutoffs used by the WhichCells() calls below
+low <- 200    # keep cells with nFeature_RNA above this
+high <- 5700  # keep cells with nFeature_RNA below this
+ribo <- 3.5   # keep cells with percent.ribo above this
+mito <- 20    # keep cells with percent.mt below this
+
+library(data.table)
+
+tc <- length(Cells(PerNiche))  # cell count before filtering (not referenced again in this script)
+# all four cell selections are computed on the unfiltered object, then applied one after another
+# cells below the upper gene-count cutoff
+high.det <- WhichCells(PerNiche, expression = nFeature_RNA < high)
+
+# cells above the lower gene-count cutoff
+low.det<- WhichCells(PerNiche, expression = nFeature_RNA > low )
+
+# Mito/Ribo filtering
+selected.mt <- WhichCells(PerNiche, expression = percent.mt < mito)
+selected.ribo <- WhichCells(PerNiche, expression = percent.ribo  > ribo)
+
+log_trace('..... removing high expressing cells')
+# keep only the cells under the upper gene-count cutoff
+PerNiche  <- subset(PerNiche,
+                    cells=high.det)
+
+log_trace('..... removing low expressing low')
+# keep only the cells over the lower gene-count cutoff
+PerNiche <- subset(PerNiche, 
+                   cells=low.det)
+
+log_trace('.....removing cells with >20% MT genes')
+# and subset the object to only keep those cells
+PerNiche <- subset(PerNiche, cells = selected.mt)
+
+log_trace('.....removing cells with <3.5% Rp genes')
+PerNiche <- subset(PerNiche, cells = selected.ribo)
+
+log_success('Finished Filtering')
+
+log_info('Normalization')
+
+# Normalization (NormalizeData with only `verbose` set) ----
+PerNiche <- NormalizeData(
+  PerNiche,
+  verbose = TRUE
+) 
+
+log_success('')  # empty success line marks the end of the step
+
+log_info('Finding 4000 Variable Features')
+# Variable Features ----
+PerNiche <- FindVariableFeatures(
+  PerNiche,
+  selection.method = "vst",
+  nfeatures = 4000  # keep in sync with the log message above
+)
+
+log_success('')
+
+log_info('Determining Cell Cycle Score')
+# cell-cycle gene symbols are converted to Title case before scoring
+ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes)
+ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes)
+
+PerNiche <- CellCycleScoring(PerNiche,
+                             s.features = ccss,
+                             g2m.features = ccg2m)
+# CellCycleScoring() stores its scores as `S.Score` and `G2M.Score` (capital M)
+PerNiche$CC.Difference <- PerNiche$S.Score - PerNiche$G2M.Score
+
+log_info('Scaling Data')
+# Scaling Data (regressing out the S/G2M score difference) ----
+PerNiche <- ScaleData(
+  PerNiche, 
+  vars.to.regress = 'CC.Difference',
+  verbose = TRUE
+)
+
+log_success('')
+
+log_info('Performing PCA using 100npcs')
+# PCA on the variable features selected above ----
+PerNiche <- RunPCA(
+  PerNiche,
+  features = VariableFeatures(PerNiche),
+  npcs = 100,
+  verbose = TRUE
+)
+
+log_success('')
+
+log_info('Harmonizing datasets')
+# Harmonizing the Integrated Datasets ----
+# grouping variable is the per-series `dataset` meta data column assigned earlier
+library(harmony)
+
+PerNiche <- RunHarmony(
+  PerNiche,group.by.vars = 'dataset',
+  # sigma = 0.1,                          # width of soft kmeans clusters,
+  # theta = 8,                            # encourages diverse clusters
+  nclust = 50,                          # number of clusters in model,
+  max.iter.cluster = 100,
+  max.iter.harmony = 100
+)
+
+log_success('')
+
+log_info('Dimensional Reduction Post-Harmonizing')
+# Dimensional Reduction Post-Harmonizing  ----
+
+log_trace('.......UMAP')
+# UMAP on harmony dimensions 1:100
+PerNiche <-  RunUMAP(PerNiche,
+                     reduction = "harmony",
+                     dims = 1:100)
+
+
+log_trace('.......TSNE')
+# learning rate and perplexity are both derived from the number of columns of PerNiche
+#t-SNE (currently using FIt-SNE Method)
+# PerNiche <-  RunTSNE(PerNiche, reduction = "harmony", dims = 1:20, tsne.method = "fftRtsne")
+learningRate <- dim(PerNiche)[2]/12
+Perplexity <- dim(PerNiche)[2]/100
+
+# Rescales the selected embedding columns so that the first one has standard deviation 0.0001.
+createINITmatrix <- function(x, reduction = 'pca', dims = 1:2){
+  emb <- as.matrix(Seurat::Embeddings(x, reduction = reduction))[,dims]
+  (emb/sd(emb[,1]))*0.0001
+}
+
+init.mat <- createINITmatrix(PerNiche, reduction = 'harmony',dims = 1:100)
+# NOTE(review): init.mat keeps all 100 columns (dims = 1:100) — confirm FIt-SNE accepts an initialization of that width
+PerNiche <-  RunTSNE(PerNiche,
+                     reduction = "harmony",
+                     dims = 1:100,
+                     tsne.method = "FIt-SNE",
+                     fast_tsne_path = '/rdx/software/FIt-SNE-1.1.0/bin/fast_tsne',
+                     perplexity = Perplexity,
+                     learning_rate = learningRate,
+                     k = 10,
+                     late_exag_coeff = 4,
+                     initialization = init.mat
+)
+
+log_success('Finished with dimension reduction')
+# checkpoint of the object so far; the relative path resolves against the working directory
+log_warn('Saving current results to tmpPerniche.rds')
+saveRDS(PerNiche,'tmpPerniche.rds')
+log_success('')
+
+log_info('Finding Neighbors')
+# Finding Neighbors (annoy with cosine metric on harmony dimensions 1:100) ----
+PerNiche <-  FindNeighbors(PerNiche,
+                           nn.method = 'annoy',
+                           reduction = "harmony",
+                           annoy.metric = 'cosine',
+                           force.recalc = TRUE,
+                           dims = 1:100)
+
+log_info('Finding Clusters')
+# Finding Clusters ----
+PerNiche <-  FindClusters(PerNiche,
+                          algorithm = 3,  # NOTE(review): presumably SLM in Seurat's numbering — confirm
+                          resolution = 1,
+                          method = 'igraph',
+                          n.start = 30,
+                          n.iter = 100,
+                          verbose = TRUE
+                          )
+
+# plots are wrapped in print() so they are drawn even when the script is run via source()
+
+png('~/Desktop/PerNiche-umap.png', height = 10, width = 10, res = 300, units = 'in')
+print(DimPlot(PerNiche, reduction = 'umap', repel = TRUE, label = TRUE, order = TRUE)+NoLegend())
+dev.off()
+
+png('~/Desktop/PerNiche-tsne.png', height = 10, width = 10, res = 300, units = 'in')
+print(DimPlot(PerNiche, reduction = 'tsne', repel = TRUE,label = TRUE, order = TRUE)+NoLegend())
+dev.off()
+
+log_info('Identifying Cluster Markers')
+# Identifying Cluster Markers ----
+
+cluster.markers <- FindAllMarkers(
+  PerNiche,
+  logfc.threshold = 0.5,
+  verbose = TRUE
+)
+# convert the marker table to a data.table in place
+library(data.table)
+setDT(cluster.markers)
+feat_cols <- c('lightgrey',viridis::plasma(5)) # best color palette for FeaturePlot
+
+log_info('Saving Results as .rds file')
+# Saving Results as .rds file (object, marker table and palette bundled in one list) ----
+saveRDS(list(PerNiche = PerNiche, markers = cluster.markers, feat_cols = feat_cols), '/rdx/projects/QE/data/20200519_PerNiche_01.rds')
+
+log_success('Enjoy your spoils')
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents
new file mode 100644
index 0000000..a9dd400
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object  Seurat object providing the input embeddings
+#' @param reduction  name of the reduction whose cell embeddings are passed to `uwot::umap()`
+#' @param spread  passed through to `uwot::umap()`
+#' @param n_components  passed through to `uwot::umap()`
+#' @param min_dist  passed through to `uwot::umap()`
+#' @param metric  passed through to `uwot::umap()`
+#' @param n_neighbors  passed through to `uwot::umap()`
+#' @param set_op_mix_ratio  passed through to `uwot::umap()`
+#' @param local_connectivity  passed through to `uwot::umap()`
+#' @param repulsion_strength  passed through to `uwot::umap()`
+#' @param negative_sample_rate  passed through to `uwot::umap()`
+#' @param n_threads  passed through to `uwot::umap()`; defaults to one less than the detected core count
+#' @param reduction_name  name under which the new reduction is stored in `object`
+#' @param return_seurat  if `TRUE` return the updated object, otherwise the coordinate matrix
+#' @param verbose  passed through to `uwot::umap()`
+#'
+#' @return `object` with the new reduction added, or the UMAP coordinate matrix when `return_seurat = FALSE`
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+#' @examples
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads =  parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           metric = metric,
+                           n_threads = n_threads,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+  dimnames(umap_res) <- list(rownames(embds), paste0('UMAP_', seq_len(ncol(umap_res)))) # cell names as rows
+  if(!return_seurat){
+    return(umap_res)
+  }
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA')
+  return(object)
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object  Seurat object providing the input embeddings
+#' @param reduction  name of the reduction whose cell embeddings are passed to `uwot::umap()`
+#' @param spread  passed through to `uwot::umap()`
+#' @param n_components  passed through to `uwot::umap()`; `NULL` uses the number of columns of the input embeddings
+#' @param min_dist  passed through to `uwot::umap()`
+#' @param metric  passed through to `uwot::umap()`
+#' @param n_neighbors  passed through to `uwot::umap()`
+#' @param set_op_mix_ratio  passed through to `uwot::umap()`
+#' @param local_connectivity  passed through to `uwot::umap()`
+#' @param repulsion_strength  passed through to `uwot::umap()`
+#' @param negative_sample_rate  passed through to `uwot::umap()`
+#' @param n_threads  passed through to `uwot::umap()`; defaults to one less than the detected core count
+#' @param reduction_name  name under which the new reduction is stored in `object`
+#' @param return_seurat  if `TRUE` return the updated object, otherwise the coordinate matrix
+#' @param verbose   whether to print function messages
+#'
+#' @return `object` with the new reduction added, or the UMAP coordinate matrix when `return_seurat = FALSE`
+#' @export
+#'
+#' @examples
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           n_threads = n_threads,
+                           metric = metric,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+  # label rows with the cell names of the input embeddings
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', seq_len(ncol(umap_res)))
+
+  if(return_seurat){
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                             key = 'clustUMAP_',
+                                                             assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3 b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3
new file mode 100644
index 0000000..1ddfb65
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3
@@ -0,0 +1,21 @@
+{
+    "id": "A102BAE3",
+    "path": "/rdx/projects/dbsinglecell/DESCRIPTION",
+    "project_path": "DESCRIPTION",
+    "type": "dcf",
+    "hash": "3089006847",
+    "contents": "",
+    "dirty": false,
+    "created": 1601696304924.0,
+    "source_on_save": false,
+    "relative_order": 5,
+    "properties": {},
+    "folds": "",
+    "lastKnownWriteTime": 1601700213,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1601700213309,
+    "read_only": false,
+    "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents
new file mode 100644
index 0000000..8cddfb3
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents
@@ -0,0 +1,34 @@
+Package: dbsinglecell
+Type: Package
+Title: Dewey Brooke's Single-Cell Toolkit
+Version: 0.1
+Date: 2019-06-21
+Authors@R: c(
+    person("Dewey", "Brooke", , "dbrooke@uab.edu", role = c("aut", "cre"), 
+      comment = c(ORCID = "0000-0003-4290-3809")
+    )
+    )
+Maintainer: Dewey Brooke <dbrooke@uab.edu>
+Description: A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk.  
+License: GPL (>= 2)
+Imports: 
+  Rcpp (>= 1.0.5),
+  Matrix,
+  data.table,
+  doParallel,
+  foreach,
+  progress,
+  Seurat,
+  colorspace,
+  crayon,
+  snow,
+  doSNOW,
+  colortools,
+  stringr,
+  uwot,
+  viridis,
+  SingleCellExperiment,
+  reticulate
+LinkingTo: Rcpp
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.1
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents
new file mode 100644
index 0000000..ce41ba9
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding  input matrix handed to the Python reducer's `fit_transform()`
+#' @param a  passed through to Python `umap.UMAP`
+#' @param angular_rp_forest  passed through to Python `umap.UMAP`
+#' @param b  passed through to Python `umap.UMAP`
+#' @param force_approximation_algorithm  passed through to Python `umap.UMAP`
+#' @param init  passed through to Python `umap.UMAP`
+#' @param learning_rate  passed through to Python `umap.UMAP`
+#' @param local_connectivity  passed to Python `umap.UMAP` after `as.integer()`
+#' @param low_memory  passed through to Python `umap.UMAP`
+#' @param metric  passed through to Python `umap.UMAP`
+#' @param metric_kwds  passed through to Python `umap.UMAP`
+#' @param min_dist  passed through to Python `umap.UMAP`
+#' @param n_components  passed to Python `umap.UMAP` after `as.integer()`
+#' @param n_epochs  passed to Python `umap.UMAP` after `as.integer()`
+#' @param n_neighbors  passed to Python `umap.UMAP` after `as.integer()`
+#' @param negative_sample_rate  passed through to Python `umap.UMAP`
+#' @param output_metric  passed through to Python `umap.UMAP`
+#' @param output_metric_kwds  passed through to Python `umap.UMAP`
+#' @param random_state  passed to Python `umap.UMAP` after `as.integer()`
+#' @param repulsion_strength  passed through to Python `umap.UMAP`
+#' @param set_op_mix_ratio  passed through to Python `umap.UMAP`
+#' @param spread  passed through to Python `umap.UMAP`
+#' @param target_metric  passed through to Python `umap.UMAP`
+#' @param target_metric_kwds  passed through to Python `umap.UMAP`
+#' @param target_n_neighbors  passed to Python `umap.UMAP` after `as.integer()`
+#' @param target_weight  passed through to Python `umap.UMAP`
+#' @param transform_queue_size  passed through to Python `umap.UMAP`
+#' @param transform_seed  passed to Python `umap.UMAP` after `as.integer()`
+#' @param unique  passed through to Python `umap.UMAP`
+#' @param verbose  passed through to Python `umap.UMAP`
+#' @param nThreads  number of parallel threads to be used (written to the `OMP_NUM_THREADS` environment variable)
+#'
+#' @return the value returned by the reducer's `fit_transform(embedding)`
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#'
+#' @examples
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=as.integer(local_connectivity),
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+  # fit the reducer on the input and hand back the transformed coordinates
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+# Seurat wrapper: runs Python `umap.UMAP` on a stored reduction and optionally adds the result to `object`.
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+  # cell embeddings of the requested reduction, optionally restricted to `dims`
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # subset the columns here rather than relying on Embeddings() to honour a `dims` argument
+    embedding <- embedding[, dims, drop = FALSE]
+  }
+
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+  rownames(result) <- rownames(embedding) # carry the cell names over to the new coordinates
+  if(return_seurat){
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA')
+    return(object)
+  } else {
+    return(result)
+  }
+}
+
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents
new file mode 100644
index 0000000..1768c36
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents
@@ -0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath  directory passed to `Seurat::Read10X()`
+#' @param sample  sample name used as cell-id prefix and project name; defaults to `basename(filepath)`
+#' @param meta  optional meta data handed to `Seurat::CreateSeuratObject()` (presumably keyed by the prefixed cell ids — confirm)
+#'
+#' @return the Seurat object created from the 10X data
+#' @export
+#' @import Seurat
+#'
+#' @examples
+create_seurat <- function(filepath, sample = NULL, meta = NULL){
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # create unique cell ids
+  colnames(x) <- paste0(sample, '_', colnames(x))
+
+  # create Seurat Object and include meta data
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample)
+  })
+
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' @param object  Seurat object to filter, normalize, score, scale and run PCA on
+#' @param species  'Homo sapiens' selects the '^MT-' pattern; 'Mus musculus' switches the cell-cycle genes to Title case
+#' @param nfeatures  number of variable features passed to `Seurat::FindVariableFeatures()`
+#' @param npcs  number of principal components passed to `Seurat::RunPCA()`
+#'
+#' @return the processed Seurat object
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+#' @examples
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                 pattern = mt_pattern,
+                                 col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # Removing low quality cells and doublets
+  object <- subset(object, subset = percent.mt < 20 & nFeature_RNA > 500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object<- Seurat::NormalizeData(object, verbose = TRUE)
+  # Variable Features
+
+  message_section(paste('Finding',nfeatures,'most variable features'))
+  object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes)
+    ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes)
+  } else { # use built-in gene names
+    ccss <- cc.genes.updated.2019$s.genes
+    ccg2m <- cc.genes.updated.2019$g2m.genes
+  }
+
+  # scoring function
+  object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <-object$S.Score -object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA on the variable features selected above ----
+  object<- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+
+}
+
+# Prints a dashed rule followed by a bold, timestamped section heading.
+message_section <- function(text){
+  rule_width <- min(ceiling(getOption('width')*0.75), 120)
+  cat("\n",rep('-',rule_width), "\n",sep = '')
+  stamp <- crayon::bold(crayon::yellow(paste0('[',Sys.time(),']')))
+  heading <- crayon::bold(crayon::green(text))
+  cat(stamp, heading,'\n')
+}
+
+# Prints a yellow timestamp, followed by `text` only when it fits the width budget.
+message_task <- function(text){
+  budget <- min(ceiling(getOption('width')*0.75), 120)
+  fits <- !(nchar(text) > budget -22)
+  stamp <- crayon::yellow(paste0('[',Sys.time(),']'))
+
+  # over-long text is dropped and only the timestamp is shown
+  if(fits){
+    cat(stamp, text,'\n')
+  } else {
+    cat(stamp,'\n')
+  }
+}
+
+# Prints `text` as an indented silver "- " bullet; over-long text is skipped silently.
+message_append <- function(text){
+  budget <- min(ceiling(getOption('width')*0.75), 120)
+
+  too_long <- nchar(text) > budget - 22
+  if(too_long){
+    return(invisible())
+  }
+
+  indent <- rep(' ',23)
+  cat(indent,crayon::silver('- '),crayon::silver(text),'\n', sep = '')
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents
new file mode 100644
index 0000000..9e9e7ea
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents
@@ -0,0 +1,17 @@
+#' Add identity-derived meta data to a Seurat object
+#'
+#' @param object  Seurat object whose identities are used to look up values
+#' @param meta  values indexed by identity label (as character)
+#' @param col.name   name of the meta data column to create
+#'
+#' @return the result of `Seurat::AddMetaData()`
+#' @export
+#' @import Seurat
+#'
+#' @examples
+NewMeta <- function(object, meta, col.name){
+  ident_labels <- as.character(Seurat::Idents(object))
+  per_cell <- meta[ident_labels]
+  names(per_cell) <- colnames(object)
+  Seurat::AddMetaData(object, per_cell, col.name)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents
new file mode 100644
index 0000000..3d824a5
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding  input matrix handed to the Python reducer's `fit_transform()`
+#' @param a  passed through to Python `umap.UMAP`
+#' @param angular_rp_forest  passed through to Python `umap.UMAP`
+#' @param b  passed through to Python `umap.UMAP`
+#' @param force_approximation_algorithm  passed through to Python `umap.UMAP`
+#' @param init  passed through to Python `umap.UMAP`
+#' @param learning_rate  passed through to Python `umap.UMAP`
+#' @param local_connectivity  passed to Python `umap.UMAP` after `as.integer()`
+#' @param low_memory  passed through to Python `umap.UMAP`
+#' @param metric  passed through to Python `umap.UMAP`
+#' @param metric_kwds  passed through to Python `umap.UMAP`
+#' @param min_dist  passed through to Python `umap.UMAP`
+#' @param n_components  passed to Python `umap.UMAP` after `as.integer()`
+#' @param n_epochs  passed to Python `umap.UMAP` after `as.integer()`
+#' @param n_neighbors  passed to Python `umap.UMAP` after `as.integer()`
+#' @param negative_sample_rate  passed through to Python `umap.UMAP`
+#' @param output_metric  passed through to Python `umap.UMAP`
+#' @param output_metric_kwds  passed through to Python `umap.UMAP`
+#' @param random_state  passed to Python `umap.UMAP` after `as.integer()`
+#' @param repulsion_strength  passed through to Python `umap.UMAP`
+#' @param set_op_mix_ratio  passed through to Python `umap.UMAP`
+#' @param spread  passed through to Python `umap.UMAP`
+#' @param target_metric  passed through to Python `umap.UMAP`
+#' @param target_metric_kwds  passed through to Python `umap.UMAP`
+#' @param target_n_neighbors  passed to Python `umap.UMAP` after `as.integer()`
+#' @param target_weight  passed through to Python `umap.UMAP`
+#' @param transform_queue_size  passed through to Python `umap.UMAP`
+#' @param transform_seed  passed to Python `umap.UMAP` after `as.integer()`
+#' @param unique  passed through to Python `umap.UMAP`
+#' @param verbose  passed through to Python `umap.UMAP`
+#' @param nThreads  value written to the `OMP_NUM_THREADS` environment variable; defaults to one less than the detected core count
+#'
+#' @return the value returned by the reducer's `fit_transform(embedding)`
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#'
+#' @examples
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=as.integer(local_connectivity),
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+  # fit the reducer on the input and hand back the transformed coordinates
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+# Seurat wrapper: runs Python `umap.UMAP` on a stored reduction and optionally adds the result to `object`.
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+  # cell embeddings of the requested reduction, optionally restricted to `dims`
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # subset the columns here rather than relying on Embeddings() to honour a `dims` argument
+    embedding <- embedding[, dims, drop = FALSE]
+  }
+
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+  rownames(result) <- rownames(embedding) # carry the cell names over to the new coordinates
+  if(return_seurat){
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA')
+    return(object)
+  } else {
+    return(result)
+  }
+}
+
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents
new file mode 100644
index 0000000..aca7d14
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents
@@ -0,0 +1,156 @@
+#' Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x data handed unchanged to the Python `fit()` method of the clusterer
+#' @param algorithm forwarded to `hdbscan.HDBSCAN(algorithm = )`
+#' @param alpha forwarded to `hdbscan.HDBSCAN(alpha = )`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN(approx_min_span_tree = )`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN(gen_min_span_tree = )`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN(leaf_size = )` after coercion to integer
+#' @param metric forwarded to `hdbscan.HDBSCAN(metric = )`
+#' @param min_cluster_size forwarded to `hdbscan.HDBSCAN(min_cluster_size = )` after coercion to integer
+#' @param min_samples forwarded to `hdbscan.HDBSCAN(min_samples = )` after coercion to integer
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN(cluster_selection_epsilon = )`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN(cluster_selection_method = )`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN(core_dist_n_jobs = )` after coercion to integer
+#' @param prediction_data forwarded to `hdbscan.HDBSCAN(prediction_data = )`
+#'
+#' @return a list with `labels` (factor; the `-1` label is recoded to `NA`), `probabilities`, `cluster_persistance`, `exemplars` and `outlier_scores`
+#' @export
+#'
+#' @import reticulate
+#'
+#' @examples
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # reticulate hands R doubles to Python as floats, so the count-like arguments
+  # (leaf_size, nThreads, min_cluster_size, min_samples) are coerced to integer.
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+  clusterer$fit(x)
+
+  # Recode only the '-1' level (hdbscan's noise label) to NA. Blanking the first
+  # level unconditionally would erase a real cluster when no point is noise.
+  labels <- factor(clusterer$labels_)
+  levels(labels)[levels(labels) == '-1'] <- NA
+  result <- list(
+    labels = labels,
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+  return(result)
+}
+
+
+
+#' Hierarchical Density-Based Spatial Clustering of Applications with Noise (Seurat input)
+#'
+#' @param object Seurat object whose embeddings are clustered
+#' @param reduction name of the reduction passed to `Seurat::Embeddings()`
+#' @param dims columns of the embedding to use; `NULL` uses all of them
+#' @param algorithm forwarded to `hdbscan.HDBSCAN(algorithm = )`
+#' @param alpha forwarded to `hdbscan.HDBSCAN(alpha = )`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN(approx_min_span_tree = )`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN(gen_min_span_tree = )`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN(leaf_size = )` after coercion to integer
+#' @param metric forwarded to `hdbscan.HDBSCAN(metric = )`
+#' @param min_cluster_size forwarded to `hdbscan.HDBSCAN(min_cluster_size = )` after coercion to integer
+#' @param min_samples forwarded to `hdbscan.HDBSCAN(min_samples = )` after coercion to integer
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN(cluster_selection_epsilon = )`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN(cluster_selection_method = )`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN(core_dist_n_jobs = )` after coercion to integer
+#' @param return_seurat logical; `TRUE` returns the original object with the result attached, `FALSE` returns the raw HDBSCAN result list
+#' @param prediction_data forwarded to `hdbscan.HDBSCAN(prediction_data = )`
+#'
+#' @return the Seurat object with the result list in `misc$hdbscan` and the labels in the `cl` meta data column, or the result list itself when `return_seurat = FALSE`
+#' @export
+#'
+#' @examples
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+
+  x <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # drop = FALSE keeps a matrix even when a single dimension is requested
+    x <- x[, dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # reticulate hands R doubles to Python as floats, so the count-like arguments
+  # (leaf_size, nThreads, min_cluster_size, min_samples) are coerced to integer.
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  # Recode only the '-1' level to NA; blanking level 1 blindly would erase a real cluster.
+  cluster_labels <- factor(clusterer$labels_)
+  hdbscan_labels <- cluster_labels
+  levels(hdbscan_labels)[levels(hdbscan_labels) == '-1'] <- NA
+  result <- list(
+    labels = hdbscan_labels,
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+  if(!return_seurat){
+    return(result)
+  }
+  # the `cl` column keeps the raw labels (including '-1'), unlike result$labels
+  object@misc$hdbscan <- result
+  object$cl <- cluster_labels
+  return(object)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents
new file mode 100644
index 0000000..2f9f0fc
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object Seurat object holding the input reduction
+#' @param reduction name of the reduction whose embeddings are fed to `uwot::umap()`
+#' @param spread forwarded to `uwot::umap(spread = )`
+#' @param n_components forwarded to `uwot::umap(n_components = )`
+#' @param min_dist forwarded to `uwot::umap(min_dist = )`
+#' @param metric forwarded to `uwot::umap(metric = )`
+#' @param n_neighbors forwarded to `uwot::umap(n_neighbors = )`
+#' @param set_op_mix_ratio forwarded to `uwot::umap(set_op_mix_ratio = )`
+#' @param local_connectivity forwarded to `uwot::umap(local_connectivity = )`
+#' @param repulsion_strength forwarded to `uwot::umap(repulsion_strength = )`
+#' @param negative_sample_rate forwarded to `uwot::umap(negative_sample_rate = )`
+#' @param n_threads forwarded to `uwot::umap(n_threads = )`
+#' @param reduction_name name under which the new reduction is stored in the object
+#' @param return_seurat logical; `TRUE` returns the object with the new reduction, `FALSE` returns the matrix produced by `uwot::umap()`
+#' @param verbose whether to print function messages (forwarded to `uwot::umap(verbose = )`)
+#'
+#' @return the Seurat object with the reduction added, or the UMAP matrix when `return_seurat = FALSE`
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+#' @examples
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads =  parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           metric = metric,
+                           n_threads = n_threads,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+  if(!return_seurat){
+    return(umap_res)
+  }
+  # reductions are attached with `[[<-`; single-bracket assignment does not store a reduction
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA')
+  return(object)
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object Seurat object holding the input reduction
+#' @param reduction name of the reduction whose embeddings are fed to `uwot::umap()`
+#' @param spread forwarded to `uwot::umap(spread = )`
+#' @param n_components forwarded to `uwot::umap(n_components = )`; `NULL` uses the number of columns of the input embedding
+#' @param min_dist forwarded to `uwot::umap(min_dist = )`
+#' @param metric forwarded to `uwot::umap(metric = )`
+#' @param n_neighbors forwarded to `uwot::umap(n_neighbors = )`
+#' @param set_op_mix_ratio forwarded to `uwot::umap(set_op_mix_ratio = )`
+#' @param local_connectivity forwarded to `uwot::umap(local_connectivity = )`
+#' @param repulsion_strength forwarded to `uwot::umap(repulsion_strength = )`
+#' @param negative_sample_rate forwarded to `uwot::umap(negative_sample_rate = )`
+#' @param n_threads forwarded to `uwot::umap(n_threads = )`
+#' @param reduction_name name under which the new reduction is stored in the object
+#' @param return_seurat logical; `TRUE` returns the object with the new reduction, `FALSE` returns the UMAP matrix
+#' @param verbose   whether to print function messages
+#'
+#' @return the Seurat object with the reduction added, or the UMAP matrix (rows named after the cells, columns `UMAP_1`, `UMAP_2`, ...) when `return_seurat = FALSE`
+#' @export
+#'
+#' @examples
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  # by default keep as many UMAP components as the input reduction has columns
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           n_threads = n_threads,
+                           metric = metric,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', seq_len(ncol(umap_res)))
+
+  if(!return_seurat){
+    return(umap_res)
+  }
+  # reductions are attached with `[[<-`; single-bracket assignment does not store a reduction
+  object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                           key = 'clustUMAP_',
+                                                           assay = 'RNA')
+  return(object)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents
new file mode 100644
index 0000000..8801c0f
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents
@@ -0,0 +1,145 @@
+#' CellPhoneDB Summary File
+#'
+#' @param path the directory containing the CellPhoneDB output (`means.txt` and `pvalues.txt`)
+#' @param pvalue `'all'` keeps every row, `'significant'` keeps rows with a p-value below 0.05, and a single number keeps rows with a p-value below that number
+#'
+#' @return a data.table with one row per interaction and cell pair: `cell_a`, `cell_b`, `gA`, `gB`, `cell_pair`, the id columns of the input files, `mean` and `pvalue`
+#' @export
+#'
+#' @examples
+#' @import data.table
+#' @import Matrix
+#' @import crayon
+#'
+cellphonedb_summary <- function(path, pvalue = 'all'){
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files'))))
+  means <- data.table::fread(file.path(path,'means.txt'))
+  pvalues <- data.table::fread(file.path(path, 'pvalues.txt'))
+  # the first 11 columns are kept as id columns; every later column is melted into a cell_pair row
+  id.vars <- colnames(means)[1:11]
+
+  means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean')
+  pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue')
+
+  data.table::setkeyv(means, c('cell_pair',id.vars))
+  data.table::setkeyv(pvalues, c('cell_pair',id.vars))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets'))))
+  result <- data.table::merge.data.table(means, pvalues)
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs'))))
+  int_pairs <- strcapture('(.+)\\_(.+)', result$interacting_pair,
+                          data.table::data.table(gA = character(), gB = character()))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs'))))
+  cell_pair <- strcapture('(.+)\\|(.+)', result$cell_pair,
+                          data.table::data.table(cell_a = character(), cell_b = character()))
+
+  result <- data.table::data.table(cell_pair, int_pairs, result)
+  # 'significant' is shorthand for a 0.05 cutoff; a numeric `pvalue` is used as the cutoff itself
+  cutoff <- if(identical(pvalue, 'significant')) 0.05 else if(is.numeric(pvalue)) pvalue[1] else NULL
+  if(!is.null(cutoff)){
+    # which() leaves out rows whose p-value is NA
+    result <- result[which(result$pvalue < cutoff)]
+  }
+  cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished'))))
+
+  return(result)
+}
+
+
+# prep_cellphonedb <- function(rds, meta_column, path){
+#   require(data.table)
+#   require(Seurat)
+#
+#   object <- readRDS(rds)
+#
+#   res <- sparse2DT.Seurat(object)
+#
+#   new.meta <- object@meta.data[,meta_column]
+#   names(new.meta) <- rownames(object@meta.data)
+#
+#   # add cell_types to res
+#   res[,cell_subset:=new.meta[res$Cell]]
+#   data.table::setkey(res, Genes, cell_subset)
+#
+#   # generate summary information to be used for filtering uninformative genes
+#   test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)]
+#   test[,total:=sum(N),Genes]
+#   test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+#
+#   # create vector with leftover genes
+#   gl <- unique(test$Genes)
+#
+#   # subset count dataset
+#   res <- res[Genes %in% gl]
+#
+#   # create counts file
+#   counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+#   colnames(counts)[1] <- 'Gene'
+#   setkey(counts, Gene)
+#
+#   m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+#   mz_genes <- m2h$Ensembl_gene_id
+#   names(mz_genes) <- m2h$mouse
+#   new_genes <- mz_genes[counts$Gene]
+#   names(new_genes) <- counts$Gene
+#   new_genes <- new_genes[!is.na(new_genes)]
+#
+#   dim(counts)
+#   counts <- counts[Gene  %in% names(new_genes)]
+#   counts[,Gene:=new_genes[Gene]]
+#
+#   # create meta file
+#   meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+#   meta <- meta[Cell %in% colnames(counts)[-1]]
+#   fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+#   fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' @param matrix sparse matrix with row and column names; one output row is produced per entry reported by `Matrix::summary()`
+#'
+#' @return a data.table with the columns `Genes` (row name), `Cell` (column name) and `Count` (stored value)
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+#'
+#'
+sparse2DT <- function(matrix){
+
+  # triplet (i, j, x) view of the stored entries
+  triplets <- Matrix::summary(matrix)
+
+  # swap the row/column indices for the matching dimnames
+  data.table::data.table(Genes = rownames(matrix)[triplets$i],
+                         Cell = colnames(matrix)[triplets$j], Count = triplets$x)
+}
+
+
+
+#' Convert the RNA assay data of a Seurat object to a data.table
+#'
+#' @param object Seurat object
+#'
+#' @return the data.table produced by `sparse2DT()` for the `data` slot of the `RNA` assay
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+sparse2DT.Seurat <- function(object){
+
+  # exporting normalized data (`data` slot of the RNA assay), columns ordered as Seurat::Cells(object)
+  mat <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  result <- sparse2DT(mat)
+  return(result)
+}
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents
new file mode 100644
index 0000000..711ea0d
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents
@@ -0,0 +1,176 @@
+#' Custom Palette for UMAP
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups to colour
+#' @param base_col base colour handed to `colortools::setColors()`
+#' @param jitter  if `TRUE`, reorder the colours by taking every 5th one; if `FALSE`, keep the order returned by `setColors()`
+#'
+#' @return character vector of colours, one per unique group; `'gray'` comes first when a `"-1"` group is present
+#' @export
+#' @importFrom colortools setColors
+#' @import viridis
+#'
+#' @examples
+#'
+pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # gray is reserved for the "-1" group and always ends up first in the palette
+  pal <- if("-1" %in% col_levels) 'gray' else c()
+
+  # without jitter the colours keep the order returned by setColors()
+  new_order <- seq_len(n)
+  if(jitter){
+    # interleave every 5th colour; only start offsets <= n are valid for seq()
+    new_order <- unlist(lapply(seq_len(min(5, n)), function(x) seq(x, n,5)))
+  }
+  # indexing the combined vector keeps the result at n colours in total
+  pal <- c(pal, colortools::setColors(base_col,n))[new_order]
+  return(pal)
+}
+
+#' UMAP Palette using HCL presets
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups to colour
+#' @param hcl_pal palette name handed to `hcl.colors()`
+#' @param jitter if `TRUE`, interleave the colours instead of keeping the palette order
+#' @param comp integer setting the color complementary to be used; the interleaving stride is `floor(n / comp)`, but never less than 1
+#'
+#' @return character vector with one colour per unique group, preceded by `'gray'` when a `"-1"` group is present
+#' @export
+#'
+#' @examples
+hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # gray is prepended for the "-1" group, so the result then has n + 1 colours
+  pal <- if("-1" %in% col_levels) 'gray' else c()
+
+  # stride between picked colours; at least 1 so that fewer than `comp` groups still work
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in `jn` interleaved passes: 1, 1+jn, ..., then 2, 2+jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order])
+  return(pal)
+}
+
+#' UMAP palette using rainbow colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups to colour
+#' @param jitter if `TRUE`, interleave the colours instead of keeping the palette order
+#' @param comp  integer setting the color complementary to be used; the interleaving stride is `floor(n / comp)`, but never less than 1
+#'
+#' @return character vector with one colour per unique group, preceded by `'gray'` when a `"-1"` group is present
+#' @export
+#'
+#' @examples
+rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # gray is prepended for the "-1" group, so the result then has n + 1 colours
+  pal <- if("-1" %in% col_levels) 'gray' else c()
+
+  # stride between picked colours; at least 1 so that fewer than `comp` groups still work
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in `jn` interleaved passes: 1, 1+jn, ..., then 2, 2+jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  # colours come from a 57-step rainbow; the wheel only grows when there are more than 57 groups
+  pal <- c(pal,rainbow(max(57, n),s = 0.7,v = 0.8,alpha = 0.95)[new_order])
+  return(pal)
+}
+
+#' UMAP Palette using soft hues
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups to colour
+#' @param jitter if `TRUE`, interleave the colours instead of keeping the palette order
+#' @param comp integer setting the color complementary to be used; the interleaving stride is `floor(n / comp)`, but never less than 1
+#'
+#' @return character vector with one colour per unique group, preceded by `'gray'` when a `"-1"` group is present
+#' @export
+#'
+#' @import colorspace
+#'
+#' @examples
+hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # gray is prepended for the "-1" group, so the result then has n + 1 colours
+  pal <- if("-1" %in% col_levels) 'gray' else c()
+
+  # stride between picked colours; at least 1 so that fewer than `comp` groups still work
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in `jn` interleaved passes: 1, 1+jn, ..., then 2, 2+jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order])
+  return(pal)
+}
+
+gg_color_hue <- function(n) {
+  hue_angles <- seq(from = 15, to = 375, length.out = n + 1)  # n + 1 evenly spaced hue angles
+  hcl(h = hue_angles, l = 65, c = 100)[1:n]  # the last angle is not used
+}
+
+#' UMAP Palette using ggplot2 colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups to colour
+#' @param jitter if `TRUE`, interleave the colours instead of keeping the palette order
+#' @param comp  integer setting the color complementarity to be used; the interleaving stride is `floor(n / comp)`, but never less than 1
+#'
+#' @return character vector with one colour per unique group, preceded by `'gray'` when a `"-1"` group is present
+#' @export
+#'
+#' @examples
+gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # gray is prepended for the "-1" group, so the result then has n + 1 colours
+  pal <- if("-1" %in% col_levels) 'gray' else c()
+
+  # stride between picked colours; at least 1 so that fewer than `comp` groups still work
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    # walk the palette in `jn` interleaved passes: 1, 1+jn, ..., then 2, 2+jn, ...
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, gg_color_hue(n)[new_order])
+  return(pal)
+}
+
+hex_convert <- function(x){
+  # one byte only: a value outside [0, 256) cannot be written as two hex digits
+  if(x < 0 || x >= 256) stop('`x` must be >= 0 and < 256')
+  tmp <- c(0:9, LETTERS[1:6])
+
+  x <- floor(x)  # fractional input is truncated, as the index lookup did implicitly
+  first <- x %/% 16
+  second <- x %% 16
+  res <- paste0(tmp[first+1], tmp[second+1])
+  return(res)
+}
+
+hex_convert <- Vectorize(hex_convert)
+
+incA <- function(n, min = 0, base = '#E1E1E1'){
+  # `min` is treated as a percentage and rescaled onto the 0-255 range
+  alpha_floor <- min/100*255
+  # first colour: `base` with the two hex digits of the minimum appended
+  first_col <- paste0(base, hex_convert(alpha_floor))
+  # remaining colours: first 7 characters of each plasma colour plus a suffix ramping from the minimum to 255
+  ramp_cols <- paste0(substring(viridis::plasma(n), 1, 7), hex_convert(seq(alpha_floor,255, length.out = n)))
+  return(c(first_col, ramp_cols))
+}
+
+
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8 b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8
new file mode 100644
index 0000000..3273cb0
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8
@@ -0,0 +1,21 @@
+{
+    "id": "FA66A4D8",
+    "path": "/rdx/projects/dbsinglecell/README.md",
+    "project_path": "README.md",
+    "type": "markdown",
+    "hash": "3239581323",
+    "contents": "",
+    "dirty": false,
+    "created": 1601700425517.0,
+    "source_on_save": false,
+    "relative_order": 7,
+    "properties": {},
+    "folds": "",
+    "lastKnownWriteTime": 1601700650,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1601700650154,
+    "read_only": false,
+    "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents
new file mode 100644
index 0000000..1ce2d6b
--- /dev/null
+++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents
@@ -0,0 +1,23 @@
+
+# dbsinglecell
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk.  
+
+## Installation
+
+To install this package, run
+
+``` r
+remotes::install_github("dbrookeUAB/dbsinglecell")
+```
+
+To use the `HDBSCAN` or `umap` functions, you must first install their respective Python libraries by running
+
+``` r
+library(dbsinglecell)
+install_python_packages()
+```
+Cheers! 
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/lock_file b/.Rproj.user/4F102347/sources/s-2CBFA7B6/lock_file
new file mode 100644
index 0000000..e69de29
diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names
new file mode 100644
index 0000000..e69de29
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
new file mode 100644
index 0000000..7f2a1d0
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/paths
@@ -0,0 +1,31 @@
+/rdx/projects/GeCKO/.travis.yml="36C21FFA"
+/rdx/projects/GeCKO/DESCRIPTION="ADFA58AB"
+/rdx/projects/GeCKO/NAMESPACE="00A958B8"
+/rdx/projects/GeCKO/_pkgdown.yml="F63E4187"
+/rdx/projects/PerNiche/DESCRIPTION="8831743E"
+/rdx/projects/PerNiche/R/NewMeta.R="7F2B2DF3"
+/rdx/projects/PerNiche/R/db_read10x.R="1190B6E6"
+/rdx/projects/PerNiche/R/hdbscan.R="77499622"
+/rdx/projects/PerNiche/R/plot_umap.R="2AA3DD28"
+/rdx/projects/PerNiche/R/scRNA_helpers.R="C0D72FE7"
+/rdx/projects/PerNiche/R/umap-learn.R="CC28AC64"
+/rdx/projects/PerNiche/R/umap.R="15B8B434"
+/rdx/projects/QE/R/CellPhoneDB.R="E718568E"
+/rdx/projects/QE/R/PerNiche/02_dimReduction.R="7886EAB7"
+/rdx/projects/QE/R/PerNiche/complete2.R="5E4D9938"
+/rdx/projects/QE/docs/PerNiche.Rmd="2744004E"
+/rdx/projects/dbsinglecell/DESCRIPTION="21C89D3A"
+/rdx/projects/dbsinglecell/NAMESPACE="C917BDCD"
+/rdx/projects/dbsinglecell/R/NewMeta.R="99B6A7BE"
+/rdx/projects/dbsinglecell/R/cellphonedb_utilities.R="5E6CFB65"
+/rdx/projects/dbsinglecell/R/db_read10x.R="42CB8524"
+/rdx/projects/dbsinglecell/R/hdbscan.R="B49E6C2B"
+/rdx/projects/dbsinglecell/R/organize_10x.R="3CC32046"
+/rdx/projects/dbsinglecell/R/plot_umap.R="A99ACBE4"
+/rdx/projects/dbsinglecell/R/reticulate_helpers.R="E0690E3C"
+/rdx/projects/dbsinglecell/R/scRNA_helpers.R="6AC0283F"
+/rdx/projects/dbsinglecell/R/umap-learn.R="B3715FD8"
+/rdx/projects/dbsinglecell/R/umap.R="9EF542D3"
+/rdx/projects/dbsinglecell/Read-and-delete-me="7C689967"
+/rdx/projects/dbsinglecell/_pkgdown.yml="F5EBAB80"
+/rdx/projects/dbsinglecell/man/cellphonedb_summary.Rd="5390F419"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..99b91e9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+docs
+docs/
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..8cddfb3
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,34 @@
+Package: dbsinglecell
+Type: Package
+Title: Dewey Brooke's Single-Cell Toolkit
+Version: 0.1
+Date: 2019-06-21
+Authors@R: c(
+    person("Dewey", "Brooke", , "dbrooke@uab.edu", role = c("aut", "cre"), 
+      comment = c(ORCID = "0000-0003-4290-3809")
+    )
+    )
+Maintainer: Dewey Brooke <dbrooke@uab.edu>
+Description: A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk.  
+License: GPL (>= 2)
+Imports: 
+  Rcpp (>= 1.0.5),
+  Matrix,
+  data.table,
+  doParallel,
+  foreach,
+  progress,
+  Seurat,
+  colorspace,
+  crayon,
+  snow,
+  doSNOW,
+  colortools,
+  stringr,
+  uwot,
+  viridis,
+  SingleCellExperiment,
+  reticulate
+LinkingTo: Rcpp
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.1
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..6e6b376
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,38 @@
+# Generated by roxygen2: do not edit by hand
+
+export(HDBSCAN)
+export(HDBSCAN.Seurat)
+export(NewMeta)
+export(cellphonedb_summary)
+export(clustUMAP)
+export(create_seurat)
+export(db_read10x)
+export(gg_umap)
+export(hcl_umap)
+export(hue_umap)
+export(organize_10x)
+export(pal_umap)
+export(pre_processing)
+export(rbw_umap)
+export(read10x)
+export(read10x_atlas)
+export(sparse2DT)
+export(sparse2DT.Seurat)
+export(umap)
+export(visUMAP)
+import(Matrix)
+import(Seurat)
+import(SingleCellExperiment)
+import(colorspace)
+import(crayon)
+import(data.table)
+import(doParallel)
+import(doSNOW)
+import(foreach)
+import(progress)
+import(reticulate)
+import(snow)
+import(uwot)
+import(viridis)
+importFrom(colortools,setColors)
+importFrom(stringr,str_to_title)
diff --git a/R/.DS_Store b/R/.DS_Store
new file mode 100644
index 0000000..5033756
Binary files /dev/null and b/R/.DS_Store differ
diff --git a/R/NewMeta.R b/R/NewMeta.R
new file mode 100644
index 0000000..9e9e7ea
--- /dev/null
+++ b/R/NewMeta.R
@@ -0,0 +1,17 @@
+#' Easy Add Meta data to Seurat Object
+#'
+#' @param object Seurat object
+#' @param meta vector (or list) indexed by identity class name; each cell receives the entry matching its identity
+#' @param col.name   name of the column for the new meta data
+#'
+#' @return the object returned by `Seurat::AddMetaData()` with the new column added
+#' @export
+#' @import Seurat
+#'
+#' @examples
+NewMeta <- function(object, meta, col.name){
+  # look up every cell's identity in `meta`, then re-key the values by cell name
+  per_cell <- meta[as.character(Seurat::Idents(object))]
+  names(per_cell) <- colnames(object)
+  return(Seurat::AddMetaData(object, per_cell, col.name))
+}
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..56aa66c
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,7 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+rcpp_hello_world <- function() {  # forwards to the '_dbsinglecell_rcpp_hello_world' routine of this package via .Call
+    .Call('_dbsinglecell_rcpp_hello_world', PACKAGE = 'dbsinglecell')
+}
+
diff --git a/R/cellphonedb_utilities.R b/R/cellphonedb_utilities.R
new file mode 100644
index 0000000..848aa5c
--- /dev/null
+++ b/R/cellphonedb_utilities.R
@@ -0,0 +1,145 @@
+#' CellPhoneDB Summary File
+#'
+#' @param path the directory containing the CellPhoneDB output (`means.txt` and `pvalues.txt`)
+#' @param pvalue `'all'` keeps every row, `'significant'` keeps rows with a p-value below 0.05, and a single number keeps rows with a p-value below that number
+#'
+#' @return a data.table with one row per interaction and cell pair: `cell_a`, `cell_b`, `gA`, `gB`, `cell_pair`, the id columns of the input files, `mean` and `pvalue`
+#' @export
+#'
+#' @examples
+#' @import data.table
+#' @import Matrix
+#' @import crayon
+#'
+cellphonedb_summary <- function(path, pvalue = 'all'){
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files'))))
+  means <- data.table::fread(file.path(path,'means.txt'))
+  pvalues <- data.table::fread(file.path(path, 'pvalues.txt'))
+  # the first 11 columns are kept as id columns; every later column is melted into a cell_pair row
+  id.vars <- colnames(means)[1:11]
+
+  means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean')
+  pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue')
+
+  data.table::setkeyv(means, c('cell_pair',id.vars))
+  data.table::setkeyv(pvalues, c('cell_pair',id.vars))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets'))))
+  result <- data.table::merge.data.table(means, pvalues)
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs'))))
+  int_pairs <- strcapture('(.+)\\_(.+)', result$interacting_pair,
+                          data.table::data.table(gA = character(), gB = character()))
+
+  cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs'))))
+  cell_pair <- strcapture('(.+)\\|(.+)', result$cell_pair,
+                          data.table::data.table(cell_a = character(), cell_b = character()))
+
+  result <- data.table::data.table(cell_pair, int_pairs, result)
+  # 'significant' is shorthand for a 0.05 cutoff; a numeric `pvalue` is used as the cutoff itself
+  cutoff <- if(identical(pvalue, 'significant')) 0.05 else if(is.numeric(pvalue)) pvalue[1] else NULL
+  if(!is.null(cutoff)){
+    # which() leaves out rows whose p-value is NA
+    result <- result[which(result$pvalue < cutoff)]
+  }
+  cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished'))))
+
+  return(result)
+}
+
+
+# prep_cellphonedb <- function(rds, meta_column, path){
+#   require(data.table)
+#   require(Seurat)
+#
+#   object <- readRDS(rds)
+#
+#   res <- sparse2DT.Seurat(object)
+#
+#   new.meta <- object@meta.data[,meta_column]
+#   names(new.meta) <- rownames(object@meta.data)
+#
+#   # add cell_types to res
+#   res[,cell_subset:=new.meta[res$Cell]]
+#   data.table::setkey(res, Genes, cell_subset)
+#
+#   # generate summary information to be used for filtering uninformative genes
+#   test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)]
+#   test[,total:=sum(N),Genes]
+#   test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)]
+#
+#   # create vector with leftover genes
+#   gl <- unique(test$Genes)
+#
+#   # subset count dataset
+#   res <- res[Genes %in% gl]
+#
+#   # create counts file
+#   counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0)
+#   colnames(counts)[1] <- 'Gene'
+#   setkey(counts, Gene)
+#
+#   m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+#   mz_genes <- m2h$Ensembl_gene_id
+#   names(mz_genes) <- m2h$mouse
+#   new_genes <- mz_genes[counts$Gene]
+#   names(new_genes) <- counts$Gene
+#   new_genes <- new_genes[!is.na(new_genes)]
+#
+#   dim(counts)
+#   counts <- counts[Gene  %in% names(new_genes)]
+#   counts[,Gene:=new_genes[Gene]]
+#
+#   # create meta file
+#   meta <- data.table(Cell = colnames(counts)[-1],cell_type =  new.meta[colnames(counts)[-1]])
+#   meta <- meta[Cell %in% colnames(counts)[-1]]
+#   fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+#   fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' @param matrix  sparse matrix with row and column names; one output row is produced per entry reported by `Matrix::summary()`
+#'
+#' @return a data.table with the columns `Genes` (row name), `Cell` (column name) and `Count` (stored value)
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+#'
+#'
+sparse2DT <- function(matrix){
+
+  # triplet (i, j, x) view of the stored entries
+  triplets <- Matrix::summary(matrix)
+
+  # swap the row/column indices for the matching dimnames
+  data.table::data.table(Genes = rownames(matrix)[triplets$i],
+                         Cell = colnames(matrix)[triplets$j], Count = triplets$x)
+}
+
+
+
+#' Convert the RNA assay data of a Seurat object to a data.table
+#'
+#' @param object Seurat object
+#'
+#' @return the data.table produced by `sparse2DT()` for the `data` slot of the `RNA` assay
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+#' @examples
+sparse2DT.Seurat <- function(object){
+
+  # exporting normalized data (`data` slot of the RNA assay), columns ordered as Seurat::Cells(object)
+  mat <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  result <- sparse2DT(mat)
+  return(result)
+}
diff --git a/R/db_read10x.R b/R/db_read10x.R
new file mode 100644
index 0000000..a9f80ff
--- /dev/null
+++ b/R/db_read10x.R
@@ -0,0 +1,186 @@
+#' Read10x v1
+#'
+#' @param path directory holding the 10x files `matrix.mtx*`, `barcodes.tsv*`
+#'   and `genes.tsv*`/`features.tsv*`
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return SingleCellExperiment with a `counts` assay when `return.sce = TRUE`,
+#'   otherwise the sparse count matrix (rows = genes, columns = barcodes)
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @examples
+read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # matrix file: skip = 3 drops the leading header lines
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # barcodes become the column names
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # second column of the genes/features file becomes the row names
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V2
+
+  # row names must be unique: repeated names get a '.1', '.2', ... suffix
+  if(anyDuplicated(gene) > 0){
+    dg <- data.table::data.table(
+      position = which(duplicated(gene)),
+      name = gene[duplicated(gene)])[,N:=.N,name][]
+    dg[,new.name:=paste0(name,'.',1:.N), name]
+    gene[dg$position] <- dg$new.name
+  }
+
+  # explicit dims keep genes/barcodes that have no entry in the matrix file
+  res <- Matrix::sparseMatrix(
+    i = mat$i, j = mat$j, x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    return(res)
+  }
+}
+
+#' Read10x v2
+#'
+#' @param filepaths character vector of 10x directories, each read with `db_read10x()`
+#' @param project currently not used
+#' @param meta  meta data to include with the various datasets (currently not used)
+#'
+#' @return the per-directory `db_read10x()` results combined with `cbind`
+#' @export
+#' @import doParallel
+#' @import foreach
+#' @import doSNOW
+#' @import snow
+#' @import progress
+#'
+#' @examples
+read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){
+  int_list <- seq_along(filepaths)
+  if(length(filepaths) == 0) stop('filepaths must contain at least one directory')
+# checking meta data ------------------------------------------------------
+  #  if(is.null(meta)){
+  #   meta = list()
+  # } else if(nrow(meta)!=length(filepaths)){
+  #   stop('meta data needs to be the same length as filepaths')
+  # } else {
+  #   meta <- as.list(meta)
+  # }
+
+# setting project vector --------------------------------------------------
+  # if(length(project)!=length(filepaths)){
+  #   if( length(project) == 1){
+  #     project <- rep(project, times = length(filepaths))
+  #   } else {
+  #     stop('supply either one project or a vector the same length as filepaths')
+  #   }
+  # }
+
+# creating cluster and registering doSNOW ---------------------------------
+  # one core is left free; at least one worker, at most one per dataset
+  numCores <- max(1, min(parallel::detectCores() - 1, length(filepaths)), na.rm = TRUE)
+  cl <- snow::makeCluster(numCores)
+  on.exit(snow::stopCluster(cl))
+  doSNOW::registerDoSNOW(cl)
+
+# progress bar ------------------------------------------------------------
+  iterations <- length(int_list)                               # used for the foreach loop
+
+  pb <- progress::progress_bar$new(
+    format = ":percent item = :item [:bar] :elapsed | eta: :eta",
+    total = iterations,
+    width = floor(options()$width*0.9),
+    clear = TRUE
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+
+  progress <- function(n) {
+    pb$tick(tokens = list(item = int_list[n]))     # report the int_list item
+  }
+
+  opts <- list(progress = progress)  # used in the the foreach loop
+
+    result <- foreach::foreach( i = 1:iterations,
+                       .options.snow = opts,
+                       .export = 'db_read10x',
+                       .combine = 'cbind',
+                       .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% {
+                         db_read10x(path = filepaths[i])
+                       }
+
+
+  return(result)
+}
+
+
+#' Read10x v3
+#'
+#' @param path directory holding the 10x files `matrix.mtx*`, `barcodes.tsv*` and `genes.tsv*`/`features.tsv*`
+#' @param return.sce  return result as SingleCellExperiment object
+#'
+#' @return SingleCellExperiment with a `counts` assay, or the sparse count matrix when `return.sce = FALSE`
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+#' @examples
+db_read10x  <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  gene<- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V1
+
+  # (the first column of the genes/features file supplies the row names)
+  # duplicate gene names for row names --------------------------------------
+
+  if(anyDuplicated(gene) > 0){
+    dg <- data.table::data.table(
+      position = which(duplicated(gene)),
+      name = gene[duplicated(gene)])[,N:=.N,name][]
+    dg[,new.name:=paste0(name,'.',1:.N), name]
+    gene[dg$position] <- dg$new.name
+  }
+  max_i <- max(mat$i)   # row names are cut to the largest row index present
+  res <-  Matrix::sparseMatrix(
+    i =  mat$i,
+    j = mat$j,
+    x = mat$value,
+    dimnames = list(gene[1:max_i],barcode))
+
+  if(return.sce){
+    SingleCellExperiment::SingleCellExperiment(list(counts = res))
+  } else {
+    return(res)
+  }
+
+}
diff --git a/R/hdbscan.R b/R/hdbscan.R
new file mode 100644
index 0000000..51355f8
--- /dev/null
+++ b/R/hdbscan.R
@@ -0,0 +1,156 @@
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x numeric matrix handed to the `fit()` method of the Python clusterer
+#' @param algorithm forwarded to `hdbscan.HDBSCAN(algorithm = )`
+#' @param alpha forwarded to `hdbscan.HDBSCAN(alpha = )`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN(approx_min_span_tree = )`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN(gen_min_span_tree = )`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN(leaf_size = )` as an integer
+#' @param metric forwarded to `hdbscan.HDBSCAN(metric = )`
+#' @param min_cluster_size forwarded to `hdbscan.HDBSCAN(min_cluster_size = )` as an integer
+#' @param min_samples forwarded to `hdbscan.HDBSCAN(min_samples = )` as an integer
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN(cluster_selection_epsilon = )`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN(cluster_selection_method = )`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN(core_dist_n_jobs = )` as an integer
+#' @param prediction_data  forwarded to `hdbscan.HDBSCAN(prediction_data = )`
+#'
+#' @return list with `labels` (factor, label -1 set to NA), `probabilities`, `cluster_persistance`, `exemplars`, `outlier_scores`
+#' @export
+#'
+#' @import reticulate
+#'
+#' @examples
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # integer-valued arguments are cast explicitly: plain R numerics reach Python as floats
+
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+
+
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+  # only the label -1 becomes NA; it is a level only when such points exist
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  return(result)
+}
+
+
+
+#'  Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param object Seurat object
+#' @param reduction name of the dimensional reduction whose embeddings are clustered
+#' @param dims columns of the embedding to use; `NULL` uses all of them
+#' @param algorithm forwarded to `hdbscan.HDBSCAN(algorithm = )`
+#' @param alpha forwarded to `hdbscan.HDBSCAN(alpha = )`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN(approx_min_span_tree = )`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN(gen_min_span_tree = )`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN(leaf_size = )` as an integer
+#' @param metric forwarded to `hdbscan.HDBSCAN(metric = )`
+#' @param min_cluster_size forwarded to `hdbscan.HDBSCAN(min_cluster_size = )` as an integer
+#' @param min_samples forwarded to `hdbscan.HDBSCAN(min_samples = )` as an integer
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN(cluster_selection_epsilon = )`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN(cluster_selection_method = )`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN(core_dist_n_jobs = )` as an integer
+#' @param return_seurat  logical to return the result within the orignal object or as the raw HDBSCAN result
+#' @param prediction_data forwarded to `hdbscan.HDBSCAN(prediction_data = )`
+#'
+#' @return the Seurat object with the result list in `misc$hdbscan` and the raw labels in `cl`, or only the result list
+#' @export
+#'
+#' @examples
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+
+  if(is.null(dims)){
+    x <- Seurat::Embeddings(object, reduction = reduction)
+  } else {
+    x <- Seurat::Embeddings(object, reduction = reduction)[,dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # integer-valued arguments are cast explicitly: plain R numerics reach Python as floats
+
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon =  cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+  # only the label -1 becomes NA; it is a level only when such points exist
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  if(return_seurat){
+    object@misc$hdbscan <- result
+    object$cl <- factor(clusterer$labels_)
+    return(object)
+  } else {
+    return(result)
+  }
+
+
+}
diff --git a/R/organize_10x.R b/R/organize_10x.R
new file mode 100644
index 0000000..9411853
--- /dev/null
+++ b/R/organize_10x.R
@@ -0,0 +1,53 @@
+#' Reorganize another person's mess into a usable 10X dataset
+#'
+#' @param x  path containing the unorganized disaster
+#'
+#' @return `NULL`, invisibly; every matched file is copied to
+#'   `x/<accession_id>/<file_type>` and the original is removed afterwards
+#' @export
+#' @import data.table
+#'
+#' @examples
+organize_10x <- function(x ){
+  path_main <- x
+  file_list <- dir(path = x, full.names = TRUE)
+  file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)]
+
+  res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list),
+                               proto = data.table::data.table(accession_id = character(),
+                                                  file_type = character())))
+  res$old_path <- file_list
+  res$old_name<- basename(file_list)
+  # files whose name does not match the pattern are left where they are
+  res <- res[!is.na(accession_id) & !is.na(file_type)]
+  if(nrow(res) == 0) return(invisible(NULL))
+  res[file_type=='genes.tsv.gz', file_type:='features.tsv.gz']
+  res$new_folder <- file.path(path_main,paste0(res$accession_id))
+  res$new_path <- file.path(res$new_folder, res$file_type)
+
+  new_dirs <- unique(res$new_folder)
+
+  pb <- progress::progress_bar$new(
+    format = "  [:bar] :percent eta: :eta",
+    clear = FALSE, total = nrow(res), width = 80)
+
+  for(i in new_dirs){
+
+    if(!dir.exists(i)){
+      dir.create(i)
+    }
+
+    f2m <- res[new_folder==i]
+
+    for(j in seq_len(nrow(f2m))){
+      pb$tick()
+      # delete the original only after it was copied successfully
+      if(file.copy(f2m[j,old_path ],f2m[j,new_path] )){
+        file.remove(f2m[j,old_path ])
+      }
+    }
+  }
+  invisible(NULL)
+}
+
+
diff --git a/R/plot_umap.R b/R/plot_umap.R
new file mode 100644
index 0000000..711ea0d
--- /dev/null
+++ b/R/plot_umap.R
@@ -0,0 +1,176 @@
+#' Custom Palette for UMAP
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups
+#' @param base_col base color handed to `colortools::setColors()`
+#' @param jitter  reorder the colors by taking every 5th color in turn
+#'
+#' @return character vector of colors; 'gray' comes first when the groups contain "-1"
+#' @export
+#' @importFrom colortools setColors
+#' @import viridis
+#'
+#' @examples
+#'
+pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # identity order unless jitter is requested (and there are enough colors to stride over)
+  new_order <- seq_len(n)
+  if(jitter && n >= 5){
+    new_order <- unlist(lapply(1:5, function(x) seq(x, n,5)))
+  }
+  pal <- c(pal, colortools::setColors(base_col,n)[new_order])
+  return(pal)
+}
+
+#' UMAP Palette using HCL presets
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups
+#' @param hcl_pal palette name handed to `hcl.colors()`
+#' @param jitter reorder the colors in strides instead of keeping the palette order
+#' @param comp integer; the jittered order steps through the palette in strides of `floor(n/comp)`
+#'
+#' @return character vector of colors; 'gray' comes first when the groups contain "-1"
+#' @export
+#'
+#' @examples
+hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  jn <- floor(n/comp)
+  new_order <- 1:n
+  if(jitter && jn >= 1){  # jn is 0 when n < comp and seq() cannot step by 0
+    new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order])
+  return(pal)
+}
+
+#' UMAP palette using rainbow colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups
+#' @param jitter reorder the colors in strides instead of keeping the rainbow order
+#' @param comp  integer; the jittered order steps through the palette in strides of `floor(n/comp)`
+#'
+#' @return character vector of colors; 'gray' comes first when the groups contain "-1"
+#' @export
+#'
+#' @examples
+rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  jn <- floor(n/comp)
+  new_order <- 1:n
+  if(jitter && jn >= 1){  # jn is 0 when n < comp and seq() cannot step by 0
+    new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal,rainbow(n,s = 0.7,v = 0.8,alpha = 0.95)[new_order])  # one color per group
+  return(pal)
+}
+
+#' UMAP Palette using soft hues
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups
+#' @param jitter reorder the colors in strides instead of keeping the palette order
+#' @param comp integer; the jittered order steps through the palette in strides of `floor(n/comp)`
+#'
+#' @return character vector of colors; 'gray' comes first when the groups contain "-1"
+#' @export
+#'
+#' @import colorspace
+#'
+#' @examples
+hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  jn <- floor(n/comp)
+  new_order <- 1:n
+  if(jitter && jn >= 1){  # jn is 0 when n < comp and seq() cannot step by 0
+    new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order])
+  return(pal)
+}
+
+gg_color_hue <- function(n) {  # n colors at l = 65, c = 100 with evenly spaced hues starting at 15
+  hue_angles <- seq(from = 15, to = 375, length.out = n + 1)
+  hcl(h = hue_angles, c = 100, l = 65)[1:n]
+}
+
+#' UMAP Palette using ggplot2 colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the `meta.data` column holding the groups
+#' @param jitter reorder the colors in strides instead of keeping the palette order
+#' @param comp  integer; the jittered order steps through the palette in strides of `floor(n/comp)`
+#'
+#' @return character vector of colors; 'gray' comes first when the groups contain "-1"
+#' @export
+#'
+#' @examples
+gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  jn <- floor(n/comp)
+  new_order <- 1:n
+  if(jitter && jn >= 1){  # jn is 0 when n < comp and seq() cannot step by 0
+    new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn))))
+  }
+  pal <- c(pal, gg_color_hue(n)[new_order])
+  return(pal)
+}
+
+# two-digit upper-case hex string for a number in [0, 256); fractions are dropped
+hex_convert <- function(x){
+  if(is.na(x) || x < 0 || x >= 256) stop('x must be a number in [0, 256)')
+  tmp <- c(0:9, LETTERS[1:6])
+
+  x <- floor(x)            # explicit, instead of relying on index truncation
+  first <- x %/% 16        # high digit, 0..15 because x < 256
+  second <- x %% 16        # low digit
+  res <- paste0(tmp[first+1], tmp[second+1])
+  return(res)
+}
+
+hex_convert <- Vectorize(hex_convert)
+
+incA <- function(n, min = 0, base = '#E1E1E1'){
+  # `min` is a percentage; put it on the 0-255 alpha scale
+  alpha_floor <- min/100*255
+  # plasma colors cut to '#RRGGBB', paired with alpha values rising to 255
+  ramp_rgb <- substring(viridis::plasma(n), 1, 7)
+  ramp_alpha <- hex_convert(seq(alpha_floor, 255, length.out = n))
+  # first entry: the base color at the lowest alpha
+  c(paste0(base, hex_convert(alpha_floor)), paste0(ramp_rgb, ramp_alpha))
+}
+
+
diff --git a/R/reticulate_helpers.R b/R/reticulate_helpers.R
new file mode 100644
index 0000000..44533d2
--- /dev/null
+++ b/R/reticulate_helpers.R
@@ -0,0 +1,15 @@
+# global references to the Python modules (will be initialized in .onLoad)
+scipy <- NULL
+hdbscan <- NULL
+umap <- NULL
+# NOTE(review): `umap` is also the name of the function defined in R/umap-learn.R -- confirm the two do not overwrite each other
+.onLoad <- function(libname, pkgname) {
+  # use superassignment to update the global references; all three imports use delay_load = TRUE
+  scipy <<- reticulate::import("scipy", delay_load = TRUE)
+  hdbscan <<- reticulate::import('hdbscan', delay_load = TRUE)
+  umap <<- reticulate::import('umap', delay_load = TRUE)
+}
+
+install_python_packages <- function(method = "auto", conda = "auto") {  # installs the Python packages behind the `hdbscan` and `umap` modules
+  reticulate::py_install(c("hdbscan", "umap-learn"), method = method, conda = conda)
+}
diff --git a/R/scRNA_helpers.R b/R/scRNA_helpers.R
new file mode 100644
index 0000000..0577bd5
--- /dev/null
+++ b/R/scRNA_helpers.R
@@ -0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath directory read with `Seurat::Read10X()`
+#' @param sample  sample name to use; defaults to `basename(filepath)`
+#' @param meta optional cell meta data forwarded to `CreateSeuratObject(meta.data = )`
+#' @return Seurat object whose cell names are prefixed with `<sample>_`
+#' @export
+#' @import Seurat
+#'
+#' @examples
+create_seurat <- function(filepath, sample = NULL, meta = NULL){
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # create unique cell ids
+  cell_ids <- paste0(sample, '_', colnames(x))
+  colnames(x) <-cell_ids
+
+  # create Seurat Object and include meta data
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample)
+  })
+
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' @param object Seurat object
+#' @param species `'Homo sapiens'` uses the `^MT-` mitochondrial pattern, anything else `^mt-`; `'Mus musculus'` title-cases the cell cycle genes
+#' @param nfeatures number of variable features to select
+#' @param npcs  number of principle component dimensions to calculate
+#'
+#' @return the filtered, normalized and scaled Seurat object after `RunPCA()`
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+#' @examples
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                 pattern = mt_pattern,
+                                 col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # Removing low quality cells and doublets
+  object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object<- Seurat::NormalizeData(object, verbose = TRUE)
+  # Variable Features
+
+  message_section(paste('Finding',nfeatures,'most variable fatures'))
+  object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes)
+    ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes)
+  } else { # use build in gene names
+    ccss <- cc.genes.updated.2019$s.genes
+    ccg2m <- cc.genes.updated.2019$g2m.genes
+  }
+
+  # scoring function
+  object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <-object$S.Score -object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA on the variable features selected above ----
+  object<- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+
+}
+
+message_section <- function(text){
+  # rule width: three quarters of the console width, capped at 120 characters
+  rule_width <- min(ceiling(getOption('width')*0.75), 120)
+  cat("\n", strrep('-', rule_width), "\n", sep = '')
+  # bold yellow timestamp followed by the bold green section title
+  stamp <- crayon::bold(crayon::yellow(paste0('[',Sys.time(),']')))
+  cat(stamp, crayon::bold(crayon::green(text)),'\n')
+}
+
+message_task <- function(text){
+  # available width: three quarters of the console, capped at 120 characters
+  max_width <- min(ceiling(getOption('width')*0.75), 120)
+  stamp <- crayon::yellow(paste0('[',Sys.time(),']'))
+
+  # text longer than the width minus 22 characters is dropped; only the timestamp is printed
+  if(nchar(text) <= max_width - 22){
+    cat(stamp, text,'\n')
+  } else {
+    cat(stamp,'\n')
+  }
+
+}
+
+message_append <- function(text){
+  # available width: three quarters of the console, capped at 120 characters
+  max_width <- min(ceiling(getOption('width')*0.75), 120)
+
+  # text longer than the width minus 22 characters is silently skipped
+  if(nchar(text) > max_width - 22){
+    return(invisible())
+  }
+
+  # 23 spaces of indentation, then a silver '- ' bullet and the text
+  cat(strrep(' ', 23),crayon::silver('- '),crayon::silver(text),'\n', sep = '')
+}
diff --git a/R/umap-learn.R b/R/umap-learn.R
new file mode 100644
index 0000000..ce41ba9
--- /dev/null
+++ b/R/umap-learn.R
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding numeric matrix handed to `fit_transform()` of the Python `umap.UMAP` object
+#' @param a forwarded to `umap.UMAP()`
+#' @param angular_rp_forest forwarded to `umap.UMAP()`
+#' @param b forwarded to `umap.UMAP()`
+#' @param force_approximation_algorithm forwarded to `umap.UMAP()`
+#' @param init forwarded to `umap.UMAP()`
+#' @param learning_rate forwarded to `umap.UMAP()`
+#' @param local_connectivity forwarded to `umap.UMAP()`
+#' @param low_memory forwarded to `umap.UMAP()`
+#' @param metric forwarded to `umap.UMAP()`
+#' @param metric_kwds forwarded to `umap.UMAP()`
+#' @param min_dist forwarded to `umap.UMAP()`
+#' @param n_components forwarded to `umap.UMAP()` as an integer
+#' @param n_epochs forwarded to `umap.UMAP()` as an integer
+#' @param n_neighbors forwarded to `umap.UMAP()` as an integer
+#' @param negative_sample_rate forwarded to `umap.UMAP()`
+#' @param output_metric forwarded to `umap.UMAP()`
+#' @param output_metric_kwds forwarded to `umap.UMAP()`
+#' @param random_state forwarded to `umap.UMAP()` as an integer
+#' @param repulsion_strength forwarded to `umap.UMAP()`
+#' @param set_op_mix_ratio forwarded to `umap.UMAP()`
+#' @param spread forwarded to `umap.UMAP()`
+#' @param target_metric forwarded to `umap.UMAP()`
+#' @param target_metric_kwds forwarded to `umap.UMAP()`
+#' @param target_n_neighbors forwarded to `umap.UMAP()` as an integer
+#' @param target_weight forwarded to `umap.UMAP()`
+#' @param transform_queue_size forwarded to `umap.UMAP()`
+#' @param transform_seed forwarded to `umap.UMAP()` as an integer
+#' @param unique forwarded to `umap.UMAP()`
+#' @param verbose forwarded to `umap.UMAP()`
+#' @param nThreads  number of parallel threads to be used (written to the `OMP_NUM_THREADS` environment variable)
+#'
+#' @return the result of `fit_transform(embedding)`
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#'
+#' @examples
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap_py <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap_py$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+  # runs the Python `umap.UMAP` on the embedding of `reduction`; the other arguments are forwarded to it
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # Embeddings() does not subset by `dims`, so the columns are selected here
+    embedding <- embedding[, dims, drop = FALSE]
+  }
+
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap_py <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap_py$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+  result <- reducer$fit_transform(embedding)
+  if(return_seurat){
+    # the Python result carries no dimnames; CreateDimReducObject needs the cell names as row names
+    dimnames(result) <- list(rownames(embedding), paste0('umap_', seq_len(ncol(result))))
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA')
+    return(object)
+  } else {
+    return(result)
+  }
+}
+
+
+
diff --git a/R/umap.R b/R/umap.R
new file mode 100644
index 0000000..2f9f0fc
--- /dev/null
+++ b/R/umap.R
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the dimensional reduction whose embeddings are used as input
+#' @param spread forwarded to `uwot::umap()`
+#' @param n_components forwarded to `uwot::umap()`
+#' @param min_dist forwarded to `uwot::umap()`
+#' @param metric forwarded to `uwot::umap()`
+#' @param n_neighbors forwarded to `uwot::umap()`
+#' @param set_op_mix_ratio forwarded to `uwot::umap()`
+#' @param local_connectivity forwarded to `uwot::umap()`
+#' @param repulsion_strength forwarded to `uwot::umap()`
+#' @param negative_sample_rate forwarded to `uwot::umap()`
+#' @param n_threads forwarded to `uwot::umap()`
+#' @param reduction_name name under which the new reduction is stored in `object`
+#' @param return_seurat logical; return the updated object (`TRUE`) or only the `uwot::umap()` result
+#' @param verbose whether to print function messages
+#'
+#' @return the Seurat object with the new reduction, or the `uwot::umap()` result when `return_seurat = FALSE`
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+#' @examples
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads =  parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           metric = metric,
+                           n_threads = n_threads,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+  if(return_seurat){
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the dimensional reduction whose embeddings are used as input
+#' @param spread forwarded to `uwot::umap()`
+#' @param n_components forwarded to `uwot::umap()`; `NULL` uses the number of columns of the input embedding
+#' @param min_dist forwarded to `uwot::umap()`
+#' @param metric forwarded to `uwot::umap()`
+#' @param n_neighbors forwarded to `uwot::umap()`
+#' @param set_op_mix_ratio forwarded to `uwot::umap()`
+#' @param local_connectivity forwarded to `uwot::umap()`
+#' @param repulsion_strength forwarded to `uwot::umap()`
+#' @param negative_sample_rate forwarded to `uwot::umap()`
+#' @param n_threads forwarded to `uwot::umap()`
+#' @param reduction_name name under which the new reduction is stored in `object`
+#' @param return_seurat logical; return the updated object (`TRUE`) or only the embedding matrix
+#' @param verbose   whether to print function messages
+#'
+#' @return the Seurat object with the new reduction, or the embedding matrix when `return_seurat = FALSE`
+#' @export
+#'
+#' @examples
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <-   uwot::umap(embds,
+                           spread = spread,
+                           n_components = n_components,
+                           min_dist = min_dist,
+                           n_threads = n_threads,
+                           metric = metric,
+                           n_neighbors = n_neighbors,
+                           set_op_mix_ratio = set_op_mix_ratio,
+                           local_connectivity = local_connectivity,
+                           repulsion_strength = repulsion_strength,
+                           negative_sample_rate = negative_sample_rate,
+                           verbose = verbose
+  )
+
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res))
+
+  if(return_seurat){
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                             key = 'clustUMAP_',
+                                                             assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1ce2d6b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+
+# dbsinglecell
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk.  
+
+## Installation
+
+To use this package, install it with
+
+``` r
+remotes::install_github("dbrookeUAB/dbsinglecell")
+```
+
+To use the `HDBSCAN` or `umap` functions, you must first install their respective Python libraries by running
+
+``` r
+library(dbsinglecell)
+install_python_packages()
+```
+Cheers! 
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 0000000..a9e43eb
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,35 @@
+destination: docs
+template:
+      params:
+        bootswatch: yeti
+navbar:
+  structure:
+    left:  [home, intro, reference, articles, tutorials, news]
+    right: [github, packages,main_site, twitter]
+  components:
+     twitter:
+       icon: "fab fa-twitter fa-lg"
+       href: http://twitter.com/deweybrooke1
+     main_site:
+      icon: "fas fa-user-circle fa-lg"
+      text: Dewey Brooke
+      href: https://www.deweybrooke.org/
+     github:
+      icon: "fab fa-github fa-lg"
+      href: https://github.com/dbrookeUAB/dbsinglecell
+     packages:
+      icon: "fab fa-r-project fa-lg"
+      text: Other Packages
+      menu:
+      - text: TCGAseq
+        href: https://tcgaseq.deweybrooke.org/
+      - text: GTEXseq
+        href: https://gtexseq.deweybrooke.org/
+      - text: GeCKO
+        href: https://gecko.deweybrooke.org/
+      - text: miknn
+        href: https://miknn.deweybrooke.org/
+      - text: dth
+        href: https://dth.deweybrooke.org/
+      - text: dbsinglecell
+        href: https://dbsinglecell.deweybrooke.org/
diff --git a/dbsinglecell.Rproj b/dbsinglecell.Rproj
new file mode 100644
index 0000000..497f8bf
--- /dev/null
+++ b/dbsinglecell.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/man/HDBSCAN.Rd b/man/HDBSCAN.Rd
new file mode 100644
index 0000000..3b63232
--- /dev/null
+++ b/man/HDBSCAN.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hdbscan.R
+\name{HDBSCAN}
+\alias{HDBSCAN}
+\title{Hierarchical Density-Based Spatial Clustering of Applications with Noise}
+\usage{
+HDBSCAN(
+  x,
+  algorithm = "best",
+  alpha = 1,
+  approx_min_span_tree = TRUE,
+  gen_min_span_tree = FALSE,
+  leaf_size = 40,
+  metric = "euclidean",
+  prediction_data = TRUE,
+  min_cluster_size = 50,
+  min_samples = 1,
+  cluster_selection_epsilon = 0.5,
+  cluster_selection_method = "leaf",
+  nThreads = parallel::detectCores()
+)
+}
+\arguments{
+\item{prediction_data}{}
+}
+\value{
+
+}
+\description{
+Hierarchical Density-Based Spatial Clustering of Applications with Noise
+}
diff --git a/man/HDBSCAN.Seurat.Rd b/man/HDBSCAN.Seurat.Rd
new file mode 100644
index 0000000..8d196a7
--- /dev/null
+++ b/man/HDBSCAN.Seurat.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hdbscan.R
+\name{HDBSCAN.Seurat}
+\alias{HDBSCAN.Seurat}
+\title{Hierarchical Density-Based Spatial Clustering of Applications with Noise}
+\usage{
+HDBSCAN.Seurat(
+  object,
+  reduction = "umap",
+  dims = NULL,
+  algorithm = "best",
+  alpha = 1,
+  prediction_data = TRUE,
+  approx_min_span_tree = TRUE,
+  gen_min_span_tree = FALSE,
+  leaf_size = 40,
+  metric = "euclidean",
+  min_cluster_size = 50,
+  min_samples = 1,
+  cluster_selection_epsilon = 0.5,
+  cluster_selection_method = "leaf",
+  nThreads = parallel::detectCores(),
+  return_seurat = TRUE
+)
+}
+\arguments{
+\item{prediction_data}{}
+
+\item{return_seurat}{logical; whether to return the result within the original object or as the raw HDBSCAN result}
+}
+\value{
+
+}
+\description{
+Hierarchical Density-Based Spatial Clustering of Applications with Noise
+}
diff --git a/man/NewMeta.Rd b/man/NewMeta.Rd
new file mode 100644
index 0000000..1f96652
--- /dev/null
+++ b/man/NewMeta.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/NewMeta.R
+\name{NewMeta}
+\alias{NewMeta}
+\title{Easy Add Meta data to Seurat Object}
+\usage{
+NewMeta(object, meta, col.name)
+}
+\arguments{
+\item{col.name}{name of the column for the new meta data}
+}
+\value{
+
+}
+\description{
+Easy Add Meta data to Seurat Object
+}
diff --git a/man/cellphonedb_summary.Rd b/man/cellphonedb_summary.Rd
new file mode 100644
index 0000000..1034025
--- /dev/null
+++ b/man/cellphonedb_summary.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cellphonedb_utilities.R
+\name{cellphonedb_summary}
+\alias{cellphonedb_summary}
+\title{CellPhoneDB Summary File}
+\usage{
+cellphonedb_summary(path, pvalue = "all")
+}
+\arguments{
+\item{path}{the directory containing the CellPhoneDB Output}
+
+\item{pvalue}{setting this will return results less than it}
+}
+\value{
+
+}
+\description{
+CellPhoneDB Summary File
+}
diff --git a/man/clustUMAP.Rd b/man/clustUMAP.Rd
new file mode 100644
index 0000000..f430dff
--- /dev/null
+++ b/man/clustUMAP.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/umap.R
+\name{clustUMAP}
+\alias{clustUMAP}
+\title{UWOT-UMAP: Clustering Specific UMAP}
+\usage{
+clustUMAP(
+  object,
+  reduction = "harmony",
+  spread = 1.1,
+  n_components = NULL,
+  min_dist = 0,
+  metric = "cosine",
+  n_neighbors = 50,
+  set_op_mix_ratio = 1,
+  local_connectivity = 1,
+  repulsion_strength = 1,
+  negative_sample_rate = 5,
+  n_threads = parallel::detectCores() - 1,
+  reduction_name = "umap",
+  return_seurat = TRUE,
+  verbose = TRUE
+)
+}
+\arguments{
+\item{verbose}{whether to print function messages}
+}
+\value{
+
+}
+\description{
+UWOT-UMAP: Clustering Specific UMAP
+}
diff --git a/man/create_seurat.Rd b/man/create_seurat.Rd
new file mode 100644
index 0000000..98c2ad6
--- /dev/null
+++ b/man/create_seurat.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scRNA_helpers.R
+\name{create_seurat}
+\alias{create_seurat}
+\title{Simple method for creating Seurat Objects}
+\usage{
+create_seurat(filepath, sample = NULL)
+}
+\arguments{
+\item{sample}{sample name to use}
+}
+\value{
+
+}
+\description{
+Simple method for creating Seurat Objects
+}
diff --git a/man/db_read10x.Rd b/man/db_read10x.Rd
new file mode 100644
index 0000000..2545faf
--- /dev/null
+++ b/man/db_read10x.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/db_read10x.R
+\name{db_read10x}
+\alias{db_read10x}
+\title{Read10x v3}
+\usage{
+db_read10x(path, return.sce = TRUE)
+}
+\arguments{
+\item{return.sce}{}
+}
+\value{
+
+}
+\description{
+Read10x v3
+}
diff --git a/man/db_singlecell-package.Rd b/man/db_singlecell-package.Rd
new file mode 100644
index 0000000..43627da
--- /dev/null
+++ b/man/db_singlecell-package.Rd
@@ -0,0 +1,34 @@
+\name{db_singlecell-package}
+\alias{db_singlecell-package}
+\alias{db_singlecell}
+\docType{package}
+\title{
+  A short title line describing what the package does
+}
+\description{
+  A more detailed description of what the package does. A length
+  of about one to five lines is recommended.
+}
+\details{
+  This section should provide a more detailed overview of how to use the
+  package, including the most important functions.
+}
+\author{
+Your Name, email optional.
+
+Maintainer: Your Name <your@email.com>
+}
+\references{
+  This optional section can contain literature or other references for
+  background information.
+}
+\keyword{ package }
+\seealso{
+  Optional links to other man pages
+}
+\examples{
+  \dontrun{
+     ## Optional simple examples of the most important functions
+     ## These can be in \dontrun{} and \donttest{} blocks.   
+  }
+}
diff --git a/man/gg_umap.Rd b/man/gg_umap.Rd
new file mode 100644
index 0000000..79a9dcb
--- /dev/null
+++ b/man/gg_umap.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_umap.R
+\name{gg_umap}
+\alias{gg_umap}
+\title{UMAP Palette using ggplot2 colors}
+\usage{
+gg_umap(object, group_col, jitter = TRUE, comp = 3)
+}
+\arguments{
+\item{comp}{integer setting the color complementarity to be used}
+}
+\value{
+
+}
+\description{
+UMAP Palette using ggplot2 colors
+}
diff --git a/man/hcl_umap.Rd b/man/hcl_umap.Rd
new file mode 100644
index 0000000..6dfc355
--- /dev/null
+++ b/man/hcl_umap.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_umap.R
+\name{hcl_umap}
+\alias{hcl_umap}
+\title{UMAP Palette using HCL presets}
+\usage{
+hcl_umap(object, group_col, hcl_pal = "Dark 3", jitter = TRUE, comp = 3)
+}
+\arguments{
+\item{comp}{integer setting the color complementary to be used}
+}
+\value{
+
+}
+\description{
+UMAP Palette using HCL presets
+}
diff --git a/man/hue_umap.Rd b/man/hue_umap.Rd
new file mode 100644
index 0000000..aa8c858
--- /dev/null
+++ b/man/hue_umap.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_umap.R
+\name{hue_umap}
+\alias{hue_umap}
+\title{UMAP Palette using soft hues}
+\usage{
+hue_umap(object, group_col, jitter = TRUE, comp = 3)
+}
+\arguments{
+\item{jitter}{logical; whether to randomize the colors}
+
+\item{comp}{integer setting the color complementary to be used}
+}
+\value{
+
+}
+\description{
+UMAP Palette using soft hues
+}
diff --git a/man/organize_10x.Rd b/man/organize_10x.Rd
new file mode 100644
index 0000000..c70a028
--- /dev/null
+++ b/man/organize_10x.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/organize_10x.R
+\name{organize_10x}
+\alias{organize_10x}
+\title{Reorganize another person's mess into a usable 10X dataset}
+\usage{
+organize_10x(x)
+}
+\arguments{
+\item{x}{path containing the unorganized disaster}
+}
+\value{
+
+}
+\description{
+Reorganize another person's mess into a usable 10X dataset
+}
diff --git a/man/pal_umap.Rd b/man/pal_umap.Rd
new file mode 100644
index 0000000..14a88b8
--- /dev/null
+++ b/man/pal_umap.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_umap.R
+\name{pal_umap}
+\alias{pal_umap}
+\title{Custom Palette for UMAP}
+\usage{
+pal_umap(object, group_col, base_col = "#1E90FF", jitter = TRUE)
+}
+\arguments{
+\item{jitter}{randomize the colors}
+}
+\value{
+
+}
+\description{
+Custom Palette for UMAP
+}
+\examples{
+
+}
diff --git a/man/pre_processing.Rd b/man/pre_processing.Rd
new file mode 100644
index 0000000..c48d352
--- /dev/null
+++ b/man/pre_processing.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scRNA_helpers.R
+\name{pre_processing}
+\alias{pre_processing}
+\title{Seurat Preprocessing}
+\usage{
+pre_processing(object, species = "Homo sapiens", nfeatures = 3000, npcs = 50)
+}
+\arguments{
+\item{npcs}{number of principal component dimensions to calculate}
+}
+\value{
+
+}
+\description{
+Seurat Preprocessing
+}
diff --git a/man/rbw_umap.Rd b/man/rbw_umap.Rd
new file mode 100644
index 0000000..212ff86
--- /dev/null
+++ b/man/rbw_umap.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_umap.R
+\name{rbw_umap}
+\alias{rbw_umap}
+\title{UMAP palette using rainbow colors}
+\usage{
+rbw_umap(object, group_col, jitter = TRUE, comp = 3)
+}
+\arguments{
+\item{comp}{integer setting the color complementary to be used}
+}
+\value{
+
+}
+\description{
+UMAP palette using rainbow colors
+}
diff --git a/man/rcpp_hello_world.Rd b/man/rcpp_hello_world.Rd
new file mode 100644
index 0000000..e4f90bf
--- /dev/null
+++ b/man/rcpp_hello_world.Rd
@@ -0,0 +1,17 @@
+\name{rcpp_hello_world}
+\alias{rcpp_hello_world}
+\docType{package}
+\title{
+Simple function using Rcpp
+}
+\description{
+Simple function using Rcpp
+}
+\usage{
+rcpp_hello_world()	
+}
+\examples{
+\dontrun{
+rcpp_hello_world()
+}
+}
diff --git a/man/read10x.Rd b/man/read10x.Rd
new file mode 100644
index 0000000..f7c9d73
--- /dev/null
+++ b/man/read10x.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/db_read10x.R
+\name{read10x}
+\alias{read10x}
+\title{Read10x v1}
+\usage{
+read10x(path, return.sce = TRUE)
+}
+\arguments{
+\item{return.sce}{return result as SingleCellExperiment object}
+}
+\value{
+
+}
+\description{
+Read10x v1
+}
diff --git a/man/read10x_atlas.Rd b/man/read10x_atlas.Rd
new file mode 100644
index 0000000..59de9dd
--- /dev/null
+++ b/man/read10x_atlas.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/db_read10x.R
+\name{read10x_atlas}
+\alias{read10x_atlas}
+\title{Read10x v2}
+\usage{
+read10x_atlas(filepaths, project = "scRNAseq", meta = NULL)
+}
+\arguments{
+\item{meta}{}
+}
+\value{
+
+}
+\description{
+Read10x v2
+}
diff --git a/man/sparse2DT.Rd b/man/sparse2DT.Rd
new file mode 100644
index 0000000..490a880
--- /dev/null
+++ b/man/sparse2DT.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cellphonedb_utilities.R
+\name{sparse2DT}
+\alias{sparse2DT}
+\title{Convert a sparse matrix to a data.table}
+\usage{
+sparse2DT(matrix)
+}
+\arguments{
+\item{matrix}{sparse matrix to be used}
+}
+\value{
+
+}
+\description{
+Convert a sparse matrix to a data.table
+}
+\examples{
+
+
+}
diff --git a/man/sparse2DT.Seurat.Rd b/man/sparse2DT.Seurat.Rd
new file mode 100644
index 0000000..ebcdd3c
--- /dev/null
+++ b/man/sparse2DT.Seurat.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cellphonedb_utilities.R
+\name{sparse2DT.Seurat}
+\alias{sparse2DT.Seurat}
+\title{Convert a sparse matrix to a data.table}
+\usage{
+sparse2DT.Seurat(object)
+}
+\arguments{
+\item{object}{Seurat object}
+}
+\value{
+
+}
+\description{
+Convert a sparse matrix to a data.table
+}
diff --git a/man/umap.Rd b/man/umap.Rd
new file mode 100644
index 0000000..38a2fca
--- /dev/null
+++ b/man/umap.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/umap-learn.R
+\name{umap}
+\alias{umap}
+\title{UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)}
+\usage{
+umap(
+  embedding,
+  a = NULL,
+  angular_rp_forest = FALSE,
+  b = NULL,
+  force_approximation_algorithm = FALSE,
+  init = "spectral",
+  learning_rate = 1,
+  local_connectivity = 1,
+  low_memory = FALSE,
+  metric = "euclidean",
+  metric_kwds = NULL,
+  min_dist = 0.1,
+  n_components = 2,
+  n_epochs = 200,
+  n_neighbors = 15,
+  negative_sample_rate = 5,
+  output_metric = "euclidean",
+  output_metric_kwds = NULL,
+  random_state = 42,
+  repulsion_strength = 1,
+  set_op_mix_ratio = 1,
+  spread = 1,
+  target_metric = "categorical",
+  target_metric_kwds = NULL,
+  target_n_neighbors = -1,
+  target_weight = 0.5,
+  transform_queue_size = 4,
+  transform_seed = 42,
+  unique = FALSE,
+  verbose = TRUE,
+  nThreads = parallel::detectCores() - 1
+)
+}
+\arguments{
+\item{nThreads}{number of parallel threads to be used}
+}
+\value{
+
+}
+\description{
+UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+}
diff --git a/man/visUMAP.Rd b/man/visUMAP.Rd
new file mode 100644
index 0000000..640f23d
--- /dev/null
+++ b/man/visUMAP.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/umap.R
+\name{visUMAP}
+\alias{visUMAP}
+\title{UWOT-UMAP}
+\usage{
+visUMAP(
+  object,
+  reduction = "harmony",
+  spread = 1,
+  n_components = 2,
+  min_dist = 0.3,
+  metric = "cosine",
+  n_neighbors = 30,
+  set_op_mix_ratio = 1,
+  local_connectivity = 1,
+  repulsion_strength = 1,
+  negative_sample_rate = 5,
+  n_threads = parallel::detectCores() - 1,
+  reduction_name = "umap",
+  return_seurat = TRUE,
+  verbose = TRUE
+)
+}
+\arguments{
+\item{verbose}{whether to print function messages}
+}
+\value{
+
+}
+\description{
+UWOT-UMAP
+}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..13026b5
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,27 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// rcpp_hello_world: generated .Call entry point for the exported C++ function; it takes no arguments.
+List rcpp_hello_world();  // forward declaration; the definition lives in src/rcpp_hello_world.cpp
+RcppExport SEXP _dbsinglecell_rcpp_hello_world() {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;  // holds the value handed back to R
+    Rcpp::RNGScope rcpp_rngScope_gen;  // NOTE(review): presumably brackets the call with R's RNG state get/put -- confirm in Rcpp docs
+    rcpp_result_gen = Rcpp::wrap(rcpp_hello_world());  // call the real function and wrap its List result
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {  // table of .Call routines passed to R_registerRoutines below
+    {"_dbsinglecell_rcpp_hello_world", (DL_FUNC) &_dbsinglecell_rcpp_hello_world, 0},  // symbol name, function pointer, argument count (0)
+    {NULL, NULL, 0}  // all-NULL entry terminating the table
+};
+
+RcppExport void R_init_dbsinglecell(DllInfo *dll) {  // initialisation hook named after the package's shared library
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);  // only the .Call table is supplied; the other routine tables are NULL
+    R_useDynamicSymbols(dll, FALSE);  // disable dynamic symbol lookup for this DLL
+}
diff --git a/src/RcppExports.o b/src/RcppExports.o
new file mode 100644
index 0000000..b76a3ca
Binary files /dev/null and b/src/RcppExports.o differ
diff --git a/src/dbsinglecell.so b/src/dbsinglecell.so
new file mode 100755
index 0000000..6863fa5
Binary files /dev/null and b/src/dbsinglecell.so differ
diff --git a/src/rcpp_hello_world.cpp b/src/rcpp_hello_world.cpp
new file mode 100644
index 0000000..98a959c
--- /dev/null
+++ b/src/rcpp_hello_world.cpp
@@ -0,0 +1,13 @@
+
+#include <Rcpp.h>
+using namespace Rcpp;
+
+// [[Rcpp::export]]
+List rcpp_hello_world() {
+    // Demo payload: one character vector and one numeric vector.
+    const CharacterVector labels = CharacterVector::create("foo", "bar");
+    const NumericVector values = NumericVector::create(0.0, 1.0);
+
+    // Returned as an unnamed two-element list: character vector first, numeric second.
+    return List::create(labels, values);
+}
diff --git a/src/rcpp_hello_world.o b/src/rcpp_hello_world.o
new file mode 100644
index 0000000..c276559
Binary files /dev/null and b/src/rcpp_hello_world.o differ