diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..6c720e9 Binary files /dev/null and b/.DS_Store differ diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..33cc41e --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,5 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.Rproj.user/4F102347/cpp-compilation-config b/.Rproj.user/4F102347/cpp-compilation-config new file mode 100644 index 0000000..8779cd8 --- /dev/null +++ b/.Rproj.user/4F102347/cpp-compilation-config @@ -0,0 +1,20 @@ +{ + "args": [ + "-isystem", + "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include", + "-I/Library/Developer/CommandLineTools/usr/lib/clang/11.0.0/include", + "-I/usr/local/Cellar/llvm/10.0.0_3/bin/../include/c++/v1", + "-I/usr/local/Cellar/llvm/10.0.0_3/lib/clang/10.0.0/include", + "-I/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include", + "-I/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include", + "-I/Library/Frameworks/R.framework/Resources/include", + "-DNDEBUG", + "-isysroot", + "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk", + "-I/usr/local/include", + "-fPIC" + ], + "pch": "Rcpp", + "is_cpp": true, + "hash": "15931292361600965640" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/cpp-definition-cache b/.Rproj.user/4F102347/cpp-definition-cache new file mode 100644 index 0000000..74d5909 --- /dev/null +++ b/.Rproj.user/4F102347/cpp-definition-cache @@ -0,0 +1,17 @@ +[ + { + "file": "/rdx/projects/dbsinglecell/src/rcpp_hello_world.cpp", + "file_last_write": 1600965640.0, + "definitions": [ + { + "usr": "c:@F@rcpp_hello_world#", + "kind": 6, + "parent_name": "", + "name": "rcpp_hello_world", + "file": "/rdx/projects/dbsinglecell/src/rcpp_hello_world.cpp", + "line": 6, + "column": 6 + } + ] + } +] \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/debug-breakpoints.pper b/.Rproj.user/4F102347/pcs/debug-breakpoints.pper new file 
mode 100644 index 0000000..4893a8a --- /dev/null +++ b/.Rproj.user/4F102347/pcs/debug-breakpoints.pper @@ -0,0 +1,5 @@ +{ + "debugBreakpointsState": { + "breakpoints": [] + } +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/files-pane.pper b/.Rproj.user/4F102347/pcs/files-pane.pper new file mode 100644 index 0000000..de8e32a --- /dev/null +++ b/.Rproj.user/4F102347/pcs/files-pane.pper @@ -0,0 +1,9 @@ +{ + "sortOrder": [ + { + "columnIndex": 2, + "ascending": true + } + ], + "path": "/rdx/projects/dbsinglecell" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/source-pane.pper b/.Rproj.user/4F102347/pcs/source-pane.pper new file mode 100644 index 0000000..b71dede --- /dev/null +++ b/.Rproj.user/4F102347/pcs/source-pane.pper @@ -0,0 +1,5 @@ +{ + "activeTab": 0, + "activeTabSourceWindow1": 0, + "activeTabSourceWindow2": 0 +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/source-window.pper b/.Rproj.user/4F102347/pcs/source-window.pper new file mode 100644 index 0000000..71a5c65 --- /dev/null +++ b/.Rproj.user/4F102347/pcs/source-window.pper @@ -0,0 +1,11 @@ +{ + "sourceWindowGeometry": { + "w18o69s9ak1jd": { + "ordinal": 1, + "x": 240, + "y": 0, + "width": 800, + "height": 778 + } + } +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/windowlayoutstate.pper b/.Rproj.user/4F102347/pcs/windowlayoutstate.pper new file mode 100644 index 0000000..7e3fed9 --- /dev/null +++ b/.Rproj.user/4F102347/pcs/windowlayoutstate.pper @@ -0,0 +1,14 @@ +{ + "left": { + "splitterpos": 319, + "topwindowstate": "NORMAL", + "panelheight": 762, + "windowheight": 800 + }, + "right": { + "splitterpos": 485, + "topwindowstate": "NORMAL", + "panelheight": 762, + "windowheight": 800 + } +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/pcs/workbench-pane.pper b/.Rproj.user/4F102347/pcs/workbench-pane.pper new file mode 100644 index 0000000..7ed1b52 --- /dev/null +++ 
b/.Rproj.user/4F102347/pcs/workbench-pane.pper @@ -0,0 +1,5 @@ +{ + "TabSet1": 2, + "TabSet2": 2, + "TabZoom": {} +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/persistent-state b/.Rproj.user/4F102347/persistent-state new file mode 100644 index 0000000..2d203af --- /dev/null +++ b/.Rproj.user/4F102347/persistent-state @@ -0,0 +1,8 @@ +build-last-errors="[]" +build-last-errors-base-dir="/rdx/projects/dbsinglecell/" +build-last-outputs="[{\"type\":0,\"output\":\"==> Rcpp::compileAttributes()\\n\\n\"},{\"type\":1,\"output\":\"* Updated R/RcppExports.R\\n\"},{\"type\":1,\"output\":\"\\n\"},{\"type\":0,\"output\":\"==> R CMD INSTALL --preclean --no-multiarch --with-keep.source dbsinglecell\\n\\n\"},{\"type\":1,\"output\":\"* installing to library ‘/private/var/folders/sd/l2qc9w6513l7zmfnj_v27wmc0000gn/T/RtmpKJnFOE/temp_libpath11b424050aaa’\\n\"},{\"type\":1,\"output\":\"* installing *source* package ‘dbsinglecell’ ...\\n\"},{\"type\":1,\"output\":\"** using staged installation\\n\"},{\"type\":1,\"output\":\"** libs\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -I\\\"/Library/Frameworks/R.framework/Resources/include\\\" -DNDEBUG -I\\\"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include\\\" -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -Wall -g -O2 -c RcppExports.cpp -o RcppExports.o\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -I\\\"/Library/Frameworks/R.framework/Resources/include\\\" -DNDEBUG -I\\\"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include\\\" -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -Wall -g -O2 -c rcpp_hello_world.cpp -o rcpp_hello_world.o\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"/usr/local/Cellar/llvm/10.0.0_3/bin/clang++ -dynamiclib -Wl,-headerpad_max_install_names 
-undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/usr/local/Cellar/llvm/10.0.0_3/lib -o dbsinglecell.so RcppExports.o rcpp_hello_world.o -F/Library/Frameworks/R.framework/.. -framework R -Wl,-framework -Wl,CoreFoundation\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"installing to /private/var/folders/sd/l2qc9w6513l7zmfnj_v27wmc0000gn/T/RtmpKJnFOE/temp_libpath11b424050aaa/00LOCK-dbsinglecell/00new/dbsinglecell/libs\\n\"},{\"type\":1,\"output\":\"** R\\n\"},{\"type\":1,\"output\":\"** byte-compile and prepare package for lazy loading\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** help\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"*** installing help indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** building package indices\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from temporary location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** checking absolute paths in shared objects and dynamic libraries\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package can be loaded from final location\\n\"},{\"type\":1,\"output\":\"\"},{\"type\":1,\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\"},{\"type\":1,\"output\":\"* DONE (dbsinglecell)\\n\"},{\"type\":1,\"output\":\"\"}]" +compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}" +files.monitored-path="" +find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}" +imageDirtyState="1" +saveActionState="0" diff 
--git a/.Rproj.user/4F102347/rmd-outputs b/.Rproj.user/4F102347/rmd-outputs new file mode 100644 index 0000000..3f2ff2d --- /dev/null +++ b/.Rproj.user/4F102347/rmd-outputs @@ -0,0 +1,5 @@ + + + + + diff --git a/.Rproj.user/4F102347/saved_source_markers b/.Rproj.user/4F102347/saved_source_markers new file mode 100644 index 0000000..2b1bef1 --- /dev/null +++ b/.Rproj.user/4F102347/saved_source_markers @@ -0,0 +1 @@ +{"active_set":"","sets":[]} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/051E5C13 b/.Rproj.user/4F102347/sources/prop/051E5C13 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/051E5C13 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/133D5E34 b/.Rproj.user/4F102347/sources/prop/133D5E34 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/133D5E34 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/1443B4E2 b/.Rproj.user/4F102347/sources/prop/1443B4E2 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/1443B4E2 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/14D99F79 b/.Rproj.user/4F102347/sources/prop/14D99F79 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/14D99F79 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/1DD97DDE b/.Rproj.user/4F102347/sources/prop/1DD97DDE new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/1DD97DDE @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/2EA126F7 b/.Rproj.user/4F102347/sources/prop/2EA126F7 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/2EA126F7 @@ -0,0 +1 @@ +{} \ No newline at end of file 
diff --git a/.Rproj.user/4F102347/sources/prop/37189DC7 b/.Rproj.user/4F102347/sources/prop/37189DC7 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/37189DC7 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/377C7BA5 b/.Rproj.user/4F102347/sources/prop/377C7BA5 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/377C7BA5 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/38FF06E6 b/.Rproj.user/4F102347/sources/prop/38FF06E6 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/38FF06E6 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/3C93B1F8 b/.Rproj.user/4F102347/sources/prop/3C93B1F8 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/3C93B1F8 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/3FB33404 b/.Rproj.user/4F102347/sources/prop/3FB33404 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/3FB33404 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/4581755F b/.Rproj.user/4F102347/sources/prop/4581755F new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/4581755F @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/4F76852F b/.Rproj.user/4F102347/sources/prop/4F76852F new file mode 100644 index 0000000..8a4e166 --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/4F76852F @@ -0,0 +1,3 @@ +{ + "source_window_id": "w18o69s9ak1jd" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/59DFEF3E b/.Rproj.user/4F102347/sources/prop/59DFEF3E new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ 
b/.Rproj.user/4F102347/sources/prop/59DFEF3E @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/64FDE598 b/.Rproj.user/4F102347/sources/prop/64FDE598 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/64FDE598 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/69252159 b/.Rproj.user/4F102347/sources/prop/69252159 new file mode 100644 index 0000000..62762cb --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/69252159 @@ -0,0 +1,3 @@ +{ + "source_window_id": "wy4ve70vn7zpp" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/741250F7 b/.Rproj.user/4F102347/sources/prop/741250F7 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/741250F7 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/75394C5A b/.Rproj.user/4F102347/sources/prop/75394C5A new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/75394C5A @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/7A8FA062 b/.Rproj.user/4F102347/sources/prop/7A8FA062 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/7A8FA062 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/8695D2EE b/.Rproj.user/4F102347/sources/prop/8695D2EE new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/8695D2EE @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/896AF6F0 b/.Rproj.user/4F102347/sources/prop/896AF6F0 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/896AF6F0 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/8F8E8389 
b/.Rproj.user/4F102347/sources/prop/8F8E8389 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/8F8E8389 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/9122B8A0 b/.Rproj.user/4F102347/sources/prop/9122B8A0 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/9122B8A0 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/AB64079C b/.Rproj.user/4F102347/sources/prop/AB64079C new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/AB64079C @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/ADDF398F b/.Rproj.user/4F102347/sources/prop/ADDF398F new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/ADDF398F @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/B343C981 b/.Rproj.user/4F102347/sources/prop/B343C981 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/B343C981 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/B497A198 b/.Rproj.user/4F102347/sources/prop/B497A198 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/B497A198 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/B5AFB4CD b/.Rproj.user/4F102347/sources/prop/B5AFB4CD new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/B5AFB4CD @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/B7BAAE89 b/.Rproj.user/4F102347/sources/prop/B7BAAE89 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/B7BAAE89 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git 
a/.Rproj.user/4F102347/sources/prop/C046BAAF b/.Rproj.user/4F102347/sources/prop/C046BAAF new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/C046BAAF @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/C230E935 b/.Rproj.user/4F102347/sources/prop/C230E935 new file mode 100644 index 0000000..72a3a92 --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/C230E935 @@ -0,0 +1,3 @@ +{ + "tempName": "Untitled1" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/CD8192A0 b/.Rproj.user/4F102347/sources/prop/CD8192A0 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/CD8192A0 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/DC147442 b/.Rproj.user/4F102347/sources/prop/DC147442 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/DC147442 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/E0576C72 b/.Rproj.user/4F102347/sources/prop/E0576C72 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/E0576C72 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/E059CE59 b/.Rproj.user/4F102347/sources/prop/E059CE59 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/E059CE59 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/E44D9DA6 b/.Rproj.user/4F102347/sources/prop/E44D9DA6 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/E44D9DA6 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/ED610C92 b/.Rproj.user/4F102347/sources/prop/ED610C92 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ 
b/.Rproj.user/4F102347/sources/prop/ED610C92 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/ED69A4CA b/.Rproj.user/4F102347/sources/prop/ED69A4CA new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/ED69A4CA @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/F731FB4B b/.Rproj.user/4F102347/sources/prop/F731FB4B new file mode 100644 index 0000000..72a3a92 --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/F731FB4B @@ -0,0 +1,3 @@ +{ + "tempName": "Untitled1" +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/F934CB88 b/.Rproj.user/4F102347/sources/prop/F934CB88 new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/F934CB88 @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/prop/INDEX b/.Rproj.user/4F102347/sources/prop/INDEX new file mode 100644 index 0000000..5b12250 --- /dev/null +++ b/.Rproj.user/4F102347/sources/prop/INDEX @@ -0,0 +1,40 @@ +%2Frdx%2Fprojects%2FGeCKO%2F.travis.yml="ED610C92" +%2Frdx%2Fprojects%2FGeCKO%2FDESCRIPTION="1DD97DDE" +%2Frdx%2Fprojects%2FGeCKO%2FNAMESPACE="7A8FA062" +%2Frdx%2Fprojects%2FGeCKO%2F_pkgdown.yml="E0576C72" +%2Frdx%2Fprojects%2FPerNiche%2FDESCRIPTION="14D99F79" +%2Frdx%2Fprojects%2FPerNiche%2FR%2FNewMeta.R="E059CE59" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fbetter.R="69252159" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fcreate_merge.R="38FF06E6" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fdb_read10x.R="C046BAAF" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fhdbscan.R="4F76852F" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fplot_umap.R="B7BAAE89" +%2Frdx%2Fprojects%2FPerNiche%2FR%2FscRNA_helpers.R="377C7BA5" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fumap-learn.R="133D5E34" +%2Frdx%2Fprojects%2FPerNiche%2FR%2Fumap.R="CD8192A0" +%2Frdx%2Fprojects%2FQE%2FR%2FCellPhoneDB.R="64FDE598" 
+%2Frdx%2Fprojects%2FQE%2FR%2FPerNiche%2F02_dimReduction.R="E44D9DA6" +%2Frdx%2Fprojects%2FQE%2FR%2FPerNiche%2Fcomplete2.R="AB64079C" +%2Frdx%2Fprojects%2FQE%2Fdocs%2FPerNiche.Rmd="2EA126F7" +%2Frdx%2Fprojects%2Fdb_singlecell%2FDESCRIPTION="37189DC7" +%2Frdx%2Fprojects%2Fdb_singlecell%2FNAMESPACE="ED69A4CA" +%2Frdx%2Fprojects%2Fdb_singlecell%2FR%2Fcellphonedb_utilities.R="C230E935" +%2Frdx%2Fprojects%2Fdb_singlecell%2FRead-and-delete-me="8F8E8389" +%2Frdx%2Fprojects%2Fdbsinglecell%2FDESCRIPTION="B343C981" +%2Frdx%2Fprojects%2Fdbsinglecell%2FNAMESPACE="741250F7" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2FNewMeta.R="DC147442" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fbetter.R="1443B4E2" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fcellphonedb_utilities.R="9122B8A0" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fcreate_merge.R="8695D2EE" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fdb_read10x.R="4581755F" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fhdbscan.R="F934CB88" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Forganize_10x.R="B5AFB4CD" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fplot_umap.R="B497A198" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Freticulate_helpers.R="F731FB4B" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2FscRNA_helpers.R="3C93B1F8" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fumap-learn.R="051E5C13" +%2Frdx%2Fprojects%2Fdbsinglecell%2FR%2Fumap.R="75394C5A" +%2Frdx%2Fprojects%2Fdbsinglecell%2FREADME.md="3FB33404" +%2Frdx%2Fprojects%2Fdbsinglecell%2FRead-and-delete-me="59DFEF3E" +%2Frdx%2Fprojects%2Fdbsinglecell%2F_pkgdown.yml="ADDF398F" +%2Frdx%2Fprojects%2Fdbsinglecell%2Fman%2Fcellphonedb_summary.Rd="896AF6F0" diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents new file mode 100644 index 0000000..7671da6 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/064AE12D-contents @@ -0,0 +1,19 @@ +# Generated by roxygen2: do not edit by hand + +export(HDBSCAN) +export(HDBSCAN.Seurat) +export(NewMeta) 
+export(cellphonedb_summary) +export(clustUMAP) +export(create_seurat) +export(db_read10x) +export(gg_umap) +export(hcl_umap) +export(hue_umap) +export(pal_umap) +export(pre_processing) +export(rbw_umap) +export(read10x) +export(read10x_atlas) +export(umap) +export(visUMAP) diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents new file mode 100644 index 0000000..711ea0d --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/066ADC9C-contents @@ -0,0 +1,176 @@ +#' Custom Palette for UMAP +#' +#' @param object +#' @param group_col +#' @param base_col +#' @param jitter randomize the colors +#' +#' @return +#' @export +#' @importFrom colortools setColors +#' @import viridis +#' +#' @examples +#' +pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + + + if(jitter){ + new_order <- unlist(sapply(1:5, function(x) seq(x, n,5))) + } + pal <- c(pal, colortools::setColors(base_col,n))[new_order] + return(pal) +} + +#' UMAP Palette using HCL presets +#' +#' @param object +#' @param group_col +#' @param hcl_pal +#' @param jitter +#' @param comp integer setting the color complementary to be used +#' +#' @return +#' @export +#' +#' @examples +hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order]) + return(pal) +} + +#' UMAP palette using rainbow colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp integer setting the 
color complementary to be used +#' +#' @return +#' @export +#' +#' @examples +rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal,rainbow(57,s = 0.7,v = 0.8,alpha = 0.95)[new_order]) + return(pal) +} + +#' UMAP Palette using soft hues +#' +#' @param object +#' @param group_col +#' @param jitter integer setting the color complementary to be used +#' @param comp integer setting the color complementary to be used +#' +#' @return +#' @export +#' +#' @import colorspace +#' +#' @examples +hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order]) + return(pal) +} + +gg_color_hue <- function(n) { + hues = seq(15, 375, length = n + 1) + hcl(h = hues, l = 65, c = 100)[1:n] +} + +#' UMAP Palette using ggplot2 colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp integer setting the color complementarity to be used +#' +#' @return +#' @export +#' +#' @examples +gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, gg_color_hue(n)[new_order]) + 
return(pal) +} + +hex_convert <- function(x){ + if(x>=256) stop() + tmp <- c(0:9, LETTERS[1:6]) + + first <- floor(x/16) + + first <- ifelse(first==16, 15, first) + second <- x - first*16 + res <- paste0(tmp[first+1], tmp[second+1]) + return(res) +} + +hex_convert <- Vectorize(hex_convert) + +incA <- function(n, min = 0, base = '#E1E1E1'){ + low <- hex_convert(min/100*255) + c1 <- paste0(base, low) + + res <- c(c1,paste0( + substring(viridis::plasma(n), 1, 7), + hex_convert(seq(min/100*255,255, length.out = n)))) + return(res) +} + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents new file mode 100644 index 0000000..ac2ced4 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/07F7096D-contents @@ -0,0 +1,11 @@ +# global reference to scipy (will be initialized in .onLoad) +scipy <- NULL + +.onLoad <- function(libname, pkgname) { + # use superassignment to update global reference to scipy + scipy <<- reticulate::import("scipy", delay_load = TRUE) +} + +install_python_packages <- function(method = "auto", conda = "auto") { + reticulate::py_install(c("hdscan",'umap'), method = method, conda = conda) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents new file mode 100644 index 0000000..6277ef3 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/0C8DBFCD-contents @@ -0,0 +1,17 @@ +#' Easy Add Meta data to Seurat Object +#' +#' @param object +#' @param meta +#' @param col.name +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +NewMeta <- function(object, meta, col.name){ + test <- meta[as.character(Seurat::Idents(object))] + names(test) <- colnames(object) + result <- Seurat::AddMetaData(object, test, col.name) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents new file mode 100644 
index 0000000..3c7df6f --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/11D56325-contents @@ -0,0 +1,89 @@ +prep_cellphonedb <- function(rds){ + require(data.table) + require(Seurat) + require(Matrix) + setDTthreads(20) + pn <- readRDS('../../Peritoneal_Niche.rds') + require(Matrix) + + # exporting normalized data + mat <- pn@assays$RNA@data[, Cells(pn)] + + # creating i,j,x format + mm.sum <- summary(mat) + + # creating workable dataset of count data + res <- data.table::data.table(Genes = rownames(mat)[mm.sum$i], Cell = colnames(mat)[mm.sum$j], Count = mm.sum$x) + # create vector with values interested to use for cell_type + new.meta <- pn@meta.data[,'cell_subset'] + names(new.meta) <- rownames(pn@meta.data) + + # add cell_types to res + res[,cell_subset:=new.meta[res$Cell]] + setkey(res, Genes, cell_subset) + + # generate summary information to be used for filtering uninformative genes + test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes','cell_subset')] + test[,total:=sum(N),Genes] + test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] + + # create vector with leftover genes + gl <- unique(test$Genes) + length(gl) + + # subset count dataset + res <- res[Genes %in% gl] + + # create counts file + counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) + colnames(counts)[1] <- 'Gene' + setkey(counts, Gene) + + m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse') + mz_genes <- m2h$Ensembl_gene_id + names(mz_genes) <- m2h$mouse + new_genes <- mz_genes[counts$Gene] + names(new_genes) <- counts$Gene + new_genes <- new_genes[!is.na(new_genes)] + + dim(counts) + counts <- counts[Gene %in% names(new_genes)] + counts[,Gene:=new_genes[Gene]] + + # create meta file + meta <- data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]]) + meta <- meta[Cell %in% colnames(counts)[-1]] + fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE) + + fwrite(counts,'PerNiche_int/counts.csv', 
nThread = 20, showProgress = TRUE) +} + +sparse2DT <- function(matrix){ + require(data.table) + require(Seurat) + require(Matrix) + + require(Matrix) + # creating i,j,x format + mm.sum <- summary(matrix) + + # creating workable dataset of count data + result <- data.table::data.table(Genes = rownames(matrix)[mm.sum$i], Cell = colnames(matrix)[mm.sum$j], Count = mm.sum$x) +return(result) +} + + + +sparse2DT.Seurat <- function(object){ + require(data.table) + require(Seurat) + require(Matrix) + + require(Matrix) + + # exporting normalized data + mat <- object@assays$RNA@data[, Cells(object)] + + result <- sparse2DT(mat) + return(result) +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents new file mode 100644 index 0000000..51355f8 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/1D6516AD-contents @@ -0,0 +1,156 @@ +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param x +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param prediction_data not sure what this is for. Will update later. 
+#' +#' @return +#' @export +#' +#' @import reticulate +#' +#' @examples +HDBSCAN <- function(x, + algorithm='best', + alpha=1.0, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + prediction_data=TRUE, + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores() +){ + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm = algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + + + + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + return(result) +} + + + +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param object +#' @param reduction +#' @param dims +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param return_seurat logical to return the result within the orignal object or as the raw HDBSCAN result +#' @param prediction_data not sure what this is for. Will update later. 
+#' +#' @return +#' @export +#' +#' @examples +HDBSCAN.Seurat <- function(object, + reduction = 'umap', + dims = NULL, + algorithm='best', + alpha=1.0, + prediction_data = TRUE, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores(), + return_seurat = TRUE +){ + + if(is.null(dims)){ + x <- Seurat::Embeddings(object, reduction = reduction) + } else { + x <- Seurat::Embeddings(object, reduction = reduction)[,dims] + } + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm=algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + if(return_seurat){ + object@misc$hdbscan <- result + object$cl <- factor(clusterer$labels_) + return(object) + } else { + return(result) + } + + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents new file mode 100644 index 0000000..44533d2 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/202FF0DD-contents @@ -0,0 +1,15 @@ +# global reference to scipy (will be initialized in .onLoad) +scipy <- NULL +hdbscan <- NULL +umap <- NULL + +.onLoad <- 
function(libname, pkgname) { + # use superassignment to update global reference to scipy + scipy <<- reticulate::import("scipy", delay_load = TRUE) + hdbscan <<- reticulate::import('hdbscan', delay_load = TRUE) + umap <<- reticulate::import('umap', delay_load = TRUE) +} + +install_python_packages <- function(method = "auto", conda = "auto") { + reticulate::py_install(c("hdscan",'umap'), method = method, conda = conda) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents new file mode 100644 index 0000000..1768c36 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/211E9D59-contents @@ -0,0 +1,132 @@ +#' Simple method for creating Seurat Objects +#' +#' @param filepath +#' @param sample +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +create_seurat <- function(filepath, sample = NULL ){ + if(is.null(sample)){ + sample <- basename(filepath) + } + + # read in 10X data + x <- Seurat::Read10X(data.dir = filepath) + + # create unique cell ids + cell_ids <- paste0(sample, '_', colnames(x)) + colnames(x) <-cell_ids + + # create Seurat Object and include meta data + suppressWarnings({ + res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample) + }) + + return(res) +} + +#' Seurat Preprocessing +#' +#' @param object +#' @param species +#' @param nfeatures +#' @param npcs +#' +#' @return +#' @export +#' @import Seurat +#' @importFrom stringr str_to_title +#' @import crayon +#' +#' @examples +pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){ + if(species == 'Homo sapiens'){ + mt_pattern <- '^MT-' + } else { + mt_pattern <- '^mt-' + } + + object <- Seurat::PercentageFeatureSet(object, + pattern = mt_pattern, + col.name = "percent.mt") + + message_section('Filtering out low quality cells and doublets') + + # Removing low quality cells and doublets + object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & 
nFeature_RNA < 4100) + + message_section('Normalizing data') + # Normalization + object<- Seurat::NormalizeData(object, verbose = TRUE) + # Variable Features + + message_section(paste('Finding',nfeatures,'most variable fatures')) + object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures) + + if(species =='Mus musculus'){ # change gene name format to title capitalization + ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes) + ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes) + } else { # use build in gene names + ccss <- cc.genes.updated.2019$s.genes + ccg2m <- cc.genes.updated.2019$g2m.genes + } + + # scoring function + object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m) + + # difference between s and g2m scores + object$CC.Difference <-object$S.Score -object$G2M.Score + + message_section('Scaling data') + # Scaling Data ---- + object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt')) + + message_section('Performing PCA') + message_append(paste('using npcs =',npcs)) + # PCA ---- + object<- Seurat::RunPCA( + object, + pc.genes =object@var.genes, + npcs = npcs) + return(object) + +} + +message_section <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + cat("\n",rep('-',n), "\n",sep = '') + cat(crayon::bold(crayon::yellow(paste0('[',Sys.time(),']'))), crayon::bold(crayon::green(text)),'\n') +} + +message_task <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n -22){ + cat(crayon::yellow(paste0('[',Sys.time(),']')),'\n') + } else { + cat(crayon::yellow(paste0('[',Sys.time(),']')), text,'\n') + } + +} + +message_append <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n - 22){ + invisible() + } else { + cat(rep(' ',23),crayon::silver('- '),crayon::silver(text),'\n', sep = '') + } +} diff --git 
a/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents new file mode 100644 index 0000000..4016df9 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/37463D0E-contents @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cellphonedb_utilities.R +\name{cellphonedb_summary} +\alias{cellphonedb_summary} +\title{CellPhoneDB Summary File} +\usage{ +cellphonedb_summary(path, pvalue = "all") +} +\arguments{ +\item{pvalue}{} +} +\value{ + +} +\description{ +CellPhoneDB Summary File +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents new file mode 100644 index 0000000..4164644 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3CD993A7-contents @@ -0,0 +1,154 @@ +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param x +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' +#' @return +#' @export +#' +#' @import reticulate +#' +#' @examples +HDBSCAN <- function(x, + algorithm='best', + alpha=1.0, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + prediction_data=TRUE, + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores() +){ + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm = algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + 
min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + + + + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + return(result) +} + + + +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param object +#' @param reduction +#' @param dims +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param return_seurat +#' +#' @return +#' @export +#' +#' @examples +HDBSCAN.Seurat <- function(object, + reduction = 'umap', + dims = NULL, + algorithm='best', + alpha=1.0, + prediction_data = TRUE, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores(), + return_seurat = TRUE +){ + + if(is.null(dims)){ + x <- Seurat::Embeddings(object, reduction = reduction) + } else { + x <- Seurat::Embeddings(object, reduction = reduction)[,dims] + } + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm=algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = 
as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + if(return_seurat){ + object@misc$hdbscan <- result + object$cl <- factor(clusterer$labels_) + return(object) + } else { + return(result) + } + + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents new file mode 100644 index 0000000..848aa5c --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/3E4439EF-contents @@ -0,0 +1,145 @@ +#' CellPhoneDB Summary File +#' +#' @param path the directory containing the CellPhoneDB Output +#' @param pvalue setting this will return results less than it +#' +#' @return +#' @export +#' +#' @examples +#' @import data.table +#' @import Matrix +#' @import crayon +#' +cellphonedb_summary <- function(path, pvalue = 'all'){ + means <- data.table::fread(file.path(path,'means.txt')) + pvalues <- data.table::fread(file.path(path, 'pvalues.txt')) + id.vars <- colnames(means)[1:11] + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files')))) + means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean') + pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue') + + data.table::setkeyv(means, c('cell_pair',id.vars)) + data.table::setkeyv(pvalues, c('cell_pair',id.vars)) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets')))) + result <- data.table::merge.data.table(means, pvalues) + result <- as.data.table(result) + + 
cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs')))) + int_pairs <- strcapture('(.+)\\_(.+)',result$interacting_pair, + data.table::data.table(gA = character(), + gB = character())) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs')))) + cell_pair <- strcapture('(.+)\\|(.+)', + result$cell_pair, + data.table::data.table(cell_a = character(), + cell_b = character())) + + result <- data.table(cell_pair, int_pairs, result) + if(pvalue=='significant'){ + result <- result[pvalue<0.05] + } + cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished')))) + + return(result) +} + + +# prep_cellphonedb <- function(rds, meta_column, path){ +# require(data.table) +# require(Seurat) +# +# object <- readRDS(rds) +# +# res <- sparse2DT.Seurat(object) +# +# new.meta <- object@meta.data[,meta_column] +# names(new.meta) <- rownames(object@meta.data) +# +# # add cell_types to res +# res[,cell_subset:=new.meta[res$Cell]] +# data.table::setkey(res, Genes, cell_subset) +# +# # generate summary information to be used for filtering uninformative genes +# test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)] +# test[,total:=sum(N),Genes] +# test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] +# +# # create vector with leftover genes +# gl <- unique(test$Genes) +# +# # subset count dataset +# res <- res[Genes %in% gl] +# +# # create counts file +# counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) +# colnames(counts)[1] <- 'Gene' +# setkey(counts, Gene) +# +# m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse') +# mz_genes <- m2h$Ensembl_gene_id +# names(mz_genes) <- m2h$mouse +# new_genes <- mz_genes[counts$Gene] +# names(new_genes) <- counts$Gene +# new_genes <- new_genes[!is.na(new_genes)] +# +# dim(counts) +# counts <- counts[Gene %in% names(new_genes)] +# counts[,Gene:=new_genes[Gene]] +# +# # create meta file +# meta <- 
data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]]) +# meta <- meta[Cell %in% colnames(counts)[-1]] +# fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE) +# +# fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE) +# } + +#' Convert a sparse matrix to a data.table +#' +#' @param matrix sparse matrix to be used +#' +#' @return +#' @export +#' +#' @import data.table +#' @import Seurat +#' @import Matrix +#' +#' @examples +#' +#' +sparse2DT <- function(matrix){ + + # creating i,j,x format + mm.sum <- Matrix::summary(matrix) + + # creating workable dataset of count data + result <- data.table::data.table(Genes = rownames(matrix)[mm.sum$i], Cell = colnames(matrix)[mm.sum$j], Count = mm.sum$x) + return(result) +} + + + +#' Convert a sparse matrix to a data.table +#' +#' @param object Seurat object +#' +#' @return +#' @export +#' @import data.table +#' @import Seurat +#' @importMatrix +#' +#' @examples +sparse2DT.Seurat <- function(object){ + + # exporting normalized data + mat <- object@assays$RNA@data[, Cells(object)] + + result <- sparse2DT(mat) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents new file mode 100644 index 0000000..8f94962 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/419D593F-contents @@ -0,0 +1,10 @@ +* Edit the help file skeletons in 'man', possibly combining help + files for multiple functions. +* Edit the exports in 'NAMESPACE', and add necessary imports. +* Put any C/C++/Fortran code in 'src'. +* If you have compiled code, add a useDynLib() directive to + 'NAMESPACE'. +* Run R CMD build to build the package tarball. +* Run R CMD check to check the package tarball. + +Read "Writing R Extensions" for more information. 
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents new file mode 100644 index 0000000..8165dea --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/41AD7347-contents @@ -0,0 +1,53 @@ +#' Reorganize another person's mess into a usable 10X dataset +#' +#' @param x +#' +#' @return +#' @export +#' @import data.table +#' +#' @examples +organize_10x <- function(x ){ + path_main <- x + file_list <- dir(path = x, full.names = T) + file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)] + + res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list), + proto = data.table::data.table(accession_id = character(), + file_type = character()))) + res$old_path <- file_list + res$old_name<- basename(file_list) + res[res$file_type=='genes.tsv.gz']$file_type<-'features.tsv.gz' + res$new_folder <- file.path(path_main,paste0(res$accession_id)) + res$new_path <- file.path(res$new_folder, res$file_type) + + new_dirs <- unique(res$new_folder) + + length(file_list) + pb <- progress::progress_bar$new( + format = " [:bar] :percent eta: :eta", + clear = FALSE, total = length(file_list), width = 80) + + + for(i in new_dirs){ + + if(!dir.exists(i)){ + dir.create(i) + } + + f2m <- res[new_folder==i] + + for(j in 1:nrow(f2m)){ + pb$tick() + file.copy(f2m[j,old_path ],f2m[j,new_path] ) + } + + } + +for(i in file_list){ + file.remove(i) +} + +} + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents new file mode 100644 index 0000000..aca7d14 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/474705CA-contents @@ -0,0 +1,156 @@ +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param x +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param 
metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param prediction_data +#' +#' @return +#' @export +#' +#' @import reticulate +#' +#' @examples +HDBSCAN <- function(x, + algorithm='best', + alpha=1.0, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + prediction_data=TRUE, + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores() +){ + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm = algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + + + + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + return(result) +} + + + +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param object +#' @param reduction +#' @param dims +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param return_seurat logical to return the result within the orignal object or as the raw HDBSCAN result +#' @param 
prediction_data +#' +#' @return +#' @export +#' +#' @examples +HDBSCAN.Seurat <- function(object, + reduction = 'umap', + dims = NULL, + algorithm='best', + alpha=1.0, + prediction_data = TRUE, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores(), + return_seurat = TRUE +){ + + if(is.null(dims)){ + x <- Seurat::Embeddings(object, reduction = reduction) + } else { + x <- Seurat::Embeddings(object, reduction = reduction)[,dims] + } + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm=algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + if(return_seurat){ + object@misc$hdbscan <- result + object$cl <- factor(clusterer$labels_) + return(object) + } else { + return(result) + } + + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents new file mode 100644 index 0000000..3a20887 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/576874CB-contents @@ -0,0 +1,132 @@ +#' UWOT-UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param 
min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose +#' +#' @return +#' @export +#' @import uwot +#' @import Seurat +#' +#' @examples +visUMAP <- function(object, + reduction = 'harmony', + spread = 1, + n_components = 2, + min_dist = 0.3, + metric = 'cosine', + n_neighbors = 30, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + umap_res <- uwot::umap(embds, + spread = 1, + n_components = n_components, + min_dist = min_dist, + metric = metric, + n_threads = n_threads, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate + ) + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} + + +#' UWOT-UAMP: Clustering Specific UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose +#' +#' @return +#' @export +#' +#' @examples +clustUMAP <- function(object, + reduction = 'harmony', + spread = 1.1, + n_components = NULL, + min_dist = 0, + metric = 'cosine', + n_neighbors = 50, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + 
negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + if(is.null(n_components)){ + n_components <- ncol(embds) + } + umap_res <- uwot::umap(embds, + spread = spread, + n_components = n_components, + min_dist = min_dist, + n_threads = n_threads, + metric = metric, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate, + verbose = verbose + ) + + rownames(umap_res) <- rownames(embds) + colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res)) + + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, + key = 'clustUMAP_', + assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents new file mode 100644 index 0000000..d6c923b --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/5D4DCD88-contents @@ -0,0 +1,181 @@ +#' Read10x v1 +#' +#' @param path +#' @param return.sce +#' +#' @return +#' @export +#' +#' @examples +read10x <- function(path, return.sce = TRUE){ + require(data.table, quietly = TRUE) + require(Matrix, quietly = TRUE) + require(SingleCellExperiment, quietly = TRUE) + fl <- dir(path) + +# reads in matrix file ---------------------------------------------------- + mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + +# imports barcode --------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + +# imports gene 
------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V2 + + +# duplicate gene names for row names -------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table::data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = list(gene,barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res), meta = meta) + } else { + return(res) + } + +} + +#' Read10x v2 +#' +#' @param filepaths +#' @param project +#' @param meta +#' +#' @return +#' @export +#' +#' @examples +read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){ + require(doParallel,quietly = TRUE) + require(foreach, quietly = TRUE) + int_list <- 1:length(filepaths) + +# checking meta data ------------------------------------------------------ + # if(is.null(meta)){ + # meta = list() + # } else if(nrow(meta)!=length(filepaths)){ + # stop('meta data needs to be the same length as filepaths') + # } else { + # meta <- as.list(meta) + # } + +# setting project vector -------------------------------------------------- + # if(length(project)!=length(filepaths)){ + # if( length(project) == 1){ + # project <- rep(project, times = length(filepaths)) + # } else { + # stop('supply either one project or a vector the same length as filepaths') + # } + # } + +# creating cluster and registering doSNOW --------------------------------- + numCores <- parallel::detectCores() -1 + cl <- snow::makeCluster(numCores) + doSNOW::registerDoSNOW(cl) + on.exit(snow::stopCluster(cl)) + e <- simpleError("error occured") + +# progress bar ------------------------------------------------------------ + iterations <- length(int_list) # used for the 
foreach loop + + pb <- progress::progress_bar$new( + format = ":percent item = :item [:bar] :elapsed | eta: :eta", + total = iterations, + width = floor(options()$width*0.9), + clear = TRUE + ) + + # allowing progress bar to be used in foreach ----------------------------- + + progress <- function(n) { + pb$tick(tokens = list(item = int_list[n])) # report the int_list item + } + + opts <- list(progress = progress) # used in the the foreach loop + + result <- foreach( i = 1:iterations, + .options.snow = opts, + .export = 'db_read10x', + .combine = 'cbind', + .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% { + db_read10x(path = filepaths[i]) + } + + + return(result) +} + + +#' Read10x v3 +#' +#' @param path +#' @param return.sce +#' +#' @return +#' @export +#' +#' @examples +db_read10x <- function(path, return.sce = TRUE){ + require(data.table, quietly = TRUE) + fl <- dir(path) + + # reads in matrix file ---------------------------------------------------- + mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + + # imports barcode --------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + + # imports gene ------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V1 + + + # duplicate gene names for row names -------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + max_i <- max(mat$i) + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = 
list(gene[1:max_i],barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res)) + } else { + return(gene) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents new file mode 100644 index 0000000..3d824a5 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/60C143E1-contents @@ -0,0 +1,201 @@ +#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction) +#' +#' @param embedding +#' @param a +#' @param angular_rp_forest +#' @param b +#' @param force_approximation_algorithm +#' @param init +#' @param learning_rate +#' @param local_connectivity +#' @param low_memory +#' @param metric +#' @param metric_kwds +#' @param min_dist +#' @param n_components +#' @param n_epochs +#' @param n_neighbors +#' @param negative_sample_rate +#' @param output_metric +#' @param output_metric_kwds +#' @param random_state +#' @param repulsion_strength +#' @param set_op_mix_ratio +#' @param spread +#' @param target_metric +#' @param target_metric_kwds +#' @param target_n_neighbors +#' @param target_weight +#' @param transform_queue_size +#' @param transform_seed +#' @param unique +#' @param verbose +#' @param nThreads +#' +#' @return +#' @export +#' +#' @import reticulate +#' @import Seurat +#' +#' @examples +umap <- function( + embedding, + a=NULL, + angular_rp_forest=FALSE, + b=NULL, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=200, + n_neighbors=15, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + 
verbose=TRUE, + nThreads = parallel::detectCores()-1 +){ + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=as.intger(local_connectivity), + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=as.integer(target_n_neighbors), + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + return(result) +} + +umap.Seurat <- function( + object, + reduction = 'pca', + reduction_name = 'umap', + dims = NULL, + a=1.662, + angular_rp_forest=FALSE, + b=0.7905, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=100, + n_neighbors=50, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1, + return_seurat = TRUE +){ + 
+ if(is.null(dims)){ + embedding <- Seurat::Embeddings(object, reduction = reduction) + } else { + embedding <- Seurat::Embeddings(object, reduction = reduction, dims = dims) + } + + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=local_connectivity, + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=target_n_neighbors, + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + if(return_seurat){ + object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA') + return(object) + } else { + return(result) + } +} + + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents new file mode 100644 index 0000000..9411853 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/63F56747-contents @@ -0,0 +1,53 @@ +#' Reorganize another person's mess into a usable 10X dataset +#' +#' @param x path containing the unorganized disaster +#' +#' @return +#' @export +#' @import data.table +#' +#' @examples +organize_10x <- function(x ){ + path_main <- x + file_list <- dir(path = 
x, full.names = T) + file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)] + + res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list), + proto = data.table::data.table(accession_id = character(), + file_type = character()))) + res$old_path <- file_list + res$old_name<- basename(file_list) + res[res$file_type=='genes.tsv.gz']$file_type<-'features.tsv.gz' + res$new_folder <- file.path(path_main,paste0(res$accession_id)) + res$new_path <- file.path(res$new_folder, res$file_type) + + new_dirs <- unique(res$new_folder) + + length(file_list) + pb <- progress::progress_bar$new( + format = " [:bar] :percent eta: :eta", + clear = FALSE, total = length(file_list), width = 80) + + + for(i in new_dirs){ + + if(!dir.exists(i)){ + dir.create(i) + } + + f2m <- res[new_folder==i] + + for(j in 1:nrow(f2m)){ + pb$tick() + file.copy(f2m[j,old_path ],f2m[j,new_path] ) + } + + } + +for(i in file_list){ + file.remove(i) +} + +} + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents new file mode 100644 index 0000000..1768c36 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6CD58C67-contents @@ -0,0 +1,132 @@ +#' Simple method for creating Seurat Objects +#' +#' @param filepath +#' @param sample +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +create_seurat <- function(filepath, sample = NULL ){ + if(is.null(sample)){ + sample <- basename(filepath) + } + + # read in 10X data + x <- Seurat::Read10X(data.dir = filepath) + + # create unique cell ids + cell_ids <- paste0(sample, '_', colnames(x)) + colnames(x) <-cell_ids + + # create Seurat Object and include meta data + suppressWarnings({ + res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample) + }) + + return(res) +} + +#' Seurat Preprocessing +#' +#' @param object +#' @param species +#' @param nfeatures +#' @param npcs +#' +#' @return 
+#' @export +#' @import Seurat +#' @importFrom stringr str_to_title +#' @import crayon +#' +#' @examples +pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){ + if(species == 'Homo sapiens'){ + mt_pattern <- '^MT-' + } else { + mt_pattern <- '^mt-' + } + + object <- Seurat::PercentageFeatureSet(object, + pattern = mt_pattern, + col.name = "percent.mt") + + message_section('Filtering out low quality cells and doublets') + + # Removing low quality cells and doublets + object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100) + + message_section('Normalizing data') + # Normalization + object<- Seurat::NormalizeData(object, verbose = TRUE) + # Variable Features + + message_section(paste('Finding',nfeatures,'most variable fatures')) + object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures) + + if(species =='Mus musculus'){ # change gene name format to title capitalization + ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes) + ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes) + } else { # use build in gene names + ccss <- cc.genes.updated.2019$s.genes + ccg2m <- cc.genes.updated.2019$g2m.genes + } + + # scoring function + object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m) + + # difference between s and g2m scores + object$CC.Difference <-object$S.Score -object$G2M.Score + + message_section('Scaling data') + # Scaling Data ---- + object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt')) + + message_section('Performing PCA') + message_append(paste('using npcs =',npcs)) + # PCA ---- + object<- Seurat::RunPCA( + object, + pc.genes =object@var.genes, + npcs = npcs) + return(object) + +} + +message_section <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + cat("\n",rep('-',n), "\n",sep = '') + cat(crayon::bold(crayon::yellow(paste0('[',Sys.time(),']'))), 
crayon::bold(crayon::green(text)),'\n') +} + +message_task <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n -22){ + cat(crayon::yellow(paste0('[',Sys.time(),']')),'\n') + } else { + cat(crayon::yellow(paste0('[',Sys.time(),']')), text,'\n') + } + +} + +message_append <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n - 22){ + invisible() + } else { + cat(rep(' ',23),crayon::silver('- '),crayon::silver(text),'\n', sep = '') + } +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents new file mode 100644 index 0000000..2f9f0fc --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6D731B61-contents @@ -0,0 +1,132 @@ +#' UWOT-UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose whether to print function messages +#' +#' @return +#' @export +#' @import uwot +#' @import Seurat +#' +#' @examples +visUMAP <- function(object, + reduction = 'harmony', + spread = 1, + n_components = 2, + min_dist = 0.3, + metric = 'cosine', + n_neighbors = 30, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + umap_res <- uwot::umap(embds, + spread = 1, + n_components = n_components, + min_dist = min_dist, + metric = metric, + n_threads = n_threads, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + 
repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate + ) + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} + + +#' UWOT-UAMP: Clustering Specific UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose whether to print function messages +#' +#' @return +#' @export +#' +#' @examples +clustUMAP <- function(object, + reduction = 'harmony', + spread = 1.1, + n_components = NULL, + min_dist = 0, + metric = 'cosine', + n_neighbors = 50, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + if(is.null(n_components)){ + n_components <- ncol(embds) + } + umap_res <- uwot::umap(embds, + spread = spread, + n_components = n_components, + min_dist = min_dist, + n_threads = n_threads, + metric = metric, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate, + verbose = verbose + ) + + rownames(umap_res) <- rownames(embds) + colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res)) + + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, + key = 'clustUMAP_', + assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents 
b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents new file mode 100644 index 0000000..1998197 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/6EB58409-contents @@ -0,0 +1,176 @@ +#' Custom Palette for UMAP +#' +#' @param object +#' @param group_col +#' @param base_col +#' @param jitter +#' +#' @return +#' @export +#' @importFrom colortools setcolors +#' @import viridis +#' +#' @examples +#' +pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + + + if(jitter){ + new_order <- unlist(sapply(1:5, function(x) seq(x, n,5))) + } + pal <- c(pal, colortools::setColors(base_col,n))[new_order] + return(pal) +} + +#' UMAP Pallette using HCL presets +#' +#' @param object +#' @param group_col +#' @param hcl_pal +#' @param jitter +#' @param comp +#' +#' @return +#' @export +#' +#' @examples +hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order]) + return(pal) +} + +#' UMAP palette using rainbow colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp +#' +#' @return +#' @export +#' +#' @examples +rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal,rainbow(57,s = 0.7,v = 
0.8,alpha = 0.95)[new_order]) + return(pal) +} + +#' UMAP Palette using soft hues +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp +#' +#' @return +#' @export +#' +#' @importFrom colortools setcolors sequential_hcl +#' +#' @examples +hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order]) + return(pal) +} + +gg_color_hue <- function(n) { + hues = seq(15, 375, length = n + 1) + hcl(h = hues, l = 65, c = 100)[1:n] +} + +#' UMAP Palette using ggplot2 colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp +#' +#' @return +#' @export +#' +#' @examples +gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, gg_color_hue(n)[new_order]) + return(pal) +} + +hex_convert <- function(x){ + if(x>=256) stop() + tmp <- c(0:9, LETTERS[1:6]) + + first <- floor(x/16) + + first <- ifelse(first==16, 15, first) + second <- x - first*16 + res <- paste0(tmp[first+1], tmp[second+1]) + return(res) +} + +hex_convert <- Vectorize(hex_convert) + +incA <- function(n, min = 0, base = '#E1E1E1'){ + low <- hex_convert(min/100*255) + c1 <- paste0(base, low) + + res <- c(c1,paste0( + substring(viridis::plasma(n), 1, 7), + hex_convert(seq(min/100*255,255, length.out = n)))) + return(res) +} + + diff --git 
a/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents new file mode 100644 index 0000000..0577bd5 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/72B9D613-contents @@ -0,0 +1,132 @@ +#' Simple method for creating Seurat Objects +#' +#' @param filepath +#' @param sample sample name to use +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +create_seurat <- function(filepath, sample = NULL ){ + if(is.null(sample)){ + sample <- basename(filepath) + } + + # read in 10X data + x <- Seurat::Read10X(data.dir = filepath) + + # create unique cell ids + cell_ids <- paste0(sample, '_', colnames(x)) + colnames(x) <-cell_ids + + # create Seurat Object and include meta data + suppressWarnings({ + res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample) + }) + + return(res) +} + +#' Seurat Preprocessing +#' +#' @param object +#' @param species +#' @param nfeatures +#' @param npcs number of principle component dimensions to calculate +#' +#' @return +#' @export +#' @import Seurat +#' @importFrom stringr str_to_title +#' @import crayon +#' +#' @examples +pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){ + if(species == 'Homo sapiens'){ + mt_pattern <- '^MT-' + } else { + mt_pattern <- '^mt-' + } + + object <- Seurat::PercentageFeatureSet(object, + pattern = mt_pattern, + col.name = "percent.mt") + + message_section('Filtering out low quality cells and doublets') + + # Removing low quality cells and doublets + object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100) + + message_section('Normalizing data') + # Normalization + object<- Seurat::NormalizeData(object, verbose = TRUE) + # Variable Features + + message_section(paste('Finding',nfeatures,'most variable fatures')) + object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures) + + if(species =='Mus musculus'){ # change 
gene name format to title capitalization + ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes) + ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes) + } else { # use build in gene names + ccss <- cc.genes.updated.2019$s.genes + ccg2m <- cc.genes.updated.2019$g2m.genes + } + + # scoring function + object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m) + + # difference between s and g2m scores + object$CC.Difference <-object$S.Score -object$G2M.Score + + message_section('Scaling data') + # Scaling Data ---- + object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt')) + + message_section('Performing PCA') + message_append(paste('using npcs =',npcs)) + # PCA ---- + object<- Seurat::RunPCA( + object, + pc.genes =object@var.genes, + npcs = npcs) + return(object) + +} + +message_section <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + cat("\n",rep('-',n), "\n",sep = '') + cat(crayon::bold(crayon::yellow(paste0('[',Sys.time(),']'))), crayon::bold(crayon::green(text)),'\n') +} + +message_task <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n -22){ + cat(crayon::yellow(paste0('[',Sys.time(),']')),'\n') + } else { + cat(crayon::yellow(paste0('[',Sys.time(),']')), text,'\n') + } + +} + +message_append <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n - 22){ + invisible() + } else { + cat(rep(' ',23),crayon::silver('- '),crayon::silver(text),'\n', sep = '') + } +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents new file mode 100644 index 0000000..32499c2 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/74BC0378-contents @@ -0,0 +1,145 @@ +#' CellPhoneDB Summary File +#' +#' @param path the directory containing the CellPhoneDB Output +#' @param pvalue setting 
this will return results less than it +#' +#' @return +#' @export +#' +#' @examples +#' @import data.table +#' @import Matrix +#' @import crayon +#' +cellphonedb_summary <- function(path, pvalue = 'all'){ + means <- data.table::fread(file.path(path,'means.txt')) + pvalues <- data.table::fread(file.path(path, 'pvalues.txt')) + id.vars <- colnames(means)[1:11] + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files')))) + means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean') + pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue') + + data.table::setkeyv(means, c('cell_pair',id.vars)) + data.table::setkeyv(pvalues, c('cell_pair',id.vars)) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets')))) + result <- data.table::merge.data.table(means, pvalues) + result <- as.data.table(result) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs')))) + int_pairs <- strcapture('(.+)\\_(.+)',result$interacting_pair, + data.table::data.table(gA = character(), + gB = character())) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs')))) + cell_pair <- strcapture('(.+)\\|(.+)', + result$cell_pair, + data.table::data.table(cell_a = character(), + cell_b = character())) + + result <- data.table(cell_pair, int_pairs, result) + if(pvalue=='significant'){ + result <- result[pvalue<0.05] + } + cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished')))) + + return(result) +} + + +# prep_cellphonedb <- function(rds, meta_column, path){ +# require(data.table) +# require(Seurat) +# +# object <- readRDS(rds) +# +# res <- sparse2DT.Seurat(object) +# +# new.meta <- object@meta.data[,meta_column] +# names(new.meta) <- rownames(object@meta.data) +# +# # add cell_types to res +# res[,cell_subset:=new.meta[res$Cell]] +# data.table::setkey(res, Genes, 
cell_subset) +# +# # generate summary information to be used for filtering uninformative genes +# test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)] +# test[,total:=sum(N),Genes] +# test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] +# +# # create vector with leftover genes +# gl <- unique(test$Genes) +# +# # subset count dataset +# res <- res[Genes %in% gl] +# +# # create counts file +# counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) +# colnames(counts)[1] <- 'Gene' +# setkey(counts, Gene) +# +# m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse') +# mz_genes <- m2h$Ensembl_gene_id +# names(mz_genes) <- m2h$mouse +# new_genes <- mz_genes[counts$Gene] +# names(new_genes) <- counts$Gene +# new_genes <- new_genes[!is.na(new_genes)] +# +# dim(counts) +# counts <- counts[Gene %in% names(new_genes)] +# counts[,Gene:=new_genes[Gene]] +# +# # create meta file +# meta <- data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]]) +# meta <- meta[Cell %in% colnames(counts)[-1]] +# fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE) +# +# fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE) +# } + +#' Convert a sparse matrix to a data.table +#' +#' @param matrix +#' +#' @return +#' @export +#' +#' @import data.table +#' @import Seurat +#' @import Matrix +#' +#' @examples +#' +#' +sparse2DT <- function(matrix){ + + # creating i,j,x format + mm.sum <- Matrix::summary(matrix) + + # creating workable dataset of count data + result <- data.table::data.table(Genes = rownames(matrix)[mm.sum$i], Cell = colnames(matrix)[mm.sum$j], Count = mm.sum$x) + return(result) +} + + + +#' Convert a sparse matrix to a data.table +#' +#' @param object +#' +#' @return +#' @export +#' @import data.table +#' @import Seurat +#' @importMatrix +#' +#' @examples +sparse2DT.Seurat <- function(object){ + + # exporting normalized data + mat <- object@assays$RNA@data[, 
Cells(object)] + + result <- sparse2DT(mat) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents new file mode 100644 index 0000000..ce41ba9 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/75BE8702-contents @@ -0,0 +1,201 @@ +#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction) +#' +#' @param embedding +#' @param a +#' @param angular_rp_forest +#' @param b +#' @param force_approximation_algorithm +#' @param init +#' @param learning_rate +#' @param local_connectivity +#' @param low_memory +#' @param metric +#' @param metric_kwds +#' @param min_dist +#' @param n_components +#' @param n_epochs +#' @param n_neighbors +#' @param negative_sample_rate +#' @param output_metric +#' @param output_metric_kwds +#' @param random_state +#' @param repulsion_strength +#' @param set_op_mix_ratio +#' @param spread +#' @param target_metric +#' @param target_metric_kwds +#' @param target_n_neighbors +#' @param target_weight +#' @param transform_queue_size +#' @param transform_seed +#' @param unique +#' @param verbose +#' @param nThreads number of parallel threads to be used +#' +#' @return +#' @export +#' +#' @import reticulate +#' @import Seurat +#' +#' @examples +umap <- function( + embedding, + a=NULL, + angular_rp_forest=FALSE, + b=NULL, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=200, + n_neighbors=15, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = 
parallel::detectCores()-1 +){ + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=as.intger(local_connectivity), + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=as.integer(target_n_neighbors), + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + return(result) +} + +umap.Seurat <- function( + object, + reduction = 'pca', + reduction_name = 'umap', + dims = NULL, + a=1.662, + angular_rp_forest=FALSE, + b=0.7905, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=100, + n_neighbors=50, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1, + return_seurat = TRUE +){ + + if(is.null(dims)){ + 
embedding <- Seurat::Embeddings(object, reduction = reduction) + } else { + embedding <- Seurat::Embeddings(object, reduction = reduction, dims = dims) + } + + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=local_connectivity, + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=target_n_neighbors, + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + if(return_seurat){ + object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA') + return(object) + } else { + return(result) + } +} + + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents new file mode 100644 index 0000000..2f4b795 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/808A4BA4-contents @@ -0,0 +1,145 @@ +#' CellPhoneDB Summary File +#' +#' @param path +#' @param pvalue +#' +#' @return +#' @export +#' +#' @examples +#' @import data.table +#' @import Matrix +#' @import crayon +#' +cellphonedb_summary <- function(path, pvalue = 'all'){ + means <- 
data.table::fread(file.path(path,'means.txt')) + pvalues <- data.table::fread(file.path(path, 'pvalues.txt')) + id.vars <- colnames(means)[1:11] + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files')))) + means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean') + pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue') + + data.table::setkeyv(means, c('cell_pair',id.vars)) + data.table::setkeyv(pvalues, c('cell_pair',id.vars)) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets')))) + result <- data.table::merge.data.table(means, pvalues) + result <- as.data.table(result) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs')))) + int_pairs <- strcapture('(.+)\\_(.+)',result$interacting_pair, + data.table::data.table(gA = character(), + gB = character())) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs')))) + cell_pair <- strcapture('(.+)\\|(.+)', + result$cell_pair, + data.table::data.table(cell_a = character(), + cell_b = character())) + + result <- data.table(cell_pair, int_pairs, result) + if(pvalue=='significant'){ + result <- result[pvalue<0.05] + } + cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished')))) + + return(result) +} + + +# prep_cellphonedb <- function(rds, meta_column, path){ +# require(data.table) +# require(Seurat) +# +# object <- readRDS(rds) +# +# res <- sparse2DT.Seurat(object) +# +# new.meta <- object@meta.data[,meta_column] +# names(new.meta) <- rownames(object@meta.data) +# +# # add cell_types to res +# res[,cell_subset:=new.meta[res$Cell]] +# data.table::setkey(res, Genes, cell_subset) +# +# # generate summary information to be used for filtering uninformative genes +# test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)] +# test[,total:=sum(N),Genes] +# test <- 
test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] +# +# # create vector with leftover genes +# gl <- unique(test$Genes) +# +# # subset count dataset +# res <- res[Genes %in% gl] +# +# # create counts file +# counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) +# colnames(counts)[1] <- 'Gene' +# setkey(counts, Gene) +# +# m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse') +# mz_genes <- m2h$Ensembl_gene_id +# names(mz_genes) <- m2h$mouse +# new_genes <- mz_genes[counts$Gene] +# names(new_genes) <- counts$Gene +# new_genes <- new_genes[!is.na(new_genes)] +# +# dim(counts) +# counts <- counts[Gene %in% names(new_genes)] +# counts[,Gene:=new_genes[Gene]] +# +# # create meta file +# meta <- data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]]) +# meta <- meta[Cell %in% colnames(counts)[-1]] +# fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE) +# +# fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE) +# } + +#' Convert a sparse matrix to a data.table +#' +#' @param matrix +#' +#' @return +#' @export +#' +#' @import data.table +#' @import Seurat +#' @import Matrix +#' +#' @examples +#' +#' +sparse2DT <- function(matrix){ + + # creating i,j,x format + mm.sum <- Matrix::summary(matrix) + + # creating workable dataset of count data + result <- data.table::data.table(Genes = rownames(matrix)[mm.sum$i], Cell = colnames(matrix)[mm.sum$j], Count = mm.sum$x) + return(result) +} + + + +#' Convert a sparse matrix to a data.table +#' +#' @param object +#' +#' @return +#' @export +#' @import data.table +#' @import Seurat +#' @importMatrix +#' +#' @examples +sparse2DT.Seurat <- function(object){ + + # exporting normalized data + mat <- object@assays$RNA@data[, Cells(object)] + + result <- sparse2DT(mat) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents new file mode 100644 index 
0000000..442d352 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/83D9C51F-contents @@ -0,0 +1,186 @@ +#' Read10x v1 +#' +#' @param path +#' @param return.sce return result as SingleCellExperiment object +#' +#' @return +#' @export +#' @import data.table +#' @import Matrix +#' @import SingleCellExperiment +#' +#' @examples +read10x <- function(path, return.sce = TRUE){ + fl <- dir(path) + +# reads in matrix file ---------------------------------------------------- + mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + +# imports barcode --------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + +# imports gene ------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V2 + + +# duplicate gene names for row names -------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table::data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = list(gene,barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res), meta = meta) + } else { + return(res) + } + +} + +#' Read10x v2 +#' +#' @param filepaths +#' @param project +#' @param meta +#' +#' @return +#' @export +#' @import doParallel +#' @import foreach +#' @import doSNOW +#' @import snow +#' @import progress +#' +#' @examples +read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){ + int_list <- 1:length(filepaths) + +# checking meta data 
------------------------------------------------------ + # if(is.null(meta)){ + # meta = list() + # } else if(nrow(meta)!=length(filepaths)){ + # stop('meta data needs to be the same length as filepaths') + # } else { + # meta <- as.list(meta) + # } + +# setting project vector -------------------------------------------------- + # if(length(project)!=length(filepaths)){ + # if( length(project) == 1){ + # project <- rep(project, times = length(filepaths)) + # } else { + # stop('supply either one project or a vector the same length as filepaths') + # } + # } + +# creating cluster and registering doSNOW --------------------------------- + numCores <- parallel::detectCores() -1 + cl <- snow::makeCluster(numCores) + doSNOW::registerDoSNOW(cl) + on.exit(snow::stopCluster(cl)) + e <- simpleError("error occured") + +# progress bar ------------------------------------------------------------ + iterations <- length(int_list) # used for the foreach loop + + pb <- progress::progress_bar$new( + format = ":percent item = :item [:bar] :elapsed | eta: :eta", + total = iterations, + width = floor(options()$width*0.9), + clear = TRUE + ) + + # allowing progress bar to be used in foreach ----------------------------- + + progress <- function(n) { + pb$tick(tokens = list(item = int_list[n])) # report the int_list item + } + + opts <- list(progress = progress) # used in the the foreach loop + + result <- foreach::foreach( i = 1:iterations, + .options.snow = opts, + .export = 'db_read10x', + .combine = 'cbind', + .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% { + db_read10x(path = filepaths[i]) + } + + + return(result) +} + + +#' Read10x v3 +#' +#' @param path +#' @param return.sce +#' +#' @return +#' @export +#' @import data.table +#' @import Matrix +#' @import SingleCellExperiment +#' +#' @examples +db_read10x <- function(path, return.sce = TRUE){ + fl <- dir(path) + + # reads in matrix file ---------------------------------------------------- + mat <- 
data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + + # imports barcode --------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + + # imports gene ------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V1 + + + # duplicate gene names for row names -------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + max_i <- max(mat$i) + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = list(gene[1:max_i],barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res)) + } else { + return(gene) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents new file mode 100644 index 0000000..a9f80ff --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/84A0FD70-contents @@ -0,0 +1,186 @@ +#' Read10x v1 +#' +#' @param path +#' @param return.sce return result as SingleCellExperiment object +#' +#' @return +#' @export +#' @import data.table +#' @import Matrix +#' @import SingleCellExperiment +#' +#' @examples +read10x <- function(path, return.sce = TRUE){ + fl <- dir(path) + +# reads in matrix file ---------------------------------------------------- + mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + +# imports barcode 
--------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + +# imports gene ------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V2 + + +# duplicate gene names for row names -------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table::data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = list(gene,barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res), meta = meta) + } else { + return(res) + } + +} + +#' Read10x v2 +#' +#' @param filepaths +#' @param project +#' @param meta meta data to include with the various datasets +#' +#' @return +#' @export +#' @import doParallel +#' @import foreach +#' @import doSNOW +#' @import snow +#' @import progress +#' +#' @examples +read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){ + int_list <- 1:length(filepaths) + +# checking meta data ------------------------------------------------------ + # if(is.null(meta)){ + # meta = list() + # } else if(nrow(meta)!=length(filepaths)){ + # stop('meta data needs to be the same length as filepaths') + # } else { + # meta <- as.list(meta) + # } + +# setting project vector -------------------------------------------------- + # if(length(project)!=length(filepaths)){ + # if( length(project) == 1){ + # project <- rep(project, times = length(filepaths)) + # } else { + # stop('supply either one project or a vector the same length as filepaths') + # } + # } + +# creating cluster and registering doSNOW --------------------------------- + numCores <- 
parallel::detectCores() -1 + cl <- snow::makeCluster(numCores) + doSNOW::registerDoSNOW(cl) + on.exit(snow::stopCluster(cl)) + e <- simpleError("error occured") + +# progress bar ------------------------------------------------------------ + iterations <- length(int_list) # used for the foreach loop + + pb <- progress::progress_bar$new( + format = ":percent item = :item [:bar] :elapsed | eta: :eta", + total = iterations, + width = floor(options()$width*0.9), + clear = TRUE + ) + + # allowing progress bar to be used in foreach ----------------------------- + + progress <- function(n) { + pb$tick(tokens = list(item = int_list[n])) # report the int_list item + } + + opts <- list(progress = progress) # used in the the foreach loop + + result <- foreach::foreach( i = 1:iterations, + .options.snow = opts, + .export = 'db_read10x', + .combine = 'cbind', + .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% { + db_read10x(path = filepaths[i]) + } + + + return(result) +} + + +#' Read10x v3 +#' +#' @param path +#' @param return.sce return result as SingleCellExperiment object +#' +#' @return +#' @export +#' @import data.table +#' @import Matrix +#' @import SingleCellExperiment +#' +#' @examples +db_read10x <- function(path, return.sce = TRUE){ + fl <- dir(path) + + # reads in matrix file ---------------------------------------------------- + mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]), + skip = 3, + col.names = c('i','j','value'), + colClasses = c('integer','integer','integer'), + header = FALSE) + + # imports barcode --------------------------------------------------------- + barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]), + header = FALSE, + colClasses = 'character')$V1 + + # imports gene ------------------------------------------------------------ + gene<- data.table::fread( + file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]), + header = FALSE)$V1 + + + # duplicate gene names for row names 
-------------------------------------- + + if(!all(duplicated(gene)==FALSE)){ + dg <- data.table( + position = which(duplicated(gene)), + name = gene[duplicated(gene)])[,N:=.N,name][] + dg[,new.name:=paste0(name,'.',1:.N), name] + gene[dg$position] <- dg$new.name + } + max_i <- max(mat$i) + res <- Matrix::sparseMatrix( + i = mat$i, + j = mat$j, + x = mat$value, + dimnames = list(gene[1:max_i],barcode)) + + if(return.sce){ + SingleCellExperiment::SingleCellExperiment(list(counts = res)) + } else { + return(gene) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents new file mode 100644 index 0000000..cdcf33e --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/8A4178A2-contents @@ -0,0 +1,358 @@ +library(logger) +# log_info('Starting the script...') +# log_debug('This is the second log line') +# log_trace('Note that the 2nd line is being placed right after the 1st one.') +# log_success('Doing pretty well so far!') +# log_warn('But beware, as some errors might come :/') +# log_error('This is a problem') +# log_debug('Note that getting an error is usually bad') +# log_error('This is another problem') +# log_fatal('The last problem') + +log_layout(layout_glue_colors) +log_threshold(TRACE) + +log_info('Start of Script.') + +log_info('Creating Seurat Objects for Each Tissue') +# Creating Seurat Objects for Each Tissue ---- +library(Seurat) +f <- function(y, project){ + x <- CreateSeuratObject(Read10X(y), + project = project) + return(x) +} + +log_trace('..... Omental Macrophages') +# Omental Macrophages +path.OmMac <- list.dirs('/rdx/db/E-MTAB/E-MTAB-8593.processed.1', + recursive = F) + +tmp <- lapply(path.OmMac, function(x) f(x, project = 'OmMac')) +OmMac <- merge(x = tmp[[1]], + y = c(tmp[[2]], tmp[[3]], tmp[[4]]), + add.cell.ids = paste0('rep',1:4), + project = "OmMac") +log_trace('..... 
Omental Stroma') +# Omental Stroma +path.Stroma <- '/rdx/db/scRNA/GSE136636_RAW/' +Stroma <- CreateSeuratObject(counts = Read10X(path.Stroma), + project = 'Stroma') + +log_trace('..... Peritoneal Cavity') + +# Peritoneal Cavity +path.PerC <- c("/rdx/db/scRNA/GSE121521_RAW/GSM3438655_Beclin_lysM", + '/rdx/db/scRNA/GSE121521_RAW/GSM3438654_Beclin_flox', + '/rdx/db/scRNA/GSE124562_RAW/GSM3536577_wt', + '/rdx/db/scRNA/GSE124562_RAW/GSM3536578_ko' +) + +PerC_series.list <- c('GSE121521','GSE124562') +PerC_sample.list <- c('GSM3438655','GSM3438654','GSM3536577','GSM3536578') + +tmp <- lapply(path.PerC, function(x) f(x, project = 'PerC')) +PerC <- merge( x= tmp[[1]], + y = tmp[-1], + add.cell.ids = PerC_sample.list, + project = 'PerC' +) + + +PerC_lengths <- sapply(tmp, ncol) + +log_trace('..... Inflammatory Fibroblasts') +# Inflammatory Fibroblasts +path.fibro <- '/rdx/db/scRNA/GSE129087/' +Fibroblasts <- CreateSeuratObject(counts = Read10X(path.fibro), + project = 'Fibroblasts') + +log_trace('..... Inflammatory Endothelium') +# Inflammatory Endothelium +path.Endo <- c("/rdx/db/E-MTAB/E-MTAB-7149/result/outs/filtered_feature_bc_matrix/") +Endothelium <- CreateSeuratObject(counts = Read10X(path.Endo), + project = 'Endothelium') + +log_trace('..... 
Stromal Vascular Cells from adipose tissue') +# Stromal Vascular Cells from adipose tissue +path.SVC <- c("/rdx/db/scRNA/GSE128890_RAW/GSM3717977_SCmurinep12/", + '/rdx/db/scRNA/GSE128890_RAW/GSM3717978_SCmurineAdult/', + '/rdx/db/scRNA/E-MTAB-6677/data/' +) + +SVC_series.list <- c('GSE128890','E.MTAB.6677') +SVC_sample.list <- c('GSM3717977','GSM3717978','E-MTAB-6677') + + + + +tmp <- lapply(path.SVC, function(x) f(x, project = 'SVC')) +SVC <- merge( x= tmp[[1]], + y = tmp[-1], + add.cell.ids = SVC_sample.list, + project = 'SVC' +) + + +SVC_lengths <- sapply(tmp, ncol) + +log_success('Created Seurat Objects') + +log_info('Merging all into one dataset') +# Merge into One Dataset ---- + +PerNiche <- merge( + x = OmMac, + y = c(Stroma, PerC, Endothelium, Fibroblasts,SVC), + add.cell.ids = c('OmMac', 'Stroma', 'PerC', 'Endothelium','Stroma','SVC'), + project = 'PerNiche' +) + +log_success('PerNiche object created') + +log_info('Adding Meta data') +# Add Meta-data ---- +PerNiche@meta.data$tissue <- c( + rep('Omentum', ncol(OmMac)+ncol(Stroma)), + rep('Peritoneal Cavity', ncol(PerC)), + rep('Aorta Endothelium', ncol(Endothelium)), + rep('Synovial Stroma', ncol(Fibroblasts)), + rep('Stromal Vascular Cells', ncol(SVC)) +) + +PerNiche@meta.data$type <- c( + rep('Macrophage', ncol(OmMac)), + rep('Stromal', ncol(Stroma)), + rep('PerC-Cell', ncol(PerC)), + rep('Endothlium', ncol(Endothelium)), + rep('Stromal', ncol(Fibroblasts)), + rep('SVC', ncol(SVC)) +) + + +PerNiche@meta.data$dataset <- c( + rep('E.MTAB.8593', ncol(OmMac)), + rep('GSE136636', ncol(Stroma)), + unlist(mapply(rep, PerC_series.list, c(sum(PerC_lengths[1:2]), sum(PerC_lengths[3:4])))), + rep('E.MTAB.7149', ncol(Endothelium)), + rep('GSE129087', ncol(Fibroblasts)), + unlist(mapply(rep, SVC_series.list, c(sum(SVC_lengths[1:2]), sum(SVC_lengths[3])))) +) + + +# Cleanup ---- +rm(list = c('OmMac','Stroma','PerC', 'Endothelium','Fibroblasts', + 'path.PerC','path.OmMac','path.Stroma','path.fibro','path.Endo', + 
'tmp', 'SVC','path.SVC' )) + + +log_info('Seurat Preprocessing') +# Seurat Preprocessing ---- + + +PerNiche <- PercentageFeatureSet(PerNiche, + pattern = "^mt-", + col.name = "percent.mt") + +PerNiche <- PercentageFeatureSet(PerNiche, + pattern = "^Rp[sl]", + col.name = "percent.ribo") + + +low <- 200 +high <- 5700 +ribo <- 3.5 +mito <- 20 + +library(data.table) + +tc <- length(Cells(PerNiche)) + +#start with cells with many genes detected. +high.det <- WhichCells(PerNiche, expression = nFeature_RNA < high) + +#start with cells with many genes detected. +low.det<- WhichCells(PerNiche, expression = nFeature_RNA > low ) + +# Mito/Ribo filtering +selected.mt <- WhichCells(PerNiche, expression = percent.mt < mito) +selected.ribo <- WhichCells(PerNiche, expression = percent.ribo > ribo) + +log_trace('..... removing high expressing cells') +# remove these cells +PerNiche <- subset(PerNiche, + cells=high.det) + +log_trace('..... removing low expressing low') +# remove these cells +PerNiche <- subset(PerNiche, + cells=low.det) + +log_trace('.....removing cells with >20% MT genes') +# and subset the object to only keep those cells +PerNiche <- subset(PerNiche, cells = selected.mt) + +log_trace('.....removing cells with <3.5% Rp genes') +PerNiche <- subset(PerNiche, cells = selected.ribo) + +log_success('Finished Filtering') + +log_info('Normalization') + +# Normalization ---- +PerNiche <- NormalizeData( + PerNiche, + verbose = TRUE +) + +log_success('') + +log_info('Finding 4000 Variable Features') +# Variable Features ---- +PerNiche <- FindVariableFeatures( + PerNiche, + selection.method = "vst", + nfeatures = 4000 +) + +log_success('') + +log_info('Determining Cell Cycle Score') + +ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes) +ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes) + +PerNiche <- CellCycleScoring(PerNiche, + s.features = ccss, + g2m.features = ccg2m) + +PerNiche$CC.Difference <- PerNiche$S.Score - PerNiche$G2m.Score + 
+log_info('Scaling Data') +# Scaling Data ---- +PerNiche <- ScaleData( + PerNiche, + vars.to.regress = 'CC.Difference', + verbose = TRUE +) + +log_success('') + +log_info('Performing PCA using 100npcs') +# PCA ---- +PerNiche <- RunPCA( + PerNiche, + pc.genes = PerNiche@var.genes, + npcs = 100, + verbose = TRUE +) + +log_success('') + +log_info('Harmonizing datasets') +# Harmonizing the Integrated Datasets ---- + +library(harmony) + +PerNiche <- RunHarmony( + PerNiche,group.by.vars = 'dataset', + # sigma = 0.1, # width of soft kmeans clusters, + # theta = 8, # encourages diverse clusters + nclust = 50, # number of clusters in model, + max.iter.cluster = 100, + max.iter.harmony = 100 +) + +log_success('') + +log_info('Dimensional Reduction Post-Harmonizing') +# Dimensional Reduction Post-Harmonizing ---- + +log_trace('.......UMAP') +# UMAP +PerNiche <- RunUMAP(PerNiche, + reduction = "harmony", + dims = 1:100) + + +log_trace('.......TSNE') + +#t-SNE (currently using FIt-SNE Method) +# PerNiche <- RunTSNE(PerNiche, reduction = "harmony", dims = 1:20, tsne.method = "fftRtsne") +learningRate <- dim(PerNiche)[2]/12 +Perplexity <- dim(PerNiche)[2]/100 + +createINITmatrix <- function(x, reduction = 'pca', dims = 1:2){ + res <- as.matrix(Seurat::Embeddings(x, reduction = reduction))[,dims] + res <- (res/sd(res[,1]))*0.0001 + return(res) +} + +init.mat <- createINITmatrix(PerNiche, reduction = 'harmony',dims = 1:100) + +PerNiche <- RunTSNE(PerNiche, + reduction = "harmony", + dims = 1:100, + tsne.method = "FIt-SNE", + fast_tsne_path = '/rdx/software/FIt-SNE-1.1.0/bin/fast_tsne', + perplexity = Perplexity, + learning_rate = learningRate, + k = 10, + late_exag_coeff = 4, + initialization = init.mat +) + +log_success('Finished with dimension reduction') + +log_warn('Saving current results to tmpPerniche.rds') +saveRDS(PerNiche,'tmpPerniche.rds') +log_success('') + +log_info('Finding Neighbors') +# Finding Neighbors ---- +PerNiche <- FindNeighbors(PerNiche, + nn.method = 
'annoy', + reduction = "harmony", + annoy.metric = 'cosine', + force.recalc = TRUE, + dims = 1:100) + +log_info('Finding Clusters') +# Finding Clusters ---- +PerNiche <- FindClusters(PerNiche, + algorithm = 3, + resolution = 1, + method = 'igraph', + n.start = 30, + n.iter = 100, + verbose = TRUE + ) + +PerNiche <- identity(PerNiche) + +png('~/Desktop/PerNiche-umap.png', height = 10, width = 10, res = 300, units = 'in') +DimPlot(PerNiche, reduction = 'umap', repel = TRUE, label = TRUE, order = TRUE)+NoLegend() +dev.off() + +png('~/Desktop/PerNiche-tsne.png', height = 10, width = 10, res = 300, units = 'in') +DimPlot(PerNiche, reduction = 'tsne', repel = TRUE,label = TRUE, order = TRUE)+NoLegend() +dev.off() + +log_info('Indentifying Cluster Markers') +# Identifying Cluster Markers --- + +cluster.markers <- FindAllMarkers( + PerNiche, + logfc.threshold = 0.5, + verbose = TRUE +) + +library(data.table) +setDT(cluster.markers) +feat_cols <- c('lightgrey',viridis::plasma(5)) # best color palette for FeaturePlot + +log_info('Saving Results as .rds file') +# Saving Results as .rds file ---- +saveRDS(list(PerNiche = PerNiche, markers = cluster.markers, feat_cols = feat_cols), '/rdx/projects/QE/data/20200519_PerNiche_01.rds') + +log_success('Enjoy your spoils') diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents new file mode 100644 index 0000000..a9dd400 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/9CB716C9-contents @@ -0,0 +1,132 @@ +#' UWOT-UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose +#' +#' @return +#' @export +#' @import uwot +#' @import Seurat +#' +#' @examples 
+visUMAP <- function(object, + reduction = 'harmony', + spread = 1, + n_components = 2, + min_dist = 0.3, + metric = 'cosine', + n_neighbors = 30, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + umap_res <- uwot::umap(embds, + spread = 1, + n_components = n_components, + min_dist = min_dist, + metric = metric, + n_threads = n_threads, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate + ) + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} + + +#' UWOT-UAMP: Clustering Specific UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose whether to print function messages +#' +#' @return +#' @export +#' +#' @examples +clustUMAP <- function(object, + reduction = 'harmony', + spread = 1.1, + n_components = NULL, + min_dist = 0, + metric = 'cosine', + n_neighbors = 50, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + if(is.null(n_components)){ + n_components <- ncol(embds) + } + umap_res <- uwot::umap(embds, + spread = spread, + 
n_components = n_components, + min_dist = min_dist, + n_threads = n_threads, + metric = metric, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate, + verbose = verbose + ) + + rownames(umap_res) <- rownames(embds) + colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res)) + + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, + key = 'clustUMAP_', + assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3 b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3 new file mode 100644 index 0000000..1ddfb65 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3 @@ -0,0 +1,21 @@ +{ + "id": "A102BAE3", + "path": "/rdx/projects/dbsinglecell/DESCRIPTION", + "project_path": "DESCRIPTION", + "type": "dcf", + "hash": "3089006847", + "contents": "", + "dirty": false, + "created": 1601696304924.0, + "source_on_save": false, + "relative_order": 5, + "properties": {}, + "folds": "", + "lastKnownWriteTime": 1601700213, + "encoding": "UTF-8", + "collab_server": "", + "source_window": "", + "last_content_update": 1601700213309, + "read_only": false, + "read_only_alternatives": [] +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents new file mode 100644 index 0000000..8cddfb3 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/A102BAE3-contents @@ -0,0 +1,34 @@ +Package: dbsinglecell +Type: Package +Title: Dewey Brooke's Single-Cell Toolkit +Version: 0.1 +Date: 2019-06-21 +Authors@R: c( + person("Dewey", "Brooke", , "dbrooke@uab.edu", role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-4290-3809") + ) + ) +Maintainer: Dewey Brooke +Description: A collection of functions for processing 
single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk. +License: GPL (>= 2) +Imports: + Rcpp (>= 1.0.5), + Matrix, + data.table, + doParallel, + foreach, + progress, + Seurat, + colorspace, + crayon, + snow, + doSNOW, + colortools, + stringr, + uwot, + viridis, + SingleCellExperiment, + reticulate +LinkingTo: Rcpp +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.1 diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents new file mode 100644 index 0000000..ce41ba9 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/B8A3D5D2-contents @@ -0,0 +1,201 @@ +#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction) +#' +#' @param embedding +#' @param a +#' @param angular_rp_forest +#' @param b +#' @param force_approximation_algorithm +#' @param init +#' @param learning_rate +#' @param local_connectivity +#' @param low_memory +#' @param metric +#' @param metric_kwds +#' @param min_dist +#' @param n_components +#' @param n_epochs +#' @param n_neighbors +#' @param negative_sample_rate +#' @param output_metric +#' @param output_metric_kwds +#' @param random_state +#' @param repulsion_strength +#' @param set_op_mix_ratio +#' @param spread +#' @param target_metric +#' @param target_metric_kwds +#' @param target_n_neighbors +#' @param target_weight +#' @param transform_queue_size +#' @param transform_seed +#' @param unique +#' @param verbose +#' @param nThreads number of parallel threads to be used +#' +#' @return +#' @export +#' +#' @import reticulate +#' @import Seurat +#' +#' @examples +umap <- function( + embedding, + a=NULL, + angular_rp_forest=FALSE, + b=NULL, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=200, + 
n_neighbors=15, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1 +){ + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=as.intger(local_connectivity), + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=as.integer(target_n_neighbors), + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + return(result) +} + +umap.Seurat <- function( + object, + reduction = 'pca', + reduction_name = 'umap', + dims = NULL, + a=1.662, + angular_rp_forest=FALSE, + b=0.7905, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=100, + n_neighbors=50, + negative_sample_rate=5, + output_metric='euclidean', + 
output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1, + return_seurat = TRUE +){ + + if(is.null(dims)){ + embedding <- Seurat::Embeddings(object, reduction = reduction) + } else { + embedding <- Seurat::Embeddings(object, reduction = reduction, dims = dims) + } + + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=local_connectivity, + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=target_n_neighbors, + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + if(return_seurat){ + object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA') + return(object) + } else { + return(result) + } +} + + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents new file mode 100644 index 
0000000..1768c36 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/C7D2B89B-contents @@ -0,0 +1,132 @@ +#' Simple method for creating Seurat Objects +#' +#' @param filepath +#' @param sample +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +create_seurat <- function(filepath, sample = NULL ){ + if(is.null(sample)){ + sample <- basename(filepath) + } + + # read in 10X data + x <- Seurat::Read10X(data.dir = filepath) + + # create unique cell ids + cell_ids <- paste0(sample, '_', colnames(x)) + colnames(x) <-cell_ids + + # create Seurat Object and include meta data + suppressWarnings({ + res <- Seurat::CreateSeuratObject(x, meta.data = meta, project = sample) + }) + + return(res) +} + +#' Seurat Preprocessing +#' +#' @param object +#' @param species +#' @param nfeatures +#' @param npcs +#' +#' @return +#' @export +#' @import Seurat +#' @importFrom stringr str_to_title +#' @import crayon +#' +#' @examples +pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){ + if(species == 'Homo sapiens'){ + mt_pattern <- '^MT-' + } else { + mt_pattern <- '^mt-' + } + + object <- Seurat::PercentageFeatureSet(object, + pattern = mt_pattern, + col.name = "percent.mt") + + message_section('Filtering out low quality cells and doublets') + + # Removing low quality cells and doublets + object <- subset(object,percent.mt < 20 &nFeature_RNA >500 & nFeature_RNA < 4100) + + message_section('Normalizing data') + # Normalization + object<- Seurat::NormalizeData(object, verbose = TRUE) + # Variable Features + + message_section(paste('Finding',nfeatures,'most variable fatures')) + object<- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures) + + if(species =='Mus musculus'){ # change gene name format to title capitalization + ccss <- stringr::str_to_title(cc.genes.updated.2019$s.genes) + ccg2m <- stringr::str_to_title(cc.genes.updated.2019$g2m.genes) + } else { # use build in gene names + ccss <- 
cc.genes.updated.2019$s.genes + ccg2m <- cc.genes.updated.2019$g2m.genes + } + + # scoring function + object<- Seurat::CellCycleScoring(object,s.features = ccss, g2m.features = ccg2m) + + # difference between s and g2m scores + object$CC.Difference <-object$S.Score -object$G2M.Score + + message_section('Scaling data') + # Scaling Data ---- + object<- Seurat::ScaleData(object,vars.to.regress = c('CC.Difference','percent.mt')) + + message_section('Performing PCA') + message_append(paste('using npcs =',npcs)) + # PCA ---- + object<- Seurat::RunPCA( + object, + pc.genes =object@var.genes, + npcs = npcs) + return(object) + +} + +message_section <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + cat("\n",rep('-',n), "\n",sep = '') + cat(crayon::bold(crayon::yellow(paste0('[',Sys.time(),']'))), crayon::bold(crayon::green(text)),'\n') +} + +message_task <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n -22){ + cat(crayon::yellow(paste0('[',Sys.time(),']')),'\n') + } else { + cat(crayon::yellow(paste0('[',Sys.time(),']')), text,'\n') + } + +} + +message_append <- function(text){ + n <- ceiling(options()$width*0.75) + if(n >120){ + n <- 120 + } + + if(nchar(text) > n - 22){ + invisible() + } else { + cat(rep(' ',23),crayon::silver('- '),crayon::silver(text),'\n', sep = '') + } +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents new file mode 100644 index 0000000..9e9e7ea --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5280646-contents @@ -0,0 +1,17 @@ +#' Easy Add Meta data to Seurat Object +#' +#' @param object +#' @param meta +#' @param col.name name of the column for the new meta data +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +NewMeta <- function(object, meta, col.name){ + test <- meta[as.character(Seurat::Idents(object))] + names(test) <- colnames(object) + 
result <- Seurat::AddMetaData(object, test, col.name) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents new file mode 100644 index 0000000..3d824a5 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/D5A58BA0-contents @@ -0,0 +1,201 @@ +#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction) +#' +#' @param embedding +#' @param a +#' @param angular_rp_forest +#' @param b +#' @param force_approximation_algorithm +#' @param init +#' @param learning_rate +#' @param local_connectivity +#' @param low_memory +#' @param metric +#' @param metric_kwds +#' @param min_dist +#' @param n_components +#' @param n_epochs +#' @param n_neighbors +#' @param negative_sample_rate +#' @param output_metric +#' @param output_metric_kwds +#' @param random_state +#' @param repulsion_strength +#' @param set_op_mix_ratio +#' @param spread +#' @param target_metric +#' @param target_metric_kwds +#' @param target_n_neighbors +#' @param target_weight +#' @param transform_queue_size +#' @param transform_seed +#' @param unique +#' @param verbose +#' @param nThreads +#' +#' @return +#' @export +#' +#' @import reticulate +#' @import Seurat +#' +#' @examples +umap <- function( + embedding, + a=NULL, + angular_rp_forest=FALSE, + b=NULL, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=200, + n_neighbors=15, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1 +){ + 
Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=as.intger(local_connectivity), + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=as.integer(target_n_neighbors), + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + return(result) +} + +umap.Seurat <- function( + object, + reduction = 'pca', + reduction_name = 'umap', + dims = NULL, + a=1.662, + angular_rp_forest=FALSE, + b=0.7905, + force_approximation_algorithm=FALSE, + init='spectral', + learning_rate=1.0, + local_connectivity=1.0, + low_memory=FALSE, + metric='euclidean', + metric_kwds=NULL, + min_dist=0.1, + n_components=2, + n_epochs=100, + n_neighbors=50, + negative_sample_rate=5, + output_metric='euclidean', + output_metric_kwds=NULL, + random_state=42, + repulsion_strength=1.0, + set_op_mix_ratio=1.0, + spread=1.0, + target_metric='categorical', + target_metric_kwds=NULL, + target_n_neighbors=-1, + target_weight=0.5, + transform_queue_size=4.0, + transform_seed=42, + unique=FALSE, + verbose=TRUE, + nThreads = parallel::detectCores()-1, + return_seurat = TRUE +){ + + if(is.null(dims)){ + embedding <- 
Seurat::Embeddings(object, reduction = reduction) + } else { + embedding <- Seurat::Embeddings(object, reduction = reduction, dims = dims) + } + + Sys.setenv(OMP_NUM_THREADS=nThreads) + umap <- reticulate::import('umap', delay_load = TRUE) + reducer <- umap$UMAP( + a=a, + angular_rp_forest=angular_rp_forest, + b=b, + force_approximation_algorithm=force_approximation_algorithm, + init=init, + learning_rate=learning_rate, + local_connectivity=local_connectivity, + low_memory=low_memory, + metric=metric, + metric_kwds=metric_kwds, + min_dist=min_dist, + n_components=as.integer(n_components), + n_epochs=as.integer(n_epochs), + n_neighbors=as.integer(n_neighbors), + negative_sample_rate=negative_sample_rate, + output_metric=output_metric, + output_metric_kwds=output_metric_kwds, + random_state=as.integer(random_state), + repulsion_strength=repulsion_strength, + set_op_mix_ratio=set_op_mix_ratio, + spread=spread, + target_metric=target_metric, + target_metric_kwds=target_metric_kwds, + target_n_neighbors=target_n_neighbors, + target_weight=target_weight, + transform_queue_size=transform_queue_size, + transform_seed=as.integer(transform_seed), + unique=unique, + verbose=verbose) + + result <- reducer$fit_transform(embedding) + + if(return_seurat){ + object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA') + return(object) + } else { + return(result) + } +} + + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents new file mode 100644 index 0000000..aca7d14 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DAA1DF4E-contents @@ -0,0 +1,156 @@ +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param x +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param 
cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param prediction_data +#' +#' @return +#' @export +#' +#' @import reticulate +#' +#' @examples +HDBSCAN <- function(x, + algorithm='best', + alpha=1.0, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + prediction_data=TRUE, + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores() +){ + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm = algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + + + + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + return(result) +} + + + +#' Hierarchical Density-Based Spatial Clustering of Applications with Noise +#' +#' @param object +#' @param reduction +#' @param dims +#' @param algorithm +#' @param alpha +#' @param approx_min_span_tree +#' @param gen_min_span_tree +#' @param leaf_size +#' @param metric +#' @param min_cluster_size +#' @param min_samples +#' @param cluster_selection_epsilon +#' @param cluster_selection_method +#' @param nThreads +#' @param return_seurat logical to return the result within the orignal object or as the raw HDBSCAN result +#' @param prediction_data +#' +#' @return +#' @export +#' +#' @examples 
+HDBSCAN.Seurat <- function(object, + reduction = 'umap', + dims = NULL, + algorithm='best', + alpha=1.0, + prediction_data = TRUE, + approx_min_span_tree = TRUE, + gen_min_span_tree=FALSE, + leaf_size=40, + metric='euclidean', + min_cluster_size =50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = 'leaf', + nThreads = parallel::detectCores(), + return_seurat = TRUE +){ + + if(is.null(dims)){ + x <- Seurat::Embeddings(object, reduction = reduction) + } else { + x <- Seurat::Embeddings(object, reduction = reduction)[,dims] + } + + hdbscan <- reticulate::import('hdbscan', delay_load = TRUE) + + + + clusterer <- hdbscan$HDBSCAN(algorithm=algorithm, + alpha = alpha, + prediction_data = prediction_data, + approx_min_span_tree = approx_min_span_tree, + gen_min_span_tree = gen_min_span_tree, + leaf_size = leaf_size, + core_dist_n_jobs = nThreads, + metric = metric, + min_cluster_size = as.integer(min_cluster_size), + min_samples = as.integer(min_samples), + cluster_selection_epsilon = cluster_selection_epsilon, + cluster_selection_method = cluster_selection_method + ) + clusterer$fit(x) + + result <- list( + labels = factor(clusterer$labels_), + probabilities = clusterer$probabilities_, + cluster_persistance = clusterer$cluster_persistence_, + exemplars = clusterer$exemplars_, + outlier_scores = clusterer$outlier_scores_) + + levels(result$labels)[1] <- NA + if(return_seurat){ + object@misc$hdbscan <- result + object$cl <- factor(clusterer$labels_) + return(object) + } else { + return(result) + } + + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents new file mode 100644 index 0000000..2f9f0fc --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/DDAA5EEA-contents @@ -0,0 +1,132 @@ +#' UWOT-UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param 
set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose whether to print function messages +#' +#' @return +#' @export +#' @import uwot +#' @import Seurat +#' +#' @examples +visUMAP <- function(object, + reduction = 'harmony', + spread = 1, + n_components = 2, + min_dist = 0.3, + metric = 'cosine', + n_neighbors = 30, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + umap_res <- uwot::umap(embds, + spread = 1, + n_components = n_components, + min_dist = min_dist, + metric = metric, + n_threads = n_threads, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate + ) + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} + + +#' UWOT-UAMP: Clustering Specific UMAP +#' +#' @param object +#' @param reduction +#' @param spread +#' @param n_components +#' @param min_dist +#' @param metric +#' @param n_neighbors +#' @param set_op_mix_ratio +#' @param local_connectivity +#' @param repulsion_strength +#' @param negative_sample_rate +#' @param n_threads +#' @param reduction_name +#' @param return_seurat +#' @param verbose whether to print function messages +#' +#' @return +#' @export +#' +#' @examples +clustUMAP <- function(object, + reduction = 'harmony', + spread = 1.1, + n_components = NULL, + min_dist = 0, + metric = 'cosine', + n_neighbors = 50, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + 
negative_sample_rate = 5, + n_threads = parallel::detectCores()-1, + reduction_name = 'umap', + return_seurat = TRUE, + verbose = TRUE +){ + embds <- Seurat::Embeddings(object, reduction = reduction) + if(is.null(n_components)){ + n_components <- ncol(embds) + } + umap_res <- uwot::umap(embds, + spread = spread, + n_components = n_components, + min_dist = min_dist, + n_threads = n_threads, + metric = metric, + n_neighbors = n_neighbors, + set_op_mix_ratio = set_op_mix_ratio, + local_connectivity = local_connectivity, + repulsion_strength = repulsion_strength, + negative_sample_rate = negative_sample_rate, + verbose = verbose + ) + + rownames(umap_res) <- rownames(embds) + colnames(umap_res) <- paste0('UMAP_', 1:ncol(umap_res)) + + if(return_seurat){ + object[reduction_name] <- Seurat::CreateDimReducObject(embeddings = umap_res, + key = 'clustUMAP_', + assay = 'RNA') + return(object) + } else { + return(umap_res) + } + +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents new file mode 100644 index 0000000..8801c0f --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/E0F84C30-contents @@ -0,0 +1,145 @@ +#' CellPhoneDB Summary File +#' +#' @param path the directory containing the CellPhoneDB Output +#' @param pvalue setting this will return results less than it +#' +#' @return +#' @export +#' +#' @examples +#' @import data.table +#' @import Matrix +#' @import crayon +#' +cellphonedb_summary <- function(path, pvalue = 'all'){ + means <- data.table::fread(file.path(path,'means.txt')) + pvalues <- data.table::fread(file.path(path, 'pvalues.txt')) + id.vars <- colnames(means)[1:11] + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files')))) + means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean') + pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue') 
+ + data.table::setkeyv(means, c('cell_pair',id.vars)) + data.table::setkeyv(pvalues, c('cell_pair',id.vars)) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets')))) + result <- data.table::merge.data.table(means, pvalues) + result <- as.data.table(result) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs')))) + int_pairs <- strcapture('(.+)\\_(.+)',result$interacting_pair, + data.table::data.table(gA = character(), + gB = character())) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs')))) + cell_pair <- strcapture('(.+)\\|(.+)', + result$cell_pair, + data.table::data.table(cell_a = character(), + cell_b = character())) + + result <- data.table(cell_pair, int_pairs, result) + if(pvalue=='significant'){ + result <- result[pvalue<0.05] + } + cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished')))) + + return(result) +} + + +# prep_cellphonedb <- function(rds, meta_column, path){ +# require(data.table) +# require(Seurat) +# +# object <- readRDS(rds) +# +# res <- sparse2DT.Seurat(object) +# +# new.meta <- object@meta.data[,meta_column] +# names(new.meta) <- rownames(object@meta.data) +# +# # add cell_types to res +# res[,cell_subset:=new.meta[res$Cell]] +# data.table::setkey(res, Genes, cell_subset) +# +# # generate summary information to be used for filtering uninformative genes +# test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)] +# test[,total:=sum(N),Genes] +# test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] +# +# # create vector with leftover genes +# gl <- unique(test$Genes) +# +# # subset count dataset +# res <- res[Genes %in% gl] +# +# # create counts file +# counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) +# colnames(counts)[1] <- 'Gene' +# setkey(counts, Gene) +# +# m2h <- fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse') +# mz_genes <- m2h$Ensembl_gene_id +# 
names(mz_genes) <- m2h$mouse +# new_genes <- mz_genes[counts$Gene] +# names(new_genes) <- counts$Gene +# new_genes <- new_genes[!is.na(new_genes)] +# +# dim(counts) +# counts <- counts[Gene %in% names(new_genes)] +# counts[,Gene:=new_genes[Gene]] +# +# # create meta file +# meta <- data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]]) +# meta <- meta[Cell %in% colnames(counts)[-1]] +# fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE) +# +# fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE) +# } + +#' Convert a sparse matrix to a data.table +#' +#' @param matrix +#' +#' @return +#' @export +#' +#' @import data.table +#' @import Seurat +#' @import Matrix +#' +#' @examples +#' +#' +sparse2DT <- function(matrix){ + + # creating i,j,x format + mm.sum <- Matrix::summary(matrix) + + # creating workable dataset of count data + result <- data.table::data.table(Genes = rownames(matrix)[mm.sum$i], Cell = colnames(matrix)[mm.sum$j], Count = mm.sum$x) + return(result) +} + + + +#' Convert a sparse matrix to a data.table +#' +#' @param object Seurat object +#' +#' @return +#' @export +#' @import data.table +#' @import Seurat +#' @importMatrix +#' +#' @examples +sparse2DT.Seurat <- function(object){ + + # exporting normalized data + mat <- object@assays$RNA@data[, Cells(object)] + + result <- sparse2DT(mat) + return(result) +} diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents new file mode 100644 index 0000000..711ea0d --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/F63B8057-contents @@ -0,0 +1,176 @@ +#' Custom Palette for UMAP +#' +#' @param object +#' @param group_col +#' @param base_col +#' @param jitter randomize the colors +#' +#' @return +#' @export +#' @importFrom colortools setColors +#' @import viridis +#' +#' @examples +#' +pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){ + col_levels <- 
object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + + + if(jitter){ + new_order <- unlist(sapply(1:5, function(x) seq(x, n,5))) + } + pal <- c(pal, colortools::setColors(base_col,n))[new_order] + return(pal) +} + +#' UMAP Palette using HCL presets +#' +#' @param object +#' @param group_col +#' @param hcl_pal +#' @param jitter +#' @param comp integer setting the color complementary to be used +#' +#' @return +#' @export +#' +#' @examples +hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order]) + return(pal) +} + +#' UMAP palette using rainbow colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp integer setting the color complementary to be used +#' +#' @return +#' @export +#' +#' @examples +rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal,rainbow(57,s = 0.7,v = 0.8,alpha = 0.95)[new_order]) + return(pal) +} + +#' UMAP Palette using soft hues +#' +#' @param object +#' @param group_col +#' @param jitter integer setting the color complementary to be used +#' @param comp integer setting the color complementary to be used +#' +#' @return +#' @export +#' +#' @import colorspace +#' +#' @examples +hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- 
object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order]) + return(pal) +} + +gg_color_hue <- function(n) { + hues = seq(15, 375, length = n + 1) + hcl(h = hues, l = 65, c = 100)[1:n] +} + +#' UMAP Palette using ggplot2 colors +#' +#' @param object +#' @param group_col +#' @param jitter +#' @param comp integer setting the color complementarity to be used +#' +#' @return +#' @export +#' +#' @examples +gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){ + col_levels <- object@meta.data[[group_col]] + n <- length(unique(col_levels)) + if("-1" %in% col_levels){ + pal <- c('gray') + } else { + pal <- c() + } + jn <- floor(n/comp) + new_order <- 1:n + if(jitter){ + new_order <- as.integer(unlist(sapply(1:jn, function(x) seq(x, n,jn)))) + } + pal <- c(pal, gg_color_hue(n)[new_order]) + return(pal) +} + +hex_convert <- function(x){ + if(x>=256) stop() + tmp <- c(0:9, LETTERS[1:6]) + + first <- floor(x/16) + + first <- ifelse(first==16, 15, first) + second <- x - first*16 + res <- paste0(tmp[first+1], tmp[second+1]) + return(res) +} + +hex_convert <- Vectorize(hex_convert) + +incA <- function(n, min = 0, base = '#E1E1E1'){ + low <- hex_convert(min/100*255) + c1 <- paste0(base, low) + + res <- c(c1,paste0( + substring(viridis::plasma(n), 1, 7), + hex_convert(seq(min/100*255,255, length.out = n)))) + return(res) +} + + diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8 b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8 new file mode 100644 index 0000000..3273cb0 --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8 @@ -0,0 +1,21 @@ +{ + "id": "FA66A4D8", + "path": "/rdx/projects/dbsinglecell/README.md", + "project_path": "README.md", + 
"type": "markdown", + "hash": "3239581323", + "contents": "", + "dirty": false, + "created": 1601700425517.0, + "source_on_save": false, + "relative_order": 7, + "properties": {}, + "folds": "", + "lastKnownWriteTime": 1601700650, + "encoding": "UTF-8", + "collab_server": "", + "source_window": "", + "last_content_update": 1601700650154, + "read_only": false, + "read_only_alternatives": [] +} \ No newline at end of file diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents new file mode 100644 index 0000000..1ce2d6b --- /dev/null +++ b/.Rproj.user/4F102347/sources/s-2CBFA7B6/FA66A4D8-contents @@ -0,0 +1,23 @@ + +# dbsinglecell + + + + +A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk. + +## Installation + +If you so desire to use this package, install by using + +``` r +remotes::install_github("dbrookeUAB/dbsinglecell") +``` + +To use `HDSCAN` or `umap` functions, you must install their respective python libraries by + +``` r +library(dbsinglecell) +install_python_packages() +``` +Cheers! 
diff --git a/.Rproj.user/4F102347/sources/s-2CBFA7B6/lock_file b/.Rproj.user/4F102347/sources/s-2CBFA7B6/lock_file new file mode 100644 index 0000000..e69de29 diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names new file mode 100644 index 0000000..e69de29 diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths new file mode 100644 index 0000000..7f2a1d0 --- /dev/null +++ b/.Rproj.user/shared/notebooks/paths @@ -0,0 +1,31 @@ +/rdx/projects/GeCKO/.travis.yml="36C21FFA" +/rdx/projects/GeCKO/DESCRIPTION="ADFA58AB" +/rdx/projects/GeCKO/NAMESPACE="00A958B8" +/rdx/projects/GeCKO/_pkgdown.yml="F63E4187" +/rdx/projects/PerNiche/DESCRIPTION="8831743E" +/rdx/projects/PerNiche/R/NewMeta.R="7F2B2DF3" +/rdx/projects/PerNiche/R/db_read10x.R="1190B6E6" +/rdx/projects/PerNiche/R/hdbscan.R="77499622" +/rdx/projects/PerNiche/R/plot_umap.R="2AA3DD28" +/rdx/projects/PerNiche/R/scRNA_helpers.R="C0D72FE7" +/rdx/projects/PerNiche/R/umap-learn.R="CC28AC64" +/rdx/projects/PerNiche/R/umap.R="15B8B434" +/rdx/projects/QE/R/CellPhoneDB.R="E718568E" +/rdx/projects/QE/R/PerNiche/02_dimReduction.R="7886EAB7" +/rdx/projects/QE/R/PerNiche/complete2.R="5E4D9938" +/rdx/projects/QE/docs/PerNiche.Rmd="2744004E" +/rdx/projects/dbsinglecell/DESCRIPTION="21C89D3A" +/rdx/projects/dbsinglecell/NAMESPACE="C917BDCD" +/rdx/projects/dbsinglecell/R/NewMeta.R="99B6A7BE" +/rdx/projects/dbsinglecell/R/cellphonedb_utilities.R="5E6CFB65" +/rdx/projects/dbsinglecell/R/db_read10x.R="42CB8524" +/rdx/projects/dbsinglecell/R/hdbscan.R="B49E6C2B" +/rdx/projects/dbsinglecell/R/organize_10x.R="3CC32046" +/rdx/projects/dbsinglecell/R/plot_umap.R="A99ACBE4" +/rdx/projects/dbsinglecell/R/reticulate_helpers.R="E0690E3C" +/rdx/projects/dbsinglecell/R/scRNA_helpers.R="6AC0283F" +/rdx/projects/dbsinglecell/R/umap-learn.R="B3715FD8" +/rdx/projects/dbsinglecell/R/umap.R="9EF542D3" +/rdx/projects/dbsinglecell/Read-and-delete-me="7C689967" 
+/rdx/projects/dbsinglecell/_pkgdown.yml="F5EBAB80" +/rdx/projects/dbsinglecell/man/cellphonedb_summary.Rd="5390F419" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..99b91e9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +docs +docs/ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..8cddfb3 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,34 @@ +Package: dbsinglecell +Type: Package +Title: Dewey Brooke's Single-Cell Toolkit +Version: 0.1 +Date: 2019-06-21 +Authors@R: c( + person("Dewey", "Brooke", , "dbrooke@uab.edu", role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-4290-3809") + ) + ) +Maintainer: Dewey Brooke +Description: A collection of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk. +License: GPL (>= 2) +Imports: + Rcpp (>= 1.0.5), + Matrix, + data.table, + doParallel, + foreach, + progress, + Seurat, + colorspace, + crayon, + snow, + doSNOW, + colortools, + stringr, + uwot, + viridis, + SingleCellExperiment, + reticulate +LinkingTo: Rcpp +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..6e6b376 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,38 @@ +# Generated by roxygen2: do not edit by hand + +export(HDBSCAN) +export(HDBSCAN.Seurat) +export(NewMeta) +export(cellphonedb_summary) +export(clustUMAP) +export(create_seurat) +export(db_read10x) +export(gg_umap) +export(hcl_umap) +export(hue_umap) +export(organize_10x) +export(pal_umap) +export(pre_processing) +export(rbw_umap) +export(read10x) +export(read10x_atlas) +export(sparse2DT) +export(sparse2DT.Seurat) +export(umap) +export(visUMAP) +import(Matrix) +import(Seurat) +import(SingleCellExperiment) +import(colorspace) +import(crayon) +import(data.table) +import(doParallel) +import(doSNOW) +import(foreach) +import(progress) +import(reticulate) +import(snow) +import(uwot) 
+import(viridis) +importFrom(colortools,setColors) +importFrom(stringr,str_to_title) diff --git a/R/.DS_Store b/R/.DS_Store new file mode 100644 index 0000000..5033756 Binary files /dev/null and b/R/.DS_Store differ diff --git a/R/NewMeta.R b/R/NewMeta.R new file mode 100644 index 0000000..9e9e7ea --- /dev/null +++ b/R/NewMeta.R @@ -0,0 +1,17 @@ +#' Easy Add Meta data to Seurat Object +#' +#' @param object +#' @param meta +#' @param col.name name of the column for the new meta data +#' +#' @return +#' @export +#' @import Seurat +#' +#' @examples +NewMeta <- function(object, meta, col.name){ + test <- meta[as.character(Seurat::Idents(object))] + names(test) <- colnames(object) + result <- Seurat::AddMetaData(object, test, col.name) + return(result) +} diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..56aa66c --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,7 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +rcpp_hello_world <- function() { + .Call('_dbsinglecell_rcpp_hello_world', PACKAGE = 'dbsinglecell') +} + diff --git a/R/cellphonedb_utilities.R b/R/cellphonedb_utilities.R new file mode 100644 index 0000000..848aa5c --- /dev/null +++ b/R/cellphonedb_utilities.R @@ -0,0 +1,145 @@ +#' CellPhoneDB Summary File +#' +#' @param path the directory containing the CellPhoneDB Output +#' @param pvalue setting this will return results less than it +#' +#' @return +#' @export +#' +#' @examples +#' @import data.table +#' @import Matrix +#' @import crayon +#' +cellphonedb_summary <- function(path, pvalue = 'all'){ + means <- data.table::fread(file.path(path,'means.txt')) + pvalues <- data.table::fread(file.path(path, 'pvalues.txt')) + id.vars <- colnames(means)[1:11] + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Reading CellPhoneDB files')))) + means <- data.table::melt(means, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'mean') 
+ pvalues <- data.table::melt(pvalues, id.vars = id.vars, variable.name = 'cell_pair', value.name = 'pvalue') + + data.table::setkeyv(means, c('cell_pair',id.vars)) + data.table::setkeyv(pvalues, c('cell_pair',id.vars)) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Merging datasets')))) + result <- data.table::merge.data.table(means, pvalues) + result <- as.data.table(result) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Gene Pairs')))) + int_pairs <- strcapture('(.+)\\_(.+)',result$interacting_pair, + data.table::data.table(gA = character(), + gB = character())) + + cat(crayon::green(paste0('\n',Sys.time(),'|',crayon::white(' Capturing Cell Pairs')))) + cell_pair <- strcapture('(.+)\\|(.+)', + result$cell_pair, + data.table::data.table(cell_a = character(), + cell_b = character())) + + result <- data.table(cell_pair, int_pairs, result) + if(pvalue=='significant'){ + result <- result[pvalue<0.05] + } + cat(crayon::green(paste0('\n',Sys.time(),crayon::yellow('| Finished')))) + + return(result) +} + + +# prep_cellphonedb <- function(rds, meta_column, path){ +# require(data.table) +# require(Seurat) +# +# object <- readRDS(rds) +# +# res <- sparse2DT.Seurat(object) +# +# new.meta <- object@meta.data[,meta_column] +# names(new.meta) <- rownames(object@meta.data) +# +# # add cell_types to res +# res[,cell_subset:=new.meta[res$Cell]] +# data.table::setkey(res, Genes, cell_subset) +# +# # generate summary information to be used for filtering uninformative genes +# test <- res[,.(disp = var(Count)/mean(Count), N = .N), c('Genes',meta_column)] +# test[,total:=sum(N),Genes] +# test <- test[total>500&!grepl('^mt-',Genes)&!is.na(disp)] +# +# # create vector with leftover genes +# gl <- unique(test$Genes) +# +# # subset count dataset +# res <- res[Genes %in% gl] +# +# # create counts file +# counts <- dcast(res, Genes~Cell, value.var = 'Count', fill = 0) +# colnames(counts)[1] <- 'Gene' +# setkey(counts, Gene) +# +# m2h <- 
fread('/data/user/dbrooke/db/CellPhoneDB/data/mouse2human.csv', key = 'mouse')
+# mz_genes <- m2h$Ensembl_gene_id
+# names(mz_genes) <- m2h$mouse
+# new_genes <- mz_genes[counts$Gene]
+# names(new_genes) <- counts$Gene
+# new_genes <- new_genes[!is.na(new_genes)]
+#
+# dim(counts)
+# counts <- counts[Gene %in% names(new_genes)]
+# counts[,Gene:=new_genes[Gene]]
+#
+# # create meta file
+# meta <- data.table(Cell = colnames(counts)[-1],cell_type = new.meta[colnames(counts)[-1]])
+# meta <- meta[Cell %in% colnames(counts)[-1]]
+# fwrite(meta, 'PerNiche_int/meta.csv', quote = FALSE)
+#
+# fwrite(counts,'PerNiche_int/counts.csv', nThread = 20, showProgress = TRUE)
+# }
+
+#' Convert a sparse matrix to a data.table
+#'
+#' Flattens a sparse matrix into long (triplet) form, one row per stored entry.
+#'
+#' @param matrix sparse matrix to be used; its row names are reported as genes
+#'   and its column names as cells
+#'
+#' @return a `data.table` with the columns `Genes`, `Cell` and `Count`
+#' @export
+#'
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+sparse2DT <- function(matrix){
+
+  # creating i,j,x format
+  triplets <- Matrix::summary(matrix)
+
+  # creating workable dataset of count data: indices are translated to names
+  result <- data.table::data.table(
+    Genes = rownames(matrix)[triplets$i],
+    Cell  = colnames(matrix)[triplets$j],
+    Count = triplets$x
+  )
+  return(result)
+}
+
+
+
+#' Convert the RNA data of a Seurat object to a data.table
+#'
+#' Reads the `data` slot of the `RNA` assay and hands it to `sparse2DT()`.
+#'
+#' @param object Seurat object
+#'
+#' @return a `data.table` as returned by `sparse2DT()`
+#' @export
+#' @import data.table
+#' @import Seurat
+#' @import Matrix
+#'
+sparse2DT.Seurat <- function(object){
+
+  # exporting normalized data, columns ordered as Cells(object)
+  mat <- object@assays$RNA@data[, Seurat::Cells(object)]
+
+  result <- sparse2DT(mat)
+  return(result)
+}
diff --git a/R/db_read10x.R b/R/db_read10x.R
new file mode 100644
index 0000000..a9f80ff
--- /dev/null
+++ b/R/db_read10x.R
@@ -0,0 +1,186 @@
+#' Read10x v1
+#'
+#' Reads a 10x style directory (`matrix.mtx*`, `barcodes.tsv*` and
+#' `genes*.tsv*`/`features*.tsv*`) into a sparse matrix. Rows are named by the
+#' second column of the gene/feature file, columns by the barcodes.
+#'
+#' @param path directory containing the three files
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return a `SingleCellExperiment` with a `counts` assay when `return.sce` is
+#'   `TRUE`, otherwise the sparse matrix itself
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+read10x <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  # NOTE(review): skip = 3 presumably drops the MatrixMarket banner, comment and
+  # dimension lines -- confirm against the files actually used
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V2
+
+  # duplicate gene names for row names --------------------------------------
+  # repeated names get a numeric suffix: name, name.1, name.2, ...
+  gene <- make.unique(as.character(gene))
+
+  # `dims` is given explicitly so that trailing genes/barcodes without any
+  # stored count are kept and the dimnames always fit
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    # the former `meta = meta` argument referred to an object that does not
+    # exist in this function and made the call fail
+    return(SingleCellExperiment::SingleCellExperiment(list(counts = res)))
+  } else {
+    return(res)
+  }
+
+}
+
+#' Read10x v2
+#'
+#' Reads several 10x directories in parallel with `db_read10x()` and combines
+#' the per-directory results with `cbind`.
+#'
+#' @param filepaths character vector of directories, one per dataset
+#' @param project currently not used
+#' @param meta meta data to include with the various datasets (currently not used)
+#'
+#' @return the `db_read10x()` results of all directories combined with `cbind`
+#' @export
+#' @import doParallel
+#' @import foreach
+#' @import doSNOW
+#' @import snow
+#' @import progress
+#'
+read10x_atlas <- function(filepaths, project = 'scRNAseq', meta = NULL){
+  iterations <- length(filepaths) # used for the foreach loop
+  if(iterations == 0){
+    stop('`filepaths` must contain at least one directory')
+  }
+
+  # TODO: `meta` and `project` are accepted but not used yet (the validation
+  # code for them was never enabled)
+
+  # creating cluster and registering doSNOW ---------------------------------
+  # at least one worker (detectCores() may return 1 or NA) and never more
+  # workers than there are directories to read
+  numCores <- max(1, min(parallel::detectCores() - 1, iterations), na.rm = TRUE)
+  cl <- snow::makeCluster(numCores)
+  on.exit(snow::stopCluster(cl), add = TRUE)
+  doSNOW::registerDoSNOW(cl)
+
+  # progress bar ------------------------------------------------------------
+  pb <- progress::progress_bar$new(
+    format = ":percent item = :item [:bar] :elapsed | eta: :eta",
+    total = iterations,
+    width = floor(options()$width*0.9),
+    clear = TRUE
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+  progress <- function(n) {
+    pb$tick(tokens = list(item = n)) # report the index of the finished item
+  }
+
+  opts <- list(progress = progress) # used in the the foreach loop
+
+  # local binding so the operator is found even when foreach is not attached
+  `%dopar%` <- foreach::`%dopar%`
+
+  result <- foreach::foreach(i = seq_len(iterations),
+                             .options.snow = opts,
+                             .export = 'db_read10x',
+                             .combine = 'cbind',
+                             .packages = c('data.table','SingleCellExperiment','Matrix')) %dopar% {
+    db_read10x(path = filepaths[i])
+  }
+
+  return(result)
+}
+
+
+#' Read10x v3
+#'
+#' Same as `read10x()` except that rows are named by the first column of the
+#' gene/feature file.
+#'
+#' @param path directory containing the three files
+#' @param return.sce return result as SingleCellExperiment object
+#'
+#' @return a `SingleCellExperiment` with a `counts` assay when `return.sce` is
+#'   `TRUE`, otherwise the sparse matrix itself
+#' @export
+#' @import data.table
+#' @import Matrix
+#' @import SingleCellExperiment
+#'
+db_read10x <- function(path, return.sce = TRUE){
+  fl <- dir(path)
+
+  # reads in matrix file ----------------------------------------------------
+  mat <- data.table::fread(file.path(path,fl[grepl('^matrix.mtx',fl)]),
+                           skip = 3,
+                           col.names = c('i','j','value'),
+                           colClasses = c('integer','integer','integer'),
+                           header = FALSE)
+
+  # imports barcode ---------------------------------------------------------
+  barcode <- data.table::fread(file.path(path, fl[grepl('^barcodes.tsv',fl)]),
+                               header = FALSE,
+                               colClasses = 'character')$V1
+
+  # imports gene ------------------------------------------------------------
+  gene <- data.table::fread(
+    file.path(path,fl[grepl('^[gf][e][na][te].+.tsv',fl)]),
+    header = FALSE)$V1
+
+  # duplicate gene names for row names --------------------------------------
+  # repeated names get a numeric suffix: name, name.1, name.2, ...
+  gene <- make.unique(as.character(gene))
+
+  # explicit `dims` keep every gene and barcode, so matrices read from
+  # directories that share a feature file have the same number of rows and can
+  # be combined by read10x_atlas()
+  res <- Matrix::sparseMatrix(
+    i = mat$i,
+    j = mat$j,
+    x = mat$value,
+    dims = c(length(gene), length(barcode)),
+    dimnames = list(gene,barcode))
+
+  if(return.sce){
+    return(SingleCellExperiment::SingleCellExperiment(list(counts = res)))
+  } else {
+    # previously returned `gene` (the row names) instead of the matrix
+    return(res)
+  }
+
+}
diff --git a/R/hdbscan.R b/R/hdbscan.R
new file mode 100644
index 0000000..51355f8
--- /dev/null
+++ b/R/hdbscan.R
@@ -0,0 +1,156 @@
+#' Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param x data handed to the `fit` method of the Python clusterer
+#' @param algorithm forwarded to the Python `hdbscan.HDBSCAN` constructor
+#' @param alpha forwarded to `hdbscan.HDBSCAN`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN`
+#' @param metric forwarded to `hdbscan.HDBSCAN`
+#' @param min_cluster_size coerced to integer and forwarded to `hdbscan.HDBSCAN`
+#' @param min_samples coerced to integer and forwarded to `hdbscan.HDBSCAN`
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN` as `core_dist_n_jobs`
+#' @param prediction_data forwarded to `hdbscan.HDBSCAN`; not sure what this is for. Will update later.
+#'
+#' @return a list with the elements `labels` (factor; the label `-1` is
+#'   recoded as `NA`), `probabilities`, `cluster_persistance`, `exemplars` and
+#'   `outlier_scores`, all read from the fitted Python clusterer
+#' @export
+#'
+#' @import reticulate
+#'
+HDBSCAN <- function(x,
+                    algorithm='best',
+                    alpha=1.0,
+                    approx_min_span_tree = TRUE,
+                    gen_min_span_tree=FALSE,
+                    leaf_size=40,
+                    metric='euclidean',
+                    prediction_data=TRUE,
+                    min_cluster_size =50,
+                    min_samples = 1,
+                    cluster_selection_epsilon = 0.5,
+                    cluster_selection_method = 'leaf',
+                    nThreads = parallel::detectCores()
+){
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # count-like arguments are sent as integers: plain R numbers would reach
+  # Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm = algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon = cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # only the noise label becomes NA; blanking the first level unconditionally
+  # removed a real cluster whenever no point was labelled -1
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+  return(result)
+}
+
+
+
+#' Hierarchical Density-Based Spatial Clustering of Applications with Noise
+#'
+#' @param object Seurat object holding the reduction to cluster
+#' @param reduction name of the reduction whose embeddings are clustered
+#' @param dims columns of the embedding to use; `NULL` uses all of them
+#' @param algorithm forwarded to the Python `hdbscan.HDBSCAN` constructor
+#' @param alpha forwarded to `hdbscan.HDBSCAN`
+#' @param approx_min_span_tree forwarded to `hdbscan.HDBSCAN`
+#' @param gen_min_span_tree forwarded to `hdbscan.HDBSCAN`
+#' @param leaf_size forwarded to `hdbscan.HDBSCAN`
+#' @param metric forwarded to `hdbscan.HDBSCAN`
+#' @param min_cluster_size coerced to integer and forwarded to `hdbscan.HDBSCAN`
+#' @param min_samples coerced to integer and forwarded to `hdbscan.HDBSCAN`
+#' @param cluster_selection_epsilon forwarded to `hdbscan.HDBSCAN`
+#' @param cluster_selection_method forwarded to `hdbscan.HDBSCAN`
+#' @param nThreads forwarded to `hdbscan.HDBSCAN` as `core_dist_n_jobs`
+#' @param return_seurat logical to return the result within the original object or as the raw HDBSCAN result
+#' @param prediction_data forwarded to `hdbscan.HDBSCAN`; not sure what this is for. Will update later.
+#'
+#' @return with `return_seurat = TRUE` the input object with the result list
+#'   stored in `object@misc$hdbscan` and the raw labels in the meta data column
+#'   `cl`; otherwise the result list itself
+#' @export
+#'
+HDBSCAN.Seurat <- function(object,
+                           reduction = 'umap',
+                           dims = NULL,
+                           algorithm='best',
+                           alpha=1.0,
+                           prediction_data = TRUE,
+                           approx_min_span_tree = TRUE,
+                           gen_min_span_tree=FALSE,
+                           leaf_size=40,
+                           metric='euclidean',
+                           min_cluster_size =50,
+                           min_samples = 1,
+                           cluster_selection_epsilon = 0.5,
+                           cluster_selection_method = 'leaf',
+                           nThreads = parallel::detectCores(),
+                           return_seurat = TRUE
+){
+
+  x <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # drop = FALSE keeps a matrix even when a single dimension is selected
+    x <- x[, dims, drop = FALSE]
+  }
+
+  hdbscan <- reticulate::import('hdbscan', delay_load = TRUE)
+
+  # count-like arguments are sent as integers: plain R numbers would reach
+  # Python as floats
+  clusterer <- hdbscan$HDBSCAN(algorithm=algorithm,
+                               alpha = alpha,
+                               prediction_data = prediction_data,
+                               approx_min_span_tree = approx_min_span_tree,
+                               gen_min_span_tree = gen_min_span_tree,
+                               leaf_size = as.integer(leaf_size),
+                               core_dist_n_jobs = as.integer(nThreads),
+                               metric = metric,
+                               min_cluster_size = as.integer(min_cluster_size),
+                               min_samples = as.integer(min_samples),
+                               cluster_selection_epsilon = cluster_selection_epsilon,
+                               cluster_selection_method = cluster_selection_method
+  )
+  clusterer$fit(x)
+
+  result <- list(
+    labels = factor(clusterer$labels_),
+    probabilities = clusterer$probabilities_,
+    cluster_persistance = clusterer$cluster_persistence_,
+    exemplars = clusterer$exemplars_,
+    outlier_scores = clusterer$outlier_scores_)
+
+  # only the noise label becomes NA; blanking the first level unconditionally
+  # removed a real cluster whenever no point was labelled -1
+  levels(result$labels)[levels(result$labels) == '-1'] <- NA
+
+  if(return_seurat){
+    object@misc$hdbscan <- result
+    # `cl` keeps the raw labels (including "-1"), which the palette helpers
+    # look for to reserve a gray entry
+    object$cl <- factor(clusterer$labels_)
+    return(object)
+  } else {
+    return(result)
+  }
+
+
+}
diff --git a/R/organize_10x.R b/R/organize_10x.R
new file mode 100644
index 0000000..9411853
--- /dev/null
+++ b/R/organize_10x.R
@@ -0,0 +1,53 @@
+#' Reorganize another person's mess into a usable 10X dataset
+#'
+#' Moves every `GSM<digits>_*.tsv.gz` / `*.mtx.gz` file found in `x` into a
+#' sub-folder named after its accession id and strips the prefix from the file
+#' name; `genes.tsv.gz` is renamed to `features.tsv.gz`.
+#'
+#' @param x path containing the unorganized disaster
+#'
+#' @return `NULL`, invisibly; called for its side effect on the file system
+#' @export
+#' @import data.table
+#'
+organize_10x <- function(x ){
+  path_main <- x
+  file_list <- dir(path = x, full.names = TRUE)
+  file_list <- file_list[grepl('tsv.gz$',file_list)|grepl('mtx.gz$',file_list)]
+
+  res <- data.table::data.table(strcapture('(GSM\\d+)_.+([fmbg][ae][tarn]\\w+.\\w{3}.gz)', x = basename(file_list),
+                                           proto = data.table::data.table(accession_id = character(),
+                                                                          file_type = character())))
+  res$old_path <- file_list
+  res$old_name <- basename(file_list)
+
+  # files whose name does not follow the pattern are left untouched; before,
+  # they were all sent to the same "NA/NA" target and then deleted
+  res <- res[!is.na(res$accession_id) & !is.na(res$file_type), ]
+  if(nrow(res) == 0){
+    return(invisible(NULL))
+  }
+
+  res$file_type[res$file_type == 'genes.tsv.gz'] <- 'features.tsv.gz'
+  res$new_folder <- file.path(path_main, res$accession_id)
+  res$new_path <- file.path(res$new_folder, res$file_type)
+
+  pb <- progress::progress_bar$new(
+    format = " [:bar] :percent eta: :eta",
+    clear = FALSE, total = nrow(res), width = 80)
+
+  for(i in unique(res$new_folder)){
+    if(!dir.exists(i)){
+      dir.create(i)
+    }
+  }
+
+  for(j in seq_len(nrow(res))){
+    pb$tick()
+    copied <- file.copy(res$old_path[j], res$new_path[j])
+    if(copied){
+      # the original is removed only after a successful copy
+      file.remove(res$old_path[j])
+    } else {
+      warning('could not copy ', res$old_path[j], ' to ', res$new_path[j],
+              '; the original file was kept')
+    }
+  }
+
+  invisible(NULL)
+}
+
+
diff --git a/R/plot_umap.R b/R/plot_umap.R
new file mode 100644
index 0000000..711ea0d
--- /dev/null
+++ b/R/plot_umap.R
@@ -0,0 +1,176 @@
+#' Custom Palette for UMAP
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the meta data column holding the groups
+#' @param base_col base color handed to `colortools::setColors()`
+#' @param jitter randomize the colors
+#'
+#' @return character vector of colors; it starts with `'gray'` when the label
+#'   `"-1"` occurs in `group_col`
+#' @export
+#' @importFrom colortools setColors
+#' @import viridis
+#'
+pal_umap <- function(object, group_col, base_col = "#1E90FF", jitter = TRUE){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+
+  # default order; `new_order` used to be undefined (and the function failed)
+  # with `jitter = FALSE`
+  new_order <- seq_len(n)
+  if(jitter){
+    # interleave in strides of 5; the number of start offsets is capped at `n`
+    # because seq() fails when the start lies beyond `n`
+    new_order <- unlist(lapply(seq_len(min(5, n)), function(x) seq(x, n, 5)))
+  }
+  pal <- c(pal, colortools::setColors(base_col,n))[new_order]
+  return(pal)
+}
+
+#' UMAP Palette using HCL presets
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the meta data column holding the groups
+#' @param hcl_pal palette name handed to `hcl.colors()`
+#' @param jitter logical; interleave the colors instead of keeping palette order
+#' @param comp integer setting the color complementary to be used
+#'
+#' @return character vector of colors; it starts with `'gray'` when the label
+#'   `"-1"` occurs in `group_col`
+#' 
@export
+#'
+hcl_umap <- function(object,group_col, hcl_pal = 'Dark 3', jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  # a leading gray entry is reserved when the label "-1" is present
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  new_order <- .umap_color_order(n, comp, jitter)
+  pal <- c(pal, hcl.colors(n,palette = hcl_pal)[new_order])
+  return(pal)
+}
+
+# Order in which `n` generated colors are handed out.
+# Without jitter this is 1..n. With jitter the colors are interleaved with a
+# stride of floor(n/comp), e.g. n = 10, comp = 3 gives 1 4 7 10 2 5 8 3 6 9.
+.umap_color_order <- function(n, comp, jitter){
+  if(!jitter || n < 1){
+    return(seq_len(n))
+  }
+  # the stride is at least 1: with n < comp it used to be 0, which made seq() fail
+  jn <- max(1, floor(n/comp))
+  as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+}
+
+#' UMAP palette using rainbow colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the meta data column holding the groups
+#' @param jitter logical; interleave the colors instead of keeping palette order
+#' @param comp integer setting the color complementary to be used
+#'
+#' @return character vector of colors; it starts with `'gray'` when the label
+#'   `"-1"` occurs in `group_col`
+#' @export
+#'
+rbw_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  new_order <- .umap_color_order(n, comp, jitter)
+  # one color per group; the palette size used to be fixed at 57, which gave NA
+  # colors beyond 57 groups and a narrow hue range for fewer groups
+  pal <- c(pal,rainbow(n,s = 0.7,v = 0.8,alpha = 0.95)[new_order])
+  return(pal)
+}
+
+#' UMAP Palette using soft hues
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the meta data column holding the groups
+#' @param jitter logical; interleave the colors instead of keeping palette order
+#' @param comp integer setting the color complementary to be used
+#'
+#' @return character vector of colors; it starts with `'gray'` when the label
+#'   `"-1"` occurs in `group_col`
+#' @export
+#'
+#' @import colorspace
+#'
+hue_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  new_order <- .umap_color_order(n, comp, jitter)
+  pal <- c(pal, colorspace::sequential_hcl(n, h = c(0, 300), c = c(60, 60), l = 65)[new_order])
+  return(pal)
+}
+
+# `n` equally spaced hues at fixed luminance (65) and chroma (100).
+gg_color_hue <- function(n) {
+  hues <- seq(15, 375, length.out = n + 1)
+  # the last hue (375) coincides with the first (15) on the color wheel, so it is dropped
+  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
+}
+
+#' 
UMAP Palette using ggplot2 colors
+#'
+#' @param object object with a `meta.data` slot (e.g. a Seurat object)
+#' @param group_col name of the meta data column holding the groups
+#' @param jitter logical; interleave the colors instead of keeping palette order
+#' @param comp integer setting the color complementarity to be used
+#'
+#' @return character vector of colors; it starts with `'gray'` when the label
+#'   `"-1"` occurs in `group_col`
+#' @export
+#'
+gg_umap <- function(object,group_col, jitter = TRUE,comp = 3){
+  col_levels <- object@meta.data[[group_col]]
+  n <- length(unique(col_levels))
+  if("-1" %in% col_levels){
+    pal <- c('gray')
+  } else {
+    pal <- c()
+  }
+  # stride of the interleaving; at least 1 (with n < comp it used to be 0,
+  # which made seq() fail)
+  jn <- max(1, floor(n/comp))
+  new_order <- seq_len(n)
+  if(jitter && n > 0){
+    new_order <- as.integer(unlist(lapply(seq_len(jn), function(x) seq(x, n, jn))))
+  }
+  pal <- c(pal, gg_color_hue(n)[new_order])
+  return(pal)
+}
+
+# Two-digit upper-case hexadecimal representation of numbers in [0, 256).
+# Fractional input is rounded down (127.5 -> "7F"). Vectorized over `x`.
+hex_convert <- function(x){
+  if(any(is.na(x) | x < 0 | x >= 256)){
+    stop('`x` must be a number in the range [0, 256)')
+  }
+  sprintf('%02X', as.integer(floor(x)))
+}
+
+# Palette of n + 1 colors: `base` with the lowest alpha, followed by the
+# viridis "plasma" colors with alpha rising linearly from `min` percent to
+# fully opaque.
+incA <- function(n, min = 0, base = '#E1E1E1'){
+  low <- hex_convert(min/100*255)
+  c1 <- paste0(base, low)
+
+  res <- c(c1,paste0(
+    substring(viridis::plasma(n), 1, 7), # "#RRGGBB" without the alpha suffix
+    hex_convert(seq(min/100*255,255, length.out = n))))
+  return(res)
+}
+
+
diff --git a/R/reticulate_helpers.R b/R/reticulate_helpers.R
new file mode 100644
index 0000000..44533d2
--- /dev/null
+++ b/R/reticulate_helpers.R
@@ -0,0 +1,15 @@
+# global references to the Python modules (will be initialized in .onLoad)
+scipy <- NULL
+hdbscan <- NULL
+# The handle of the Python `umap` module is deliberately not called `umap`:
+# the package defines an R function of that name, and superassigning to it in
+# .onLoad replaced the function with the module.
+umap_learn <- NULL
+
+.onLoad <- function(libname, pkgname) {
+  # use superassignment to update the global references; delay_load defers the
+  # Python import until a module is first used
+  scipy <<- reticulate::import("scipy", delay_load = TRUE)
+  hdbscan <<- reticulate::import('hdbscan', delay_load = TRUE)
+  umap_learn <<- reticulate::import('umap', delay_load = TRUE)
+}
+
+# Installs the Python packages behind the HDBSCAN and umap wrappers.
+# `method` and `conda` are handed to reticulate::py_install().
+install_python_packages <- function(method = "auto", conda = "auto") {
+  # the Python module `umap` is distributed as the package "umap-learn"
+  reticulate::py_install(c("hdbscan", "umap-learn"), method = method, conda = conda)
+}
diff --git a/R/scRNA_helpers.R b/R/scRNA_helpers.R
new file mode 100644
index 0000000..0577bd5
--- /dev/null
+++ b/R/scRNA_helpers.R
@@ 
-0,0 +1,132 @@
+#' Simple method for creating Seurat Objects
+#'
+#' @param filepath directory handed to `Seurat::Read10X()`
+#' @param sample sample name to use; defaults to the last component of `filepath`
+#'
+#' @return a Seurat object whose cell names are prefixed with `<sample>_` and
+#'   whose project name is `sample`
+#' @export
+#' @import Seurat
+#'
+create_seurat <- function(filepath, sample = NULL ){
+  if(is.null(sample)){
+    sample <- basename(filepath)
+  }
+
+  # read in 10X data
+  x <- Seurat::Read10X(data.dir = filepath)
+
+  # create unique cell ids
+  colnames(x) <- paste0(sample, '_', colnames(x))
+
+  # create Seurat Object; the former `meta.data = meta` argument referred to an
+  # object that does not exist in this function and made the call fail
+  suppressWarnings({
+    res <- Seurat::CreateSeuratObject(x, project = sample)
+  })
+
+  return(res)
+}
+
+#' Seurat Preprocessing
+#'
+#' Adds the mitochondrial percentage, filters cells, normalizes, selects
+#' variable features, scores the cell cycle, scales the data and runs PCA.
+#'
+#' @param object Seurat object
+#' @param species `'Homo sapiens'` uses the mitochondrial pattern `^MT-`, any
+#'   other value `^mt-`; `'Mus musculus'` additionally converts the cell cycle
+#'   gene names to title case
+#' @param nfeatures number of variable features to select
+#' @param npcs number of principle component dimensions to calculate
+#'
+#' @return the processed Seurat object
+#' @export
+#' @import Seurat
+#' @importFrom stringr str_to_title
+#' @import crayon
+#'
+pre_processing <- function(object, species = 'Homo sapiens', nfeatures = 3000, npcs = 50){
+  if(species == 'Homo sapiens'){
+    mt_pattern <- '^MT-'
+  } else {
+    mt_pattern <- '^mt-'
+  }
+
+  object <- Seurat::PercentageFeatureSet(object,
+                                         pattern = mt_pattern,
+                                         col.name = "percent.mt")
+
+  message_section('Filtering out low quality cells and doublets')
+
+  # Removing low quality cells and doublets
+  object <- subset(object, percent.mt < 20 & nFeature_RNA > 500 & nFeature_RNA < 4100)
+
+  message_section('Normalizing data')
+  # Normalization
+  object <- Seurat::NormalizeData(object, verbose = TRUE)
+
+  # Variable Features
+  message_section(paste('Finding',nfeatures,'most variable features'))
+  object <- Seurat::FindVariableFeatures(object, selection.method = "vst", nfeatures = nfeatures)
+
+  # cell cycle gene lists shipped with Seurat, referenced through the namespace
+  # so they are found even when Seurat is not attached
+  cc_genes <- Seurat::cc.genes.updated.2019
+  if(species =='Mus musculus'){ # change gene name format to title capitalization
+    ccss <- stringr::str_to_title(cc_genes$s.genes)
+    ccg2m <- stringr::str_to_title(cc_genes$g2m.genes)
+  } else { # use build in gene names
+    ccss <- cc_genes$s.genes
+    ccg2m <- cc_genes$g2m.genes
+  }
+
+  # scoring function
+  object <- Seurat::CellCycleScoring(object, s.features = ccss, g2m.features = ccg2m)
+
+  # difference between s and g2m scores
+  object$CC.Difference <- object$S.Score - object$G2M.Score
+
+  message_section('Scaling data')
+  # Scaling Data ----
+  object <- Seurat::ScaleData(object, vars.to.regress = c('CC.Difference','percent.mt'))
+
+  message_section('Performing PCA')
+  message_append(paste('using npcs =',npcs))
+  # PCA ----
+  # `pc.genes = object@var.genes` is the Seurat v2 interface; objects handled by
+  # the calls above have no `var.genes` slot, so the variable features are
+  # passed through `features` instead
+  object <- Seurat::RunPCA(
+    object,
+    features = Seurat::VariableFeatures(object),
+    npcs = npcs)
+  return(object)
+
+}
+
+# Prints a horizontal rule followed by a bold "[timestamp] text" line.
+# The rule spans 75% of the console width, capped at 120 characters.
+message_section <- function(text){
+  n <- min(ceiling(options()$width*0.75), 120)
+  cat("\n",rep('-',n), "\n",sep = '')
+  cat(crayon::bold(crayon::yellow(paste0('[',Sys.time(),']'))), crayon::bold(crayon::green(text)),'\n')
+}
+
+# Prints "[timestamp] text". When `text` is longer than the rule width minus 22
+# characters only the timestamp is printed.
+message_task <- function(text){
+  n <- min(ceiling(options()$width*0.75), 120)
+
+  if(nchar(text) > n -22){
+    cat(crayon::yellow(paste0('[',Sys.time(),']')),'\n')
+  } else {
+    cat(crayon::yellow(paste0('[',Sys.time(),']')), text,'\n')
+  }
+
+}
+
+# Prints an indented "- text" line. Text longer than the rule width minus 22
+# characters is not printed at all.
+message_append <- function(text){
+  n <- min(ceiling(options()$width*0.75), 120)
+
+  if(nchar(text) > n - 22){
+    invisible()
+  } else {
+    cat(rep(' ',23),crayon::silver('- '),crayon::silver(text),'\n', sep = '')
+  }
+}
diff --git a/R/umap-learn.R b/R/umap-learn.R
new file mode 100644
index 0000000..ce41ba9
--- /dev/null
+++ b/R/umap-learn.R
@@ -0,0 +1,201 @@
+#' UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)
+#'
+#' @param embedding
+#' @param a
+#' @param angular_rp_forest
+#' @param b
+#' @param force_approximation_algorithm
+#' @param init
+#' @param learning_rate
+#' @param local_connectivity
+#' @param low_memory
+#' @param metric
+#' @param metric_kwds
+#' @param min_dist
+#' @param n_components
+#' @param n_epochs
+#' @param n_neighbors
+#' @param negative_sample_rate
+#' @param output_metric
+#' @param 
output_metric_kwds
+#' @param random_state coerced to integer and forwarded to the Python `umap.UMAP` constructor
+#' @param repulsion_strength forwarded to `umap.UMAP`
+#' @param set_op_mix_ratio forwarded to `umap.UMAP`
+#' @param spread forwarded to `umap.UMAP`
+#' @param target_metric forwarded to `umap.UMAP`
+#' @param target_metric_kwds forwarded to `umap.UMAP`
+#' @param target_n_neighbors coerced to integer and forwarded to `umap.UMAP`
+#' @param target_weight forwarded to `umap.UMAP`
+#' @param transform_queue_size forwarded to `umap.UMAP`
+#' @param transform_seed coerced to integer and forwarded to `umap.UMAP`
+#' @param unique forwarded to `umap.UMAP`
+#' @param verbose forwarded to `umap.UMAP`
+#' @param nThreads number of parallel threads to be used
+#'
+#' @details `nThreads` is written to the environment variable
+#'   `OMP_NUM_THREADS`. Every other argument except `embedding` is forwarded to
+#'   the Python `umap.UMAP` constructor; `embedding` is handed to its
+#'   `fit_transform` method.
+#'
+#' @return the value returned by `fit_transform`
+#' @export
+#'
+#' @import reticulate
+#' @import Seurat
+#'
+umap <- function(
+  embedding,
+  a=NULL,
+  angular_rp_forest=FALSE,
+  b=NULL,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=200,
+  n_neighbors=15,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1
+){
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  # local handle of the Python module; inside this body it shadows the function name
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    # passed unchanged, as umap.Seurat() does; the former call to the
+    # non-existent `as.intger()` made every invocation fail
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+
+  return(result)
+}
+
+# Python UMAP on a reduction stored in a Seurat object.
+#   object          Seurat object
+#   reduction       name of the reduction whose embeddings are used as input
+#   reduction_name  name under which the result is stored in `object`
+#   dims            columns of the input embedding to use; NULL uses all
+#   return_seurat   TRUE returns `object` with the new reduction, FALSE the raw result
+# The remaining arguments are handled as in umap().
+umap.Seurat <- function(
+  object,
+  reduction = 'pca',
+  reduction_name = 'umap',
+  dims = NULL,
+  a=1.662,
+  angular_rp_forest=FALSE,
+  b=0.7905,
+  force_approximation_algorithm=FALSE,
+  init='spectral',
+  learning_rate=1.0,
+  local_connectivity=1.0,
+  low_memory=FALSE,
+  metric='euclidean',
+  metric_kwds=NULL,
+  min_dist=0.1,
+  n_components=2,
+  n_epochs=100,
+  n_neighbors=50,
+  negative_sample_rate=5,
+  output_metric='euclidean',
+  output_metric_kwds=NULL,
+  random_state=42,
+  repulsion_strength=1.0,
+  set_op_mix_ratio=1.0,
+  spread=1.0,
+  target_metric='categorical',
+  target_metric_kwds=NULL,
+  target_n_neighbors=-1,
+  target_weight=0.5,
+  transform_queue_size=4.0,
+  transform_seed=42,
+  unique=FALSE,
+  verbose=TRUE,
+  nThreads = parallel::detectCores()-1,
+  return_seurat = TRUE
+){
+
+  embedding <- Seurat::Embeddings(object, reduction = reduction)
+  if(!is.null(dims)){
+    # the columns are selected here; handing `dims` to Embeddings() had no effect
+    embedding <- embedding[, dims, drop = FALSE]
+  }
+
+  Sys.setenv(OMP_NUM_THREADS=nThreads)
+  umap <- reticulate::import('umap', delay_load = TRUE)
+  reducer <- umap$UMAP(
+    a=a,
+    angular_rp_forest=angular_rp_forest,
+    b=b,
+    force_approximation_algorithm=force_approximation_algorithm,
+    init=init,
+    learning_rate=learning_rate,
+    local_connectivity=local_connectivity,
+    low_memory=low_memory,
+    metric=metric,
+    metric_kwds=metric_kwds,
+    min_dist=min_dist,
+    n_components=as.integer(n_components),
+    n_epochs=as.integer(n_epochs),
+    n_neighbors=as.integer(n_neighbors),
+    negative_sample_rate=negative_sample_rate,
+    output_metric=output_metric,
+    output_metric_kwds=output_metric_kwds,
+    random_state=as.integer(random_state),
+    repulsion_strength=repulsion_strength,
+    set_op_mix_ratio=set_op_mix_ratio,
+    spread=spread,
+    target_metric=target_metric,
+    target_metric_kwds=target_metric_kwds,
+    target_n_neighbors=as.integer(target_n_neighbors),
+    target_weight=target_weight,
+    transform_queue_size=transform_queue_size,
+    transform_seed=as.integer(transform_seed),
+    unique=unique,
+    verbose=verbose)
+
+  result <- reducer$fit_transform(embedding)
+
+  if(return_seurat){
+    # the matrix coming back from Python carries no cell names; they are taken
+    # from the input so the reduction can be matched to the cells of `object`
+    rownames(result) <- rownames(embedding)
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = result, key = 'umap_', assay = 'RNA')
+    return(object)
+  } else {
+    return(result)
+  }
+}
+
+
+
diff --git a/R/umap.R b/R/umap.R
new file mode 100644
index 0000000..2f9f0fc
--- /dev/null
+++ b/R/umap.R
@@ -0,0 +1,132 @@
+#' UWOT-UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the reduction whose embeddings are used as input
+#' @param spread forwarded to `uwot::umap()`
+#' @param n_components forwarded to `uwot::umap()`
+#' @param min_dist forwarded to `uwot::umap()`
+#' @param metric forwarded to `uwot::umap()`
+#' @param n_neighbors forwarded to `uwot::umap()`
+#' @param set_op_mix_ratio forwarded to `uwot::umap()`
+#' @param local_connectivity forwarded to `uwot::umap()`
+#' @param repulsion_strength forwarded to `uwot::umap()`
+#' @param negative_sample_rate forwarded to `uwot::umap()`
+#' @param n_threads forwarded to `uwot::umap()`
+#' @param reduction_name name under which the result is stored in `object`
+#' @param return_seurat logical; `TRUE` returns `object` with the new
+#'   reduction, `FALSE` the raw `uwot::umap()` result
+#' @param verbose whether to print function messages
+#'
+#' @return see `return_seurat`
+#' @export
+#' @import uwot
+#' @import Seurat
+#'
+visUMAP <- function(object,
+                    reduction = 'harmony',
+                    spread = 1,
+                    n_components = 2,
+                    min_dist = 0.3,
+                    metric = 'cosine',
+                    n_neighbors = 30,
+                    set_op_mix_ratio = 1,
+                    local_connectivity = 1,
+                    repulsion_strength = 1,
+                    negative_sample_rate = 5,
+                    n_threads = parallel::detectCores()-1,
+                    reduction_name = 'umap',
+                    return_seurat = TRUE,
+                    verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  # `spread` and `verbose` are now honoured; `spread` used to be fixed at 1 and
+  # `verbose` was not passed on
+  umap_res <- uwot::umap(embds,
+                         spread = spread,
+                         n_components = n_components,
+                         min_dist = min_dist,
+                         metric = metric,
+                         n_threads = n_threads,
+                         n_neighbors = n_neighbors,
+                         set_op_mix_ratio = set_op_mix_ratio,
+                         local_connectivity = local_connectivity,
+                         repulsion_strength = repulsion_strength,
+                         negative_sample_rate = negative_sample_rate,
+                         verbose = verbose
+  )
+  if(return_seurat){
+    # reductions are stored with `[[<-`
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res, key = 'UMAP_', assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+
+}
+
+
+#' UWOT-UMAP: Clustering Specific UMAP
+#'
+#' @param object Seurat object
+#' @param reduction name of the reduction whose embeddings are used as input
+#' @param spread forwarded to `uwot::umap()`
+#' @param n_components forwarded to `uwot::umap()`; `NULL` uses the number of
+#'   columns of the input embedding
+#' @param min_dist forwarded to `uwot::umap()`
+#' @param metric forwarded to `uwot::umap()`
+#' @param n_neighbors forwarded to `uwot::umap()`
+#' @param set_op_mix_ratio forwarded to `uwot::umap()`
+#' @param local_connectivity forwarded to `uwot::umap()`
+#' @param repulsion_strength forwarded to `uwot::umap()`
+#' @param negative_sample_rate forwarded to `uwot::umap()`
+#' @param n_threads forwarded to `uwot::umap()`
+#' @param reduction_name name under which the result is stored in `object`
+#' @param return_seurat logical; `TRUE` returns `object` with the new
+#'   reduction, `FALSE` the raw `uwot::umap()` result
+#' @param verbose whether to print function messages
+#'
+#' @return see `return_seurat`
+#' @export
+#'
+clustUMAP <- function(object,
+                      reduction = 'harmony',
+                      spread = 1.1,
+                      n_components = NULL,
+                      min_dist = 0,
+                      metric = 'cosine',
+                      n_neighbors = 50,
+                      set_op_mix_ratio = 1,
+                      local_connectivity = 1,
+                      repulsion_strength = 1,
+                      negative_sample_rate = 5,
+                      n_threads = parallel::detectCores()-1,
+                      reduction_name = 'umap',
+                      return_seurat = TRUE,
+                      verbose = TRUE
+){
+  embds <- Seurat::Embeddings(object, reduction = reduction)
+  if(is.null(n_components)){
+    n_components <- ncol(embds)
+  }
+  umap_res <- uwot::umap(embds,
+                         spread = spread,
+                         n_components = n_components,
+                         min_dist = min_dist,
+                         n_threads = n_threads,
+                         metric = metric,
+                         n_neighbors = n_neighbors,
+                         set_op_mix_ratio = set_op_mix_ratio,
+                         local_connectivity = local_connectivity,
+                         repulsion_strength = repulsion_strength,
+                         negative_sample_rate = negative_sample_rate,
+                         verbose = verbose
+  )
+
+  rownames(umap_res) <- rownames(embds)
+  colnames(umap_res) <- paste0('UMAP_', seq_len(ncol(umap_res)))
+
+  if(return_seurat){
+    # reductions are stored with `[[<-`
+    object[[reduction_name]] <- Seurat::CreateDimReducObject(embeddings = umap_res,
+                                                            key = 'clustUMAP_',
+                                                            assay = 'RNA')
+    return(object)
+  } else {
+    return(umap_res)
+  }
+
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1ce2d6b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+
+# dbsinglecell
+
+
+
+
+A collection 
of functions for processing single-cell RNAseq data that I am using constantly. I created this package to make these functions portable for myself. Use at your own risk. + +## Installation + +If you so desire to use this package, install by using + +``` r +remotes::install_github("dbrookeUAB/dbsinglecell") +``` + +To use `HDBSCAN` or `umap` functions, you must install their respective python libraries by + +``` r +library(dbsinglecell) +install_python_packages() +``` +Cheers! diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..a9e43eb --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,35 @@ +destination: docs +template: + params: + bootswatch: yeti +navbar: + structure: + left: [home, intro, reference, articles, tutorials, news] + right: [github, packages,main_site, twitter] + components: + twitter: + icon: "fab fa-twitter fa-lg" + href: http://twitter.com/deweybrooke1 + main_site: + icon: "fas fa-user-circle fa-lg" + text: Dewey Brooke + href: https://www.deweybrooke.org/ + github: + icon: "fab fa-github fa-lg" + href: https://github.com/dbrookeUAB/GeCKO + packages: + icon: "fab fa-r-project fa-lg" + text: Other Packages + menu: + - text: TCGAseq + href: https://tcgaseq.deweybrooke.org/ + - text: GTEXseq + href: https://gtexseq.deweybrooke.org/ + - text: GeCKO + href: https://gecko.deweybrooke.org/ + - text: miknn + href: https://miknn.deweybrooke.org/ + - text: dth + href: https://dth.deweybrooke.org/ + - text: dbsinglecell + href: https://dbsinglecell.deweybrooke.org/ diff --git a/dbsinglecell.Rproj b/dbsinglecell.Rproj new file mode 100644 index 0000000..497f8bf --- /dev/null +++ b/dbsinglecell.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes 
+PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/HDBSCAN.Rd b/man/HDBSCAN.Rd new file mode 100644 index 0000000..3b63232 --- /dev/null +++ b/man/HDBSCAN.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hdbscan.R +\name{HDBSCAN} +\alias{HDBSCAN} +\title{Hierarchical Density-Based Spatial Clustering of Applications with Noise} +\usage{ +HDBSCAN( + x, + algorithm = "best", + alpha = 1, + approx_min_span_tree = TRUE, + gen_min_span_tree = FALSE, + leaf_size = 40, + metric = "euclidean", + prediction_data = TRUE, + min_cluster_size = 50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = "leaf", + nThreads = parallel::detectCores() +) +} +\arguments{ +\item{prediction_data}{} +} +\value{ + +} +\description{ +Hierarchical Density-Based Spatial Clustering of Applications with Noise +} diff --git a/man/HDBSCAN.Seurat.Rd b/man/HDBSCAN.Seurat.Rd new file mode 100644 index 0000000..8d196a7 --- /dev/null +++ b/man/HDBSCAN.Seurat.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/hdbscan.R +\name{HDBSCAN.Seurat} +\alias{HDBSCAN.Seurat} +\title{Hierarchical Density-Based Spatial Clustering of Applications with Noise} +\usage{ +HDBSCAN.Seurat( + object, + reduction = "umap", + dims = NULL, + algorithm = "best", + alpha = 1, + prediction_data = TRUE, + approx_min_span_tree = TRUE, + gen_min_span_tree = FALSE, + leaf_size = 40, + metric = "euclidean", + min_cluster_size = 50, + min_samples = 1, + cluster_selection_epsilon = 0.5, + cluster_selection_method = "leaf", + nThreads = parallel::detectCores(), + return_seurat = TRUE +) +} +\arguments{ +\item{prediction_data}{} + +\item{return_seurat}{logical to return the result within the orignal object or as the raw HDBSCAN result} +} +\value{ + +} +\description{ +Hierarchical Density-Based Spatial Clustering of Applications with Noise +} diff --git a/man/NewMeta.Rd b/man/NewMeta.Rd 
new file mode 100644 index 0000000..1f96652 --- /dev/null +++ b/man/NewMeta.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/NewMeta.R +\name{NewMeta} +\alias{NewMeta} +\title{Easy Add Meta data to Seurat Object} +\usage{ +NewMeta(object, meta, col.name) +} +\arguments{ +\item{col.name}{name of the column for the new meta data} +} +\value{ + +} +\description{ +Easy Add Meta data to Seurat Object +} diff --git a/man/cellphonedb_summary.Rd b/man/cellphonedb_summary.Rd new file mode 100644 index 0000000..1034025 --- /dev/null +++ b/man/cellphonedb_summary.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cellphonedb_utilities.R +\name{cellphonedb_summary} +\alias{cellphonedb_summary} +\title{CellPhoneDB Summary File} +\usage{ +cellphonedb_summary(path, pvalue = "all") +} +\arguments{ +\item{path}{the directory containing the CellPhoneDB Output} + +\item{pvalue}{setting this will return results less than it} +} +\value{ + +} +\description{ +CellPhoneDB Summary File +} diff --git a/man/clustUMAP.Rd b/man/clustUMAP.Rd new file mode 100644 index 0000000..f430dff --- /dev/null +++ b/man/clustUMAP.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umap.R +\name{clustUMAP} +\alias{clustUMAP} +\title{UWOT-UAMP: Clustering Specific UMAP} +\usage{ +clustUMAP( + object, + reduction = "harmony", + spread = 1.1, + n_components = NULL, + min_dist = 0, + metric = "cosine", + n_neighbors = 50, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + negative_sample_rate = 5, + n_threads = parallel::detectCores() - 1, + reduction_name = "umap", + return_seurat = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{verbose}{whether to print function messages} +} +\value{ + +} +\description{ +UWOT-UAMP: Clustering Specific UMAP +} diff --git a/man/create_seurat.Rd b/man/create_seurat.Rd new file mode 100644 index 
0000000..98c2ad6 --- /dev/null +++ b/man/create_seurat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scRNA_helpers.R +\name{create_seurat} +\alias{create_seurat} +\title{Simple method for creating Seurat Objects} +\usage{ +create_seurat(filepath, sample = NULL) +} +\arguments{ +\item{sample}{sample name to use} +} +\value{ + +} +\description{ +Simple method for creating Seurat Objects +} diff --git a/man/db_read10x.Rd b/man/db_read10x.Rd new file mode 100644 index 0000000..2545faf --- /dev/null +++ b/man/db_read10x.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/db_read10x.R +\name{db_read10x} +\alias{db_read10x} +\title{Read10x v3} +\usage{ +db_read10x(path, return.sce = TRUE) +} +\arguments{ +\item{return.sce}{} +} +\value{ + +} +\description{ +Read10x v3 +} diff --git a/man/db_singlecell-package.Rd b/man/db_singlecell-package.Rd new file mode 100644 index 0000000..43627da --- /dev/null +++ b/man/db_singlecell-package.Rd @@ -0,0 +1,34 @@ +\name{db_singlecell-package} +\alias{db_singlecell-package} +\alias{db_singlecell} +\docType{package} +\title{ + A short title line describing what the package does +} +\description{ + A more detailed description of what the package does. A length + of about one to five lines is recommended. +} +\details{ + This section should provide a more detailed overview of how to use the + package, including the most important functions. +} +\author{ +Your Name, email optional. + +Maintainer: Your Name +} +\references{ + This optional section can contain literature or other references for + background information. +} +\keyword{ package } +\seealso{ + Optional links to other man pages +} +\examples{ + \dontrun{ + ## Optional simple examples of the most important functions + ## These can be in \dontrun{} and \donttest{} blocks. 
+ } +} diff --git a/man/gg_umap.Rd b/man/gg_umap.Rd new file mode 100644 index 0000000..79a9dcb --- /dev/null +++ b/man/gg_umap.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_umap.R +\name{gg_umap} +\alias{gg_umap} +\title{UMAP Palette using ggplot2 colors} +\usage{ +gg_umap(object, group_col, jitter = TRUE, comp = 3) +} +\arguments{ +\item{comp}{integer setting the color complementarity to be used} +} +\value{ + +} +\description{ +UMAP Palette using ggplot2 colors +} diff --git a/man/hcl_umap.Rd b/man/hcl_umap.Rd new file mode 100644 index 0000000..6dfc355 --- /dev/null +++ b/man/hcl_umap.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_umap.R +\name{hcl_umap} +\alias{hcl_umap} +\title{UMAP Palette using HCL presets} +\usage{ +hcl_umap(object, group_col, hcl_pal = "Dark 3", jitter = TRUE, comp = 3) +} +\arguments{ +\item{comp}{integer setting the color complementarity to be used} +} +\value{ + +} +\description{ +UMAP Palette using HCL presets +} diff --git a/man/hue_umap.Rd b/man/hue_umap.Rd new file mode 100644 index 0000000..aa8c858 --- /dev/null +++ b/man/hue_umap.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_umap.R +\name{hue_umap} +\alias{hue_umap} +\title{UMAP Palette using soft hues} +\usage{ +hue_umap(object, group_col, jitter = TRUE, comp = 3) +} +\arguments{ +\item{jitter}{randomize the colors} + +\item{comp}{integer setting the color complementarity to be used} +} +\value{ + +} +\description{ +UMAP Palette using soft hues +} diff --git a/man/organize_10x.Rd b/man/organize_10x.Rd new file mode 100644 index 0000000..c70a028 --- /dev/null +++ b/man/organize_10x.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/organize_10x.R +\name{organize_10x} +\alias{organize_10x} +\title{Reorganize another person's mess into 
a usable 10X dataset} +\usage{ +organize_10x(x) +} +\arguments{ +\item{x}{path containing the unorganized disaster} +} +\value{ + +} +\description{ +Reorganize another person's mess into a usable 10X dataset +} diff --git a/man/pal_umap.Rd b/man/pal_umap.Rd new file mode 100644 index 0000000..14a88b8 --- /dev/null +++ b/man/pal_umap.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_umap.R +\name{pal_umap} +\alias{pal_umap} +\title{Custom Palette for UMAP} +\usage{ +pal_umap(object, group_col, base_col = "#1E90FF", jitter = TRUE) +} +\arguments{ +\item{jitter}{randomize the colors} +} +\value{ + +} +\description{ +Custom Palette for UMAP +} +\examples{ + +} diff --git a/man/pre_processing.Rd b/man/pre_processing.Rd new file mode 100644 index 0000000..c48d352 --- /dev/null +++ b/man/pre_processing.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scRNA_helpers.R +\name{pre_processing} +\alias{pre_processing} +\title{Seurat Preprocessing} +\usage{ +pre_processing(object, species = "Homo sapiens", nfeatures = 3000, npcs = 50) +} +\arguments{ +\item{npcs}{number of principal component dimensions to calculate} +} +\value{ + +} +\description{ +Seurat Preprocessing +} diff --git a/man/rbw_umap.Rd b/man/rbw_umap.Rd new file mode 100644 index 0000000..212ff86 --- /dev/null +++ b/man/rbw_umap.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_umap.R +\name{rbw_umap} +\alias{rbw_umap} +\title{UMAP palette using rainbow colors} +\usage{ +rbw_umap(object, group_col, jitter = TRUE, comp = 3) +} +\arguments{ +\item{comp}{integer setting the color complementarity to be used} +} +\value{ + +} +\description{ +UMAP palette using rainbow colors +} diff --git a/man/rcpp_hello_world.Rd b/man/rcpp_hello_world.Rd new file mode 100644 index 0000000..e4f90bf --- /dev/null +++ b/man/rcpp_hello_world.Rd @@ -0,0 +1,17 @@ 
+\name{rcpp_hello_world} +\alias{rcpp_hello_world} +\docType{package} +\title{ +Simple function using Rcpp +} +\description{ +Simple function using Rcpp +} +\usage{ +rcpp_hello_world() +} +\examples{ +\dontrun{ +rcpp_hello_world() +} +} diff --git a/man/read10x.Rd b/man/read10x.Rd new file mode 100644 index 0000000..f7c9d73 --- /dev/null +++ b/man/read10x.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/db_read10x.R +\name{read10x} +\alias{read10x} +\title{Read10x v1} +\usage{ +read10x(path, return.sce = TRUE) +} +\arguments{ +\item{return.sce}{return result as SingleCellExperiment object} +} +\value{ + +} +\description{ +Read10x v1 +} diff --git a/man/read10x_atlas.Rd b/man/read10x_atlas.Rd new file mode 100644 index 0000000..59de9dd --- /dev/null +++ b/man/read10x_atlas.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/db_read10x.R +\name{read10x_atlas} +\alias{read10x_atlas} +\title{Read10x v2} +\usage{ +read10x_atlas(filepaths, project = "scRNAseq", meta = NULL) +} +\arguments{ +\item{meta}{} +} +\value{ + +} +\description{ +Read10x v2 +} diff --git a/man/sparse2DT.Rd b/man/sparse2DT.Rd new file mode 100644 index 0000000..490a880 --- /dev/null +++ b/man/sparse2DT.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cellphonedb_utilities.R +\name{sparse2DT} +\alias{sparse2DT} +\title{Convert a sparse matrix to a data.table} +\usage{ +sparse2DT(matrix) +} +\arguments{ +\item{matrix}{sparse matrix to be used} +} +\value{ + +} +\description{ +Convert a sparse matrix to a data.table +} +\examples{ + + +} diff --git a/man/sparse2DT.Seurat.Rd b/man/sparse2DT.Seurat.Rd new file mode 100644 index 0000000..ebcdd3c --- /dev/null +++ b/man/sparse2DT.Seurat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cellphonedb_utilities.R +\name{sparse2DT.Seurat} +\alias{sparse2DT.Seurat} 
+\title{Convert a sparse matrix to a data.table} +\usage{ +sparse2DT.Seurat(object) +} +\arguments{ +\item{object}{Seurat object} +} +\value{ + +} +\description{ +Convert a sparse matrix to a data.table +} diff --git a/man/umap.Rd b/man/umap.Rd new file mode 100644 index 0000000..38a2fca --- /dev/null +++ b/man/umap.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umap-learn.R +\name{umap} +\alias{umap} +\title{UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction)} +\usage{ +umap( + embedding, + a = NULL, + angular_rp_forest = FALSE, + b = NULL, + force_approximation_algorithm = FALSE, + init = "spectral", + learning_rate = 1, + local_connectivity = 1, + low_memory = FALSE, + metric = "euclidean", + metric_kwds = NULL, + min_dist = 0.1, + n_components = 2, + n_epochs = 200, + n_neighbors = 15, + negative_sample_rate = 5, + output_metric = "euclidean", + output_metric_kwds = NULL, + random_state = 42, + repulsion_strength = 1, + set_op_mix_ratio = 1, + spread = 1, + target_metric = "categorical", + target_metric_kwds = NULL, + target_n_neighbors = -1, + target_weight = 0.5, + transform_queue_size = 4, + transform_seed = 42, + unique = FALSE, + verbose = TRUE, + nThreads = parallel::detectCores() - 1 +) +} +\arguments{ +\item{nThreads}{number of parallel threads to be used} +} +\value{ + +} +\description{ +UMAP (Uniform Manifold Approximation and Projection for Dimension Reduction) +} diff --git a/man/visUMAP.Rd b/man/visUMAP.Rd new file mode 100644 index 0000000..640f23d --- /dev/null +++ b/man/visUMAP.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/umap.R +\name{visUMAP} +\alias{visUMAP} +\title{UWOT-UMAP} +\usage{ +visUMAP( + object, + reduction = "harmony", + spread = 1, + n_components = 2, + min_dist = 0.3, + metric = "cosine", + n_neighbors = 30, + set_op_mix_ratio = 1, + local_connectivity = 1, + repulsion_strength = 1, + 
negative_sample_rate = 5, + n_threads = parallel::detectCores() - 1, + reduction_name = "umap", + return_seurat = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{verbose}{whether to print function messages} +} +\value{ + +} +\description{ +UWOT-UMAP +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..13026b5 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,27 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include <Rcpp.h> + +using namespace Rcpp; + +// rcpp_hello_world +List rcpp_hello_world(); +RcppExport SEXP _dbsinglecell_rcpp_hello_world() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(rcpp_hello_world()); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_dbsinglecell_rcpp_hello_world", (DL_FUNC) &_dbsinglecell_rcpp_hello_world, 0}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_dbsinglecell(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/RcppExports.o b/src/RcppExports.o new file mode 100644 index 0000000..b76a3ca Binary files /dev/null and b/src/RcppExports.o differ diff --git a/src/dbsinglecell.so b/src/dbsinglecell.so new file mode 100755 index 0000000..6863fa5 Binary files /dev/null and b/src/dbsinglecell.so differ diff --git a/src/rcpp_hello_world.cpp b/src/rcpp_hello_world.cpp new file mode 100644 index 0000000..98a959c --- /dev/null +++ b/src/rcpp_hello_world.cpp @@ -0,0 +1,13 @@ + +#include <Rcpp.h> +using namespace Rcpp; + +// [[Rcpp::export]] +List rcpp_hello_world() { + + CharacterVector x = CharacterVector::create( "foo", "bar" ) ; + NumericVector y = NumericVector::create( 0.0, 1.0 ) ; + List z = List::create( x, y ) ; + + return z ; +} diff --git a/src/rcpp_hello_world.o b/src/rcpp_hello_world.o new file mode 100644 index 0000000..c276559 
Binary files /dev/null and b/src/rcpp_hello_world.o differ