diff --git a/404.html b/404.html index 2474cc9..d077803 100644 --- a/404.html +++ b/404.html @@ -32,7 +32,7 @@
diff --git a/articles/MultiEWCE.html b/articles/MultiEWCE.html index 33580d5..82145aa 100644 --- a/articles/MultiEWCE.html +++ b/articles/MultiEWCE.html @@ -33,7 +33,7 @@ @@ -320,10 +320,13 @@## Validating gene lists..
## 1 / 3 gene lists are valid.
+## Background already exists at: /github/home/.cache/R/MultiEWCE/bg-human-human-gprofiler.rds
+## Use `overwrite=TRUE` to overwrite.
+## + Version: 2023-11-07
## Computing gene counts.
-## Done in: 3.3 seconds.
+## Done in: 5.9 seconds.
##
-## Saving results ==> /tmp/RtmpnaSTmh/gen_results.rds
+## Saving results ==> /tmp/RtmpVHxLis/gen_results.rds
+-library(ggplot2) library(data.table) #### Aggregate results #### @@ -343,7 +346,7 @@
Visualise the results geom_col() + labs(x="Phenotype",y="Enrichments (n)") + theme_bw()
@@ -360,7 +363,7 @@+methods::show(plot1)
merge_results
+all_results_2 <- MultiEWCE::merge_results(save_dir = save_dir_tmp)
## 1 results files found.
+phenotypes <- c("Scoliosis") gene_set <- HPOExplorer::get_gene_lists(phenotypes = phenotypes, phenotype_to_genes = gene_data)
## Translating all phenotypes to HPO IDs.
## ℹ All local files already up-to-date!
-## + Returning a vector of phenotypes (same order as input).
+@@ -387,7 +390,7 @@cat(paste(length(unique(gene_set$gene_symbol)), "genes associated with",shQuote(phenotypes),":", paste(unique(gene_set$gene_symbol)[seq(5)],collapse = ", ")))
get_unfinished_list_names
This function is used to find which gene lists you have not yet analysed
-@@ -72,13 +72,13 @@+diff --git a/reference/ewce_para.html b/reference/ewce_para.html index fd433e1..899a0ca 100644 --- a/reference/ewce_para.html +++ b/reference/ewce_para.html @@ -17,7 +17,7 @@all_phenotypes <- unique(gene_data$hpo_name) unfinished <- MultiEWCE::get_unfinished_list_names(list_names = all_phenotypes, save_dir_tmp = save_dir_tmp) @@ -403,30 +406,31 @@
Run disease-level enrichment tests So far, we’ve iterated over gene lists grouped by phenotype. But we can also do this at the level of diseases (which are composed of combinations of phenotypes). -
diff --git a/reference/create_dt.html b/reference/create_dt.html index c90cc77..1818df3 100644 --- a/reference/create_dt.html +++ b/reference/create_dt.html @@ -17,7 +17,7 @@+gene_data <- HPOExplorer::load_phenotype_to_genes("genes_to_phenotype.txt")
## Reading cached RDS file: genes_to_phenotype.txt
-## + Version: v2023-10-09
++-#### Filter only to those with >=4 genes #### gene_counts <- gene_data[,list(genes=length(unique(gene_symbol))), by="disease_id"][genes>=4] list_names <- unique(gene_counts$disease_id)[seq(5)]
+-all_results <- MultiEWCE::gen_results(ctd = ctd, gene_data = gene_data, list_name_column = "disease_id", list_names = list_names, annotLevel = 1, reps = 10)
+## Results already exist at: /tmp/RtmpnaSTmh/gen_results.rds Use `force_new=TRUE` to overwrite.
## Results already exist at: /tmp/RtmpVHxLis/gen_results.rds +## Use `force_new=TRUE` to overwrite.
@@ -90,7 +90,7 @@Full analysis
Run the following code to replicate the main analysis in the study described here.
-diff --git a/index.html b/index.html index e429a5d..bd03861 100644 --- a/index.html +++ b/index.html @@ -40,7 +40,7 @@+diff --git a/authors.html b/authors.html index a166cda..58bbeeb 100644 --- a/authors.html +++ b/authors.html @@ -17,7 +17,7 @@gene_data <- HPOExplorer::load_phenotype_to_genes() gene_data[,n_gene:=length(unique(gene_symbol)),by="hpo_id"] gene_data <- gene_data[n_gene>=4,] @@ -441,7 +445,7 @@
Full analysis
Session info
-@@ -188,7 +188,7 @@+utils::sessionInfo()
## R Under development (unstable) (2023-11-02 r85465) ## Platform: x86_64-pc-linux-gnu @@ -466,7 +470,7 @@
Session info## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] data.table_1.14.8 ggplot2_3.4.4 MultiEWCE_0.1.8 BiocStyle_2.31.0 +## [1] data.table_1.14.8 ggplot2_3.4.4 MultiEWCE_0.1.9 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): ## [1] later_1.3.1 bitops_1.0-7 diff --git a/articles/MultiEWCE_files/figure-html/display-1.png b/articles/MultiEWCE_files/figure-html/display-1.png index 828b0ae..e9c02ae 100644 Binary files a/articles/MultiEWCE_files/figure-html/display-1.png and b/articles/MultiEWCE_files/figure-html/display-1.png differ diff --git a/articles/docker.html b/articles/docker.html index 7341726..d7ea012 100644 --- a/articles/docker.html +++ b/articles/docker.html @@ -33,7 +33,7 @@
Session Info## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] MultiEWCE_0.1.8 BiocStyle_2.31.0 +## [1] MultiEWCE_0.1.9 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): ## [1] later_1.3.1 bitops_1.0-7 diff --git a/articles/index.html b/articles/index.html index 939c555..f3d2efa 100644 --- a/articles/index.html +++ b/articles/index.html @@ -17,7 +17,7 @@
-@@ -60,6 +60,24 @@
+
Authors: Robert Gordon-Smith, Brian Schilder, Nathan Skene
diff --git a/news/index.html b/news/index.html index fc4d07a..fdbd8f9 100644 --- a/news/index.html +++ b/news/index.html @@ -17,7 +17,7 @@Changelog
Source:NEWS.md
diff --git a/reference/correlation_heatmap.html b/reference/correlation_heatmap.html index bf4f7cc..32cd983 100644 --- a/reference/correlation_heatmap.html +++ b/reference/correlation_heatmap.html @@ -17,7 +17,7 @@MultiEWCE 0.1.8
diff --git a/pkgdown.yml b/pkgdown.yml index 11e489c..69c450f 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -4,5 +4,5 @@ pkgdown_sha: ~ articles: MultiEWCE: MultiEWCE.html docker: docker.html -last_built: 2023-11-07T16:30Z +last_built: 2023-11-07T20:25Z diff --git a/reference/add_ctd.html b/reference/add_ctd.html index 9842199..3da1ca6 100644 --- a/reference/add_ctd.html +++ b/reference/add_ctd.html @@ -24,7 +24,7 @@diff --git a/reference/agg_results.html b/reference/agg_results.html index 6409696..3260359 100644 --- a/reference/agg_results.html +++ b/reference/agg_results.html @@ -18,7 +18,7 @@EWCE parallel
list_name_column = "hpo_id", gene_column = "gene_symbol", list_names = unique(gene_data[[list_name_column]]), - bg = unique(gene_data[[gene_column]]), reps = 100, annotLevel = 1, + force_new = FALSE, genelistSpecies = "human", sctSpecies = "human", + bg = get_bg(species1 = genelistSpecies, species2 = sctSpecies, overwrite = force_new), save_dir_tmp = tempdir(), - force_new = FALSE, parallel_boot = FALSE, cores = 1, verbose = FALSE @@ -110,12 +110,6 @@Arguments
- character vector of gene list names.
bg -- - List of gene symbols containing the background gene list -(including hit genes). If
bg=NULL
, - an appropriate gene background will be created automatically.reps @@ -126,6 +120,11 @@ Number of random gene lists to generate (Default: 100, but should be >=10,000 for publication-quality results).
Arguments
analyse (Default: 1). +force_new ++ + Overwrite previous results +in the
save_dir_tmp
.genelistSpecies + Species that
hits
genes came from (no longer limited to just "mouse" and "human"). @@ -138,16 +137,17 @@Arguments
See list_species for all available species.bg ++ + List of gene symbols containing the background gene list +(including hit genes). If
bg=NULL
, + an appropriate gene background will be created automatically.save_dir_tmp - Folder to save intermediate results files to (one file per gene list). Set to
NULL
to skip saving temporary files.force_new -- - Overwrite previous results -in the
save_dir_tmp
.parallel_boot @@ -181,6 +181,9 @@ Parallelise at the level of bootstrap iterations, rather than across gene lists.
Examples
gene_data = gene_data, list_names = list_names, reps = 10) +#> Background already exists at: /github/home/.cache/R/MultiEWCE/bg-human-human-gprofiler.rds +#> Use `overwrite=TRUE` to overwrite. +#> + Version: 2023-11-07 #> Computing gene counts.
character vector of gene list names.
List of gene symbols containing the background gene list
-(including hit genes). If bg=NULL
,
- an appropriate gene background will be created automatically.
Number of random gene lists to generate (Default: 100, but should be >=10,000 for publication-quality results).
Overwrite previous results
+in the save_dir_tmp
.
List of gene symbols containing the background gene list
+(including hit genes). If bg=NULL
,
+ an appropriate gene background will be created automatically.
The number of cores to run in parallel (e.g. 8) int
.
Folder to save merged results in.
Overwrite previous results
-in the save_dir_tmp
.
Print messages.
Generate background genes given one or more species. +Caches the list to avoid excessive API calls to +g:Profiler.
+get_bg(
+ species1 = "human",
+ species2 = "human",
+ method = "gprofiler",
+ save_dir = tools::R_user_dir(package = "MultiEWCE", which = "cache"),
+ overwrite = FALSE,
+ verbose = TRUE,
+ ...
+)
First species.
Second species.
R package to use for gene mapping:
"gprofiler"
: Slower but more species and genes.
"homologene"
: Faster but fewer species and genes.
"babelgene"
: Faster but fewer species and genes.
+Also gives consensus scores for each gene mapping based on
+ several different data sources.
Directory to save data to.
Should any local files of the same name be overwritten?
+default FALSE
.
Print messages.
Arguments passed on to orthogene::create_background
output_species
Species to convert all genes from
+species1
and species2
to first.
+ Default="human"
, but can be to either any species
+ supported by orthogene, including
+ species1
or species2
.
as_output_species
Return background gene list as
+output_species
orthologs, instead of the
+ gene names of the original input species.
use_intersect
When species1
and species2
are both
+different from output_species
, this argument will determine whether
+to use the intersect (TRUE
) or union (FALSE
) of all genes
+from species1
and species2
.
bg
User supplied background list that will be returned to the +user after removing duplicate genes.
gene_map
User-supplied gene_map
data table from
+map_orthologs or map_genes.
non121_strategy
How to handle genes that don't have
+1:1 mappings between input_species
:output_species
.
+Options include:
"drop_both_species" or "dbs" or 1
:
+Drop genes that have duplicate
+mappings in either the input_species
or output_species
+(DEFAULT).
"drop_input_species" or "dis" or 2
:
+Only drop genes that have duplicate
+mappings in the input_species
.
"drop_output_species" or "dos" or 3
:
+Only drop genes that have duplicate
+mappings in the output_species
.
"keep_both_species" or "kbs" or 4
:
+Keep all genes regardless of whether
+they have duplicate mappings in either species.
"keep_popular" or "kp" or 5
:
+Return only the most "popular" interspecies ortholog mappings.
+ This procedure tends to yield a greater number of returned genes
+ but at the cost of many of them not being true biological 1:1 orthologs.
"sum","mean","median","min" or "max"
:
+ When gene_df
is a matrix and gene_output="rownames"
,
+ these options will aggregate many-to-one gene mappings
+ (input_species
-to-output_species
)
+ after dropping any duplicate genes in the output_species
.
A vector of background genes.
+bg <- get_bg()
+#> Background already exists at: /github/home/.cache/R/MultiEWCE/bg-human-human-gprofiler.rds
+#> Use `overwrite=TRUE` to overwrite.
+#> + Version: 2023-11-07
+
get_data(
fname,
repo = "neurogenomics/MultiEWCE",
- storage_dir = tools::R_user_dir(package = "MultiEWCE", which = "cache"),
+ save_dir = tools::R_user_dir(package = "MultiEWCE", which = "cache"),
overwrite = TRUE,
tag = "latest",
check = FALSE
@@ -82,6 +82,10 @@ Arguments
Repository name in format "owner/repo". Defaults to guess_repo()
.
+save_dir
+Directory to save data to.
+
+
overwrite
Should any local files of the same name be overwritten?
default TRUE
.
diff --git a/reference/get_unfinished_list_names.html b/reference/get_unfinished_list_names.html
index 97c76e5..5046537 100644
--- a/reference/get_unfinished_list_names.html
+++ b/reference/get_unfinished_list_names.html
@@ -21,7 +21,7 @@
gen_results()
Generate results
Get background genes
report_plot()
Report plot
Standardise genes
Standardise gene symbols to HGNC.
+standardise_genes(
+ dat,
+ gene_col = "gene_symbol",
+ fill_na = TRUE,
+ verbose = TRUE,
+ ...
+)
data.table
character column name.
logical. Fill NAs with original gene symbol.
logical. Print messages.
Arguments passed on to orthogene::map_genes
genes
Gene list.
species
Species to map against.
target
target namespace.
mthreshold
maximum number of results per initial alias to show. Shows all by default.
drop_na
Drop all genes without mappings.
+Sets gprofiler2::gconvert(filter_na=)
as well as
+an additional round of more comprehensive NA
filtering
+by orthogene.
numeric_ns
namespace to use for fully numeric IDs (list of available namespaces).
run_map_species
Standardise species
names with
+map_species first (Default: TRUE
).
data.table
+if (FALSE) {
+dat <- data.table(gene_symbol = c("BRCA1","BRCA2","BRCA3"))
+dat2 <- standardise_genes(dat)
+}
+