Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Figure Updates #41

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bin/citations-graphs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ library(patchwork)
## Input data

```{r}
years <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_technical_paper_2024/refs/heads/main/results/citations/years.csv")
methods <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_technical_paper_2024/refs/heads/main/results/citations/methods.csv")
target_technical <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_technical_paper_2024/refs/heads/main/results/citations/technical_target.csv")
target_organisms <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_technical_paper_2024/refs/heads/main/results/citations/targeted_organisms.csv")
years <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/results/citations/years.csv")
methods <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/results/citations/methods.csv")
target_technical <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/results/citations/technical_target.csv")
target_organisms <- fread("https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/results/citations/targeted_organisms.csv")

years$year <- seq(2005, 2025)
years <- years |> melt(id.vars = "year", value.factor = FALSE, variable.factor = FALSE)
Expand Down
Binary file added bin/microGalaxy-tools.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion bin/survey-figure.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ library(stringr)
## Input data

```{r}
df <- "https://raw.githubusercontent.com/paulzierep/microgalaxy-survey/refs/heads/main/data/modified_answers.csv" |> fread()
df <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_technical_paper_2024/refs/heads/main/docs/supplementary/supplementary_table_2.tsv" |> fread()
```

### Correct column names
Expand Down
20 changes: 9 additions & 11 deletions bin/tools-graphs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ library(ggrepel)
library(ggtext)
library(ggh4x)

# install.packages("extrafont")
# install.packages(c("extrafont", "paletteer", "colorspace"))
library(extrafont)

library(paletteer)
library(colorspace)
```
Expand All @@ -38,7 +37,7 @@ library(colorspace)
## THERE IS A BUG UNFORTUNATELY WITH fread FUNCTION
## RUN ON CONSOLE IN THIS CASE

tools_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/microgalaxy/resources/curated_tools.tsv"
tools_url <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/docs/supplementary/supplementary_table_3.tsv"

tools_dt <- tools_url |> fread()
```
Expand Down Expand Up @@ -82,14 +81,13 @@ d <- availability |> melt(id.vars = c("Suite ID", "EDAM reduced topics"), variab
d <- d[which(value >= 1)]

d$`Suite ID` <- d$`Suite ID` |> factor(levels = mm_c$labels[mm_c$order |> rev()])
d$variable <- d$variable |> factor(levels = mm_r$labels[mm_r$order |> rev()])
d$variable <- d$variable |> factor(levels = mm_r$labels[mm_r$order |> rev()])

d$fct <- ifelse(d$variable |> str_detect("UseGalaxy"), "UseGalaxy", "vOther")

index <- d[which(fct == "UseGalaxy")][[1]] |> unique()

p <- d[which(`Suite ID` %in% index)]

p <- d[which(`Suite ID` %in% index)]
p2 <- p |> tidyr::separate_rows("EDAM reduced topics", sep = ",") |> setDT()

p2$`EDAM reduced topics` <- p2$`EDAM reduced topics` |> str_squish()
Expand All @@ -103,7 +101,7 @@ p2$edam_clean <- ifelse(p2$`EDAM reduced topics` %in% t$`EDAM reduced topics`, p
p2$edam_clean <- p2$edam_clean |> factor(levels = c(t$`EDAM reduced topics`, "Other"))
```

### Top 5 EDAM Operations
### Heatmap: Top 5 EDAM Operations

```{r}
c_1 <- p2[which(edam_clean != "Other")] |>
Expand Down Expand Up @@ -150,7 +148,7 @@ c_1 <- p2[which(edam_clean != "Other")] |>
labs(y = "Galaxy Tool Suites", x = "Availability of Tool Suites Across **Servers**", fill = "No. of Tools")
```

### All Tools suties
### Heatmap: All Tools suites

```{r}

Expand Down Expand Up @@ -283,7 +281,7 @@ df2 = df2[, c("Suite ID", "runs", "users", "EDAM reduced operations")] |> unique
### Define factor levels of cluster column

```{r}
st <- st[1:9]
st <- st[1:12]

df2$cluster <- ifelse(df2$`EDAM reduced operations` %in% st$`EDAM reduced operations`, df2$`EDAM reduced operations` |> as.character(), "Other")
df2$cluster <- df2$cluster |> factor(levels = c(st$`EDAM reduced operations`, "Other"))
Expand Down Expand Up @@ -339,7 +337,7 @@ a <- df2 |>
breaks = c(.1, 1, 10, 100, 1000, 10000)
) +

scale_fill_manual(values = c(paletteer_d("ggsci::default_igv", ncolors - 1), "grey") |> lighten(.25), guide = guide_legend(nrow = 3, override.aes = list(size = 2.5))) +
scale_fill_manual(values = c(paletteer_d("ggsci::default_igv", ncolors - 1), "grey") |> lighten(.25), guide = guide_legend(nrow = 4, override.aes = list(size = 2.5))) +
scale_color_manual(values = c(paletteer_d("ggsci::default_igv", ncolors - 1), "grey") |> darken(.25), guide = "none") +

coord_cartesian() +
Expand Down Expand Up @@ -626,7 +624,7 @@ d1_p <- d1 |>

# geom_point(data = d2, aes(year, N)) +
scale_x_date(expand = c(0.01, 0.01)) +
scale_y_continuous(expand = c(0, 0), breaks = seq(50, 300, by = 50), limits = c(0, 235))+
scale_y_continuous(expand = c(0, 0), breaks = seq(50, 300, by = 50), limits = c(0, 300))+
theme_minimal(base_family = "Calibri") +
theme(
# panel.grid.major = element_line(linewidth = .45, color = "grey85"),
Expand Down
62 changes: 37 additions & 25 deletions bin/tutorials-graphs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ library(packcircles)
## THERE IS A BUG UNFORTUNATELY WITH fread FUNCTION
## RUN ON CONSOLE IN THIS CASE

microGalaxy_tutorials_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/microgalaxy/resources/tutorials.tsv"
microGalaxy_tools_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/microgalaxy/resources/curated_tools.tsv"
all_tools_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/tools.tsv"
microGalaxy_tutorials_url <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/docs/supplementary/supplementary_table_5.tsv"
microGalaxy_tools_url <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/docs/supplementary/supplementary_table_3.tsv"
# all_tools_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/tools.tsv"


microGalaxy_tutorials_dt <- microGalaxy_tutorials_url |> fread()
microGalaxy_tools_dt <- microGalaxy_tools_url |> fread()
all_tools_dt <- all_tools_url |> fread()
# all_tools_dt <- all_tools_url |> fread()

```

Expand All @@ -68,34 +68,46 @@ dt1$Title <- microGalaxy_tutorials_dt[dt1$Topic_id]$Title
dt1$`EDAM topic` <- microGalaxy_tutorials_dt[dt1$Topic_id]$`EDAM topic`
dt1$`EDAM operation` <- microGalaxy_tutorials_dt[dt1$Topic_id]$`EDAM operation`

tmp <- microGalaxy_tools_dt$`Tool IDs` |>
str_split("\\,") |>
lapply(str_squish) |>
lapply(function(q) data.table("Tool ID" = str_squish(q))) |>
rbindlist(idcol = "id")

tmp$`Suite ID` <- microGalaxy_tools_dt[tmp$id]$`Suite ID`

index <- match(dt1$`Tool ID`, tmp$`Tool ID`)

dt1$`Suite ID` = tmp[index]$`Suite ID`

# 2 ------------------------------------

tmp <- all_tools_dt[, c("Suite ID", "Tool IDs"), with = FALSE] |> unique()
tmp <- tmp[which(`Tool IDs` != "")]
tmp <- tmp |> tidyr::separate_rows("Tool IDs", sep = ",") |> setDT()
tmp$`Tool IDs` <- tmp$`Tool IDs` |> str_squish()
# tmp <- all_tools_dt[, c("Suite ID", "Tool IDs"), with = FALSE] |> unique()
# tmp <- tmp[which(`Tool IDs` != "")]
# tmp <- tmp |> tidyr::separate_rows("Tool IDs", sep = ",") |> setDT()
# tmp$`Tool IDs` <- tmp$`Tool IDs` |> str_squish()

# 3 ------------------
# # 3 ------------------

index = match(dt1$`Tool ID`, tmp$`Tool IDs`)
# index = match(dt1$`Tool ID`, tmp$`Tool IDs`)

dt1$`Suite ID` = tmp[index][[1]]
# dt1$`Suite ID` = tmp[index][[1]]

# 4 ----------------------------

microGalaxy_tutorials_dt$`Total No. of tools` = microGalaxy_tutorials_dt$Tools |>
str_split("\\,") |>
lapply(str_squish) |>
lapply(length) |>
unlist()
# microGalaxy_tutorials_dt$`Total No. of tools` = microGalaxy_tutorials_dt$Tools |>
# str_split("\\,") |>
# lapply(str_squish) |>
# lapply(length) |>
# unlist()

microGalaxy_tutorials_dt$`No. of microGalaxy related tools` = microGalaxy_tutorials_dt$Tools |>
str_split("\\,") |>
lapply(str_squish) |>
lapply(function(q) which(q %in% tmp$`Tool IDs`) |> length() ) |>
unlist()
# microGalaxy_tutorials_dt$`No. of microGalaxy related tools` = microGalaxy_tutorials_dt$Tools |>
# str_split("\\,") |>
# lapply(str_squish) |>
# lapply(function(q) which(q %in% tmp$`Tool IDs`) |> length() ) |>
# unlist()

microGalaxy_tutorials_dt$`No. of other tools` = microGalaxy_tutorials_dt$`Total No. of tools` - microGalaxy_tutorials_dt$`No. of microGalaxy related tools`
# microGalaxy_tutorials_dt$`No. of other tools` = microGalaxy_tutorials_dt$`Total No. of tools` - microGalaxy_tutorials_dt$`No. of microGalaxy related tools`

```

Expand Down Expand Up @@ -276,7 +288,7 @@ a_2 <- dt2 |>

theme_minimal(base_family = "Calibri") +
theme(
axis.text.x = element_text(size = 3, angle = 45, hjust = 1),
axis.text.x = element_text(size = 4, angle = 45, hjust = 1),
axis.text.y = element_text(size = 6),

axis.title.x = element_text(margin = margin(t = 10)),
Expand Down Expand Up @@ -304,11 +316,11 @@ a_2 <- dt2 |>
```{r}

# ----------------------
d1 <- tutorials_dt[, c("Title", "EDAM topic"), with = FALSE] |>
d1 <- microGalaxy_tutorials_dt[, c("Title", "EDAM topic"), with = FALSE] |>
tidyr::separate_rows("EDAM topic", sep = ",") |>
setDT()

d2 <- tutorials_dt[, c("Title", "EDAM operation"), with = FALSE] |>
d2 <- microGalaxy_tutorials_dt[, c("Title", "EDAM operation"), with = FALSE] |>
tidyr::separate_rows("EDAM operation", sep = ",") |>
setDT()

Expand Down
8 changes: 4 additions & 4 deletions bin/workflows-graphs.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ library(packcircles)
## THERE IS A BUG UNFORTUNATELY WITH fread FUNCTION
## RUN ON CONSOLE IN THIS CASE

workflows_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/microgalaxy/resources/curated_workflows.tsv"
all_tools_url <- "https://raw.githubusercontent.com/galaxyproject/galaxy_codex/refs/heads/main/communities/all/resources/tools.tsv"
workflows_url <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/docs/supplementary/supplementary_table_4.tsv"
all_tools_url <- "https://raw.githubusercontent.com/usegalaxy-eu/microgalaxy_paper_2025/refs/heads/main/docs/supplementary/supplementary_table_3.tsv"

workflows_dt <- workflows_url |> fread(sep = "\t")
all_tools_dt <- all_tools_url |> fread()
Expand Down Expand Up @@ -72,7 +72,7 @@ dt1$`EDAM operation` <- dt1$`EDAM operation` |> str_squish()

dt1 <- dt1[which(!is.na(`Suite ID`)), c("Name", "Suite ID", "EDAM topics", "EDAM operation"), with = FALSE] |> unique()

dt1$`EDAM topics` <- ifelse(dt1$`EDAM topics` == "", "Other", dt1$`EDAM topics`) |> str_wrap(15)
dt1$`EDAM topics` <- ifelse(dt1$`EDAM topics` == "", "Other", dt1$`EDAM topics`) |> str_wrap(15)
dt1$`EDAM operation` <- ifelse(dt1$`EDAM operation` == "", "Other", dt1$`EDAM operation`) |> str_wrap(15)

edam_operations <- dt1[, by = `EDAM operation`, .(N = Name |> unique() |> length())]
Expand All @@ -87,7 +87,7 @@ edam_topics <- edam_topics[order(-N)] |> head(4)
dt1$`EDAM topics` <- ifelse(dt1$`EDAM topics` %in% edam_topics$`EDAM topics`, dt1$`EDAM topics`, "Other")
dt1$`EDAM operation` <- ifelse(dt1$`EDAM operation` %in% edam_operations$`EDAM operation`, dt1$`EDAM operation`, "Other")

dt1$`EDAM topics` <- dt1$`EDAM topics` |> factor(c(edam_topics$`EDAM topics`, "Other"))
dt1$`EDAM topics` <- dt1$`EDAM topics` |> factor(c(edam_topics$`EDAM topics`, "Other"))
dt1$`EDAM operation` <- dt1$`EDAM operation` |> factor(c(edam_operations$`EDAM operation`, "Other"))


Expand Down
Binary file modified results/citations/citations.pdf
Binary file not shown.
Loading