Skip to content

Commit

Permalink
fix: modifications after stageR correction
Browse files Browse the repository at this point in the history
  • Loading branch information
iaradsouza1 committed Oct 24, 2023
1 parent 6f90c81 commit 4c38efe
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 47 deletions.
14 changes: 7 additions & 7 deletions scripts/diff_tx_correct.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ library(stageR)
library(GenomicFeatures)

# Load TX data from differential expression
load("results/diff_exp/tx_rin_ph_diff.rda")
load("results/diff_exp/edger_tx_rin_ph_diff.rda")
df_res <- df_edger_ph_rin_group_tx
colnames(df_res)[1] <- "tx"

# remove transcript version
df_res$tx <- gsub("\\.+\\d+", "", rownames(df_res))

# Load transcript-gene info -----------------------------------------------
# gtf <- "data/genome/Homo_sapiens.GRCh38.97.gtf.gz"
# txdb.filename <- "data/genome/Homo_sapiens.GRCh38.97.gtf.sqlite"
gtf <- "Homo_sapiens.GRCh38.97.gtf.gz"
txdb.filename <- "Homo_sapiens.GRCh38.97.gtf.sqlite"
gtf <- "data/genome/Homo_sapiens.GRCh38.97.gtf.gz"
txdb.filename <- "data/genome/Homo_sapiens.GRCh38.97.gtf.sqlite"
#gtf <- "Homo_sapiens.GRCh38.97.gtf.gz"
#txdb.filename <- "Homo_sapiens.GRCh38.97.gtf.sqlite"

# Load db
txdb <- loadDb(txdb.filename)
Expand Down Expand Up @@ -72,7 +72,7 @@ for (i in 1:length(regions)) {

# Get the corrected values
padj <- getAdjustedPValues(stageRObj, order = TRUE, onlySignificantGenes = T)
padj <- padj[!padj$transcript == 0,]
# padj <- padj[!padj$transcript == 0,]

if (nrow(padj) == 0) {
ls_temp[[j]] <- NULL
Expand All @@ -93,7 +93,7 @@ if(!dir.exists("results/diff_exp/")) {
}

# Save results
save(df_res_padj, file = "results/diff_exp/diff_tx_corrected.rda")
save(df_res_padj_tx, file = "results/diff_exp/diff_tx_corrected.rda")



15 changes: 9 additions & 6 deletions scripts/network.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ library(ggraph)
library(magrittr)
library(RedeR)

# ------------------ GET GENES AND INTERACTIONS -------------------------
# ------------------ GET GENES AND INTERACTIONS ---------------------------
# Load diff genes table
load("results/diff_exp/diff_df.rda")
gwas_intersections <- read_csv("results/tables/gwas_intersection.csv")
Expand Down Expand Up @@ -82,6 +82,9 @@ addGraph(rdp, g)
nodes <- read_tsv("results/networks/model_nodes.txt")
edges <- read_delim("results/networks/model_edges.txt")

nodes <- read_tsv("~/model_nodes.txt")
edges <- read_delim("~/model_edges.txt")

# Import nodes coordinates determined by vivagraph
layout <- read.csv("results/networks/layout.csv")

Expand Down Expand Up @@ -115,7 +118,7 @@ ggraph(g, x = x, y = y) +
pie_scale = 0.2,
show.legend = F
) +
geom_node_text(aes(label = alias), size = 1.1, nudge_x = 2, nudge_y = 4) +
geom_node_text(aes(label = alias), size = 0.9, nudge_x = 2, nudge_y = 4) +
#geom_node_label(aes(label = alias)) +
scale_fill_manual(values = c("#0ac80aff", "#4f4affff", "#ff822fff")) +
coord_fixed() +
Expand All @@ -131,7 +134,7 @@ svg(filename = "results/plots_paper/network.svg", height = 10, width = 10)
print(p)
dev.off()

# Percentage of total genes in the network: 51,52%%
# Percentage of total genes in the network: 51,24%
n_distinct(nodes$alias) / n_distinct(diff_df$hgnc_symbol)


Expand Down Expand Up @@ -226,7 +229,7 @@ set_graph_params <- function(g, dict, f_ls) {

l_groups <- map(split(diff_df$hgnc_symbol, diff_df$group), unique)

graphs_by_group <- imap(l_groups, function(x, i){
graphs_by_group <- imap(l_groups, function(x, i) {

# CHANGE HERE IF YOU WANT FIRST NEIGHBORS
# (also remember to change the path)
Expand All @@ -241,14 +244,14 @@ graphs_by_group <- imap(l_groups, function(x, i){
# To set all gene labels, set diff_df instead of diff_temp
g <- set_graph_params(g, dc, f_ls)

if(include_first_neighbors){
if(include_first_neighbors) {
colors_list <- V(g)$pie
degrees <- degree(g,v=V(g))
filter <- !(paste(colors_list) == "c(1, 0, 0, 0)" & degrees == 1)
g <- induced_subgraph(g, filter)
}

if(length(V(g)$pie.color) != 0){
if(length(V(g)$pie.color) != 0) {
pdf(paste0("results/networks/", i, ".pdf"), width = 10, height = 10)
plot(g)
dev.off()
Expand Down
6 changes: 6 additions & 0 deletions scripts/network_layout.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

library(easylayout)

# Read igraph data
load("results/networks/int.rda")

# Create graph
g <- graph_from_edgelist(as.matrix(int[,1:2]), directed = F)

# Organize main layout
layout <- easylayout::vivagraph(g)

Expand Down
40 changes: 19 additions & 21 deletions scripts/plots.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ diff_df %>%
ggplot(aes(x = col, y = n, fill = type)) +
geom_bar(position = "stack", stat = "identity") +
labs(x = "", y = "Number of transcriptionally altered genes", fill = "") +
scale_y_continuous(limits = c(0, 1200), breaks = seq(0, 1200, 200)) +
scale_y_continuous(limits = c(0, 1400), breaks = seq(0, 1400, 200)) +
scale_fill_manual(values = color_scale) +
theme_classic() +
theme(
Expand Down Expand Up @@ -152,7 +152,7 @@ df_plot %>%
ggplot(aes(x = as.numeric(x_axis), y = n, fill = type)) +
geom_bar(stat = "identity", position = "stack") +
facet_grid(cols = vars(region)) +
scale_y_continuous(name = "Number of transcriptionally altered genes", limits = c(0, 460), breaks = seq(0, 460, 50), minor_breaks = F) +
scale_y_continuous(name = "Number of transcriptionally altered genes", limits = c(0, 500), breaks = seq(0, 500, 50), minor_breaks = F) +
#facet_zoom(x = x_axis %in% c("Female", "Male", "Intersection")) +
scale_fill_manual(name = "", values = color_scale) +
scale_x_continuous("",
Expand Down Expand Up @@ -181,7 +181,7 @@ diff_df %>%
)) -> tmp8
# Create list of genes in female, in male, and in both sexes
l_genes <- split(tmp8$gene, tmp8$sex)
l_genes <- split(tmp8$hgnc_symbol, tmp8$sex)
# Plot Venn diagram
cairo_pdf(file = "results/plots_paper/fig2B.pdf", width = 4, height = 4)
Expand Down Expand Up @@ -357,11 +357,8 @@ ggsave("results/plots_paper/fig2C_2.png", height = 4, width = 5, dpi = 300)

## Figure 3

The enrichment plot was built as described in `enrichment.R` in the `scripts` directory.
Figure 3 was produced by the biotype analysis in the `summarise_biotypes.R` script.

## Figure 4 and 5

Figures 4 and 5 were built as described in `plot_dtu.R` in the `scripts` directory.

# Supplementary Figures

Expand Down Expand Up @@ -397,14 +394,15 @@ diff_df %>%
ggplot(aes(x = sex, y = p_gt, fill = gt)) +
geom_bar(stat = "identity") +
facet_grid(.~ region) +
scale_y_continuous(breaks = seq(0,1,0.2),
labels = scales::percent(seq(0,1,0.2))) +
scale_y_continuous(breaks = seq(0,1,0.25),
labels = scales::percent(seq(0,1,0.25))) +
scale_x_discrete(labels = c("female" = expression("\u2640"),
"male" = expression("\u2642"))) +
scale_fill_manual(values = c("G" = "#5E835Fff", "T" = "#85587C"),
labels = c("G" = "Genes", "T" = "Transcripts")) +
labs(x = "", y = "Pergentage of transcriptionally altered genes", fill = "") +
theme_bw() +
geom_hline(yintercept = 0.5, lty = 2, lwd = 0.2) +
theme(
strip.background = element_rect(fill = "white"),
axis.text.x = element_text(size = 15, colour = "black"),
Expand Down Expand Up @@ -489,7 +487,7 @@ load("results/important_variables/ann.rda")
ann %>%
rownames_to_column("run") %>%
dplyr::select(run, phenotype, gender, region) %>%
count(phenotype, gender, region, name = "number_of_samples") %>%
dplyr::count(phenotype, gender, region, name = "number_of_samples") %>%
arrange(gender, region) %>%
openxlsx::write.xlsx(file = "results/tables/number_of_samples.xlsx", rowNames = F)
```
Expand All @@ -504,21 +502,21 @@ diff_df %>%
## Supplementary Table 3

```{r}
genes_by_group_female %>%
openxlsx::write.xlsx(file = "results/tables/intersection_tables.xlsx", row.names = F, sheetName = "Female_Intersections")
wb <- createWorkbook()
addWorksheet(wb, sheetName = "Female_Intersections")
writeData(wb, sheet = "Female_Intersections", genes_by_group_female)
genes_by_group_male %>%
openxlsx::write.xlsx(file = "results/tables/intersection_tables.xlsx", row.names = F, sheetName = "Male_Intersections",
append = T)
addWorksheet(wb, sheetName = "Male_Intersections")
writeData(wb, sheet = "Male_Intersections", genes_by_group_male)
genes_by_sex %>%
openxlsx::write.xlsx(file = "results/tables/intersection_tables.xlsx", row.names = F, sheetName = "Sex_Intersections",
append = T)
addWorksheet(wb, sheetName = "Sex_Intersections")
writeData(wb, sheet = "Sex_Intersections", genes_by_sex)
genes_by_regions %>%
openxlsx::write.xlsx(file = "results/tables/intersection_tables.xlsx", row.names = F, sheetName = "Regions_Intersections",
append = T)
addWorksheet(wb, sheetName = "Regions_Intersections")
writeData(wb, sheet = "Regions_Intersections", genes_by_regions)
saveWorkbook(wb, "results/tables/intersection_tables.xlsx", overwrite = TRUE)
```


Expand Down
26 changes: 13 additions & 13 deletions scripts/summarise_biotypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,19 @@ dtu_w_biotype <- dtu_w_biotype %>%

dge_plot <- dge_w_biotype %>%
group_by(biotype) %>%
summarise(biotype_n = n() / length(unique(dge_w_biotype$gene_id)) * 100) %>%
dplyr::summarise(biotype_n = dplyr::n() / length(unique(dge_w_biotype$gene_id)) * 100) %>%
ungroup() %>%
mutate(type = "DGE")
dplyr::mutate(type = "DGE")

dte_plot <- dte_w_biotype %>%
group_by(biotype) %>%
summarise(biotype_n = n() / length(unique(dte_w_biotype$transcript_id))* 100) %>%
summarise(biotype_n = dplyr::n() / length(unique(dte_w_biotype$transcript_id))* 100) %>%
ungroup() %>%
mutate(type = "DTE")

dtu_plot <- dtu_w_biotype %>%
group_by(biotype) %>%
summarise(biotype_n = n() / length(unique(dtu_w_biotype$isoform_id))* 100) %>%
summarise(biotype_n = dplyr::n() / length(unique(dtu_w_biotype$isoform_id))* 100) %>%
ungroup() %>%
mutate(type = "DTU")

Expand Down Expand Up @@ -105,7 +105,7 @@ ggsave(biotype_plot, file = "results/plots_paper/biotype_plot.pdf", width = 7, h
dge_plot <- dge_w_biotype %>%
separate(group, into = c("region", "sex")) %>%
group_by(biotype, sex) %>%
summarise(biotype_n = n()) %>%
summarise(biotype_n = dplyr::n()) %>%
ungroup() %>%
group_by(sex) %>%
mutate(prop = biotype_n / sum(biotype_n) * 100,
Expand All @@ -114,7 +114,7 @@ dge_plot <- dge_w_biotype %>%
dte_plot <- dte_w_biotype %>%
separate(group, into = c("region", "sex")) %>%
group_by(biotype, sex) %>%
summarise(biotype_n = n()) %>%
summarise(biotype_n = dplyr::n()) %>%
ungroup() %>%
group_by(sex) %>%
mutate(prop = biotype_n / sum(biotype_n) * 100,
Expand All @@ -123,7 +123,7 @@ dte_plot <- dte_w_biotype %>%
dtu_plot <- dtu_w_biotype %>%
separate(group, into = c("region", "sex")) %>%
group_by(biotype, sex) %>%
summarise(biotype_n = n()) %>%
summarise(biotype_n = dplyr::n()) %>%
ungroup() %>%
group_by(sex) %>%
mutate(prop = biotype_n / sum(biotype_n) * 100,
Expand All @@ -149,7 +149,7 @@ ggplot(df_plot, aes(x = reorder(biotype, dplyr::desc(prop)), y = prop, fill = ty
strip.background = element_rect(fill = "white")) -> biotype_plot_by_sex

# Save
ggsave(biotype_plot_by_sex, filename = "results/plots_paper/biotype_by_sexplot.pdf", width = 7, height = 4)
ggsave(biotype_plot_by_sex, filename = "results/plots_paper/fig3.pdf", width = 7, height = 4)

# Test feature prevalence differences between female and male -------------

Expand All @@ -165,7 +165,7 @@ biotypes_by_sex %>%
group_map(~ {
cat(.y$type, sep = "\n")
cont_table <- table(.x$biotype, .x$sex)
return(list(fisher = fisher.test(cont_table), count_table = cont_table))
return(list(fisher = fisher.test(cont_table, simulate.p.value = T), count_table = cont_table))
}) -> biot_tests_fisher

biotypes_by_sex %>%
Expand Down Expand Up @@ -214,10 +214,10 @@ biotypes_by_sex %>%
arrange(type, biotype) %>%
filter(sex == "female") %>%
group_by(region, type) %>%
mutate(n1 = n()) %>%
mutate(n1 = dplyr::n()) %>%
ungroup() %>%
group_by(biotype, type, region) %>%
mutate(n2 = n(),
mutate(n2 = dplyr::n(),
prop_by_region = (n2 / n1) * 100) %>%
arrange(desc(type), desc(region)) %>%
ungroup() %>%
Expand Down Expand Up @@ -245,10 +245,10 @@ biotypes_by_sex %>%
arrange(type, biotype) %>%
filter(sex == "male") %>%
group_by(region, type) %>%
mutate(n1 = n()) %>%
mutate(n1 = dplyr::n()) %>%
ungroup() %>%
group_by(biotype, type,region) %>%
mutate(n2 = n(),
mutate(n2 = dplyr::n(),
prop_by_region = (n2 / n1) * 100) %>%
arrange(desc(type), desc(region)) %>%
ungroup() %>%
Expand Down

0 comments on commit 4c38efe

Please sign in to comment.