forked from microbiome/course_2022_oulu
-
Notifications
You must be signed in to change notification settings - Fork 2
/
tse_script.R
65 lines (39 loc) · 1.54 KB
/
tse_script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Make a tse object from the avilable human gastrointestinal microbiota data
setwd("")
library("ggplot2"); packageVersion("ggplot2")
library(tidyr)
library(mia)
library(miaViz)
library(tidyverse)
library(scater)
library(ape)
#Read the data
samples_df <- read.csv(file ="HGMA.web.metadata.csv",
header = TRUE)
otu_mat <- read.csv(file ="HGMA.web.MSP.abundance.matrix.csv",
header = TRUE)
tax_mat <- read.table(file='IGC2.1989MSPs.taxo.tsv',sep = '\t', header = TRUE)
tree <- read.tree("IGC2.1990MSPs.nwk",text = NULL, tree.names = NULL, skip = 0,comment.char = "", keep.multi = FALSE)
otu_mat <- otu_mat %>%coordinates
tibble::column_to_rownames("X")
otu_mat <- as.matrix(otu_mat)
tax_mat <- tax_mat %>%
tibble::column_to_rownames("msp_name")
samples_df <- samples_df %>%
tibble::column_to_rownames("sample.ID")
dim(otu_mat)
dim(samples_df)
dim(tax_mat)
tax_mat <- tax_mat[,c("phylum", "class", "order", "family", "genus","species" )]
se <- TreeSummarizedExperiment(assays = list(counts = otu_mat[,rownames(samples_df)]),
colData = samples_df,
rowData = tax_mat[rownames(otu_mat),])
se
#how many taxa and samples the data contains
dim(se)
#TreeSummarizedExperiment object which includes also a rowTree slot
tse <- as(se, "TreeSummarizedExperiment")
rowTree(tse) <- tree
tse
common.nodes <- intersect(rownames(tse), rowTree(tse)$tip.label)
tse <- TreeSummarizedExperiment::subsetByLeaf(x = tse, rowLeaf = common.nodes, updateTree = TRUE)