-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript_RDP_to_FUNGuild.R
92 lines (66 loc) · 2.89 KB
/
script_RDP_to_FUNGuild.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#script_RDP_to_FUNGuild.R
library(tidyr)
# example df for FUNGuild input
#df.example <- read.table("otu_table_example.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# Load the two tab-delimited input tables as plain character/numeric columns.
# RDP classifier output: no `header` argument is passed, and the column
# selection further down relies on read.table's default V1, V2, ... names.
df.rdpout <- read.table(
  file = "data/head_RDPUnite.txt",
  stringsAsFactors = FALSE,
  sep = "\t"
)
# OTU table: the first line of the file supplies the column names.
df.otutab <- read.table(
  file = "data/otuTable.tab",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = "\t"
)
################
# taxonomy cell template
#df.example[1,"taxonomy"]
################
# Reformat df.rdpout.
# Pull out the relevant columns and give them descriptive names in one step.
df.tmp <- setNames(
  df.rdpout[, c("V1", "V3", "V6", "V9", "V12", "V15", "V18", "V21", "V23")],
  c(
    "OTUId", "kingdom", "phylum", "class", "order", "family", "genus",
    "genus_species_code", "speciesPerc"
  )
)
# genus_species_code packs two "|"-delimited pieces into one cell:
# the genus_species part and the shcode part.
df.tmp1 <- separate(
  df.tmp,
  genus_species_code,
  into = c("genus_species", "shcode"),
  sep = "\\|"
)
# Split genus_species at "_"; extra = "merge" keeps any further underscores
# inside the Species piece instead of dropping the remainder.
df.tmp2 <- separate(
  df.tmp1,
  genus_species,
  into = c("Genus", "Species"),
  sep = "_",
  extra = "merge"
)
# separate() removed the genus_species column above, so append the unsplit
# value again as the last column.
df.tmp3 <- data.frame(df.tmp2, genus_species = df.tmp1[["genus_species"]])
# Replace every cell that contains "unidentified" with just "unidentified".
# Only the taxonomic rank columns listed in COLS are touched.
COLS <- c("kingdom", "phylum", "class", "order", "family", "Genus", "Species")
# Process all rank columns in one vectorised assignment instead of an index
# loop; this also avoids the 1:length() pitfall and leftover loop variables.
df.tmp3[COLS] <- lapply(df.tmp3[COLS], function(rank_values) {
  # grepl() returns FALSE for NA cells, so missing values are left untouched
  rank_values[grepl("unidentified", rank_values, fixed = TRUE)] <- "unidentified"
  rank_values
})
################
# Pull stuff together and make a 'taxonomy' column.
# Each taxonomy string consists of six "|"-separated fields:
#   speciesPerc | genus_species | "EU" | shcode | "reps" | lineage
# where lineage is the k__/p__/c__/o__/f__/g__/s__ ranks joined with ";".
# Columns are read directly from `data`, so no globals named `class` or
# `order` (which would shadow base R functions) are created.
data <- df.tmp3 # generic name; the merge step further down reads `data`
lineage <- paste(
  paste0("k__", data[["kingdom"]]),
  paste0("p__", data[["phylum"]]),
  paste0("c__", data[["class"]]),
  paste0("o__", data[["order"]]),
  paste0("f__", data[["family"]]),
  paste0("g__", data[["Genus"]]),
  # The species rank uses the unsplit genus_species value.
  # NOTE(review): the literal " ..." suffix is emitted as part of the string;
  # confirm that FUNGuild expects it.
  paste0("s__", data[["genus_species"]], " ..."),
  sep = ";"
)
data[, "taxonomy"] <- paste(
  data[["speciesPerc"]],
  data[["genus_species"]],
  "EU", # fixed placeholder for the second code field
  data[["shcode"]],
  "reps", # fixed placeholder for the reps field
  lineage,
  sep = "|"
)
#check the format
#data[1,"taxonomy"]
################
# Attach the 'taxonomy' column to the otu table.
ind.tax <- data[, c("OTUId", "taxonomy")]
# Name the join key explicitly: without `by`, merge() joins on whatever column
# names the two tables happen to share and silently falls back to a Cartesian
# product when they share none. With `by`, a missing OTUId column is an error.
# merge() keeps only OTUs present in both tables (all = FALSE by default).
df.otu.tax <- merge(df.otutab, ind.tax, by = "OTUId")
################
# Write the OTU table with its taxonomy column as a tab-delimited file,
# without row names or quoting.
out.file <- "output/otuTable_forFUNG.tab"
# The output directory may not exist yet (e.g. on a fresh checkout), in which
# case write.table() would fail to open the file; create it if needed.
# showWarnings = FALSE keeps this silent when the directory already exists.
dir.create(dirname(out.file), showWarnings = FALSE, recursive = TRUE)
write.table(df.otu.tax, file = out.file, sep = "\t", row.names = FALSE, quote = FALSE)
#df.test<-read.table("output/otuTable_forFUNG.tab")
#colnames(df.test)[colnames(df.test) == "taxonomy"]