-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathremove_uncertains_UCE.R
48 lines (37 loc) · 1.1 KB
/
remove_uncertains_UCE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Remove the sequences of "uncertain" samples from a FASTA file.
#
# Inputs (read from the working directory):
#   temp                 FASTA file; a sequence may be wrapped over several
#                        lines.
#   species_assignments  whitespace-delimited table; column 1 holds the names
#                        that are matched against the FASTA headers, column 2
#                        the assignment of each name.
# Output:
#   temp.fa              unwrapped FASTA (one header line followed by one
#                        sequence line per record) without the records whose
#                        name is assigned "uncertain".

# quote = "" and comment.char = "" stop read.table from interpreting quotes
# or "#" inside FASTA lines, which would otherwise merge or truncate lines.
# Only the first tab-delimited field of every line is used.
intable <- read.table("temp", header = FALSE, stringsAsFactors = FALSE,
                      sep = "\t", quote = "", comment.char = "")
species <- read.table("species_assignments", header = FALSE,
                      stringsAsFactors = FALSE)

fasta_lines <- as.character(intable[, 1])

# %in% (unlike ==) never yields NA, so rows with a missing assignment are
# simply not selected.
uncertains <- species[species[, 2] %in% "uncertain", 1]

# Any line containing ">" starts a new record; every other line is sequence
# data belonging to the most recent header. Record 0 collects sequence lines
# that appear before the first header.
is_header <- grepl(">", fasta_lines, fixed = TRUE)
record_id <- cumsum(is_header)

# Join the wrapped sequence lines of each record into a single string.
# A header that is not followed by any sequence line never shows up in
# `chunks`, so such empty records are dropped from the output.
chunks <- split(fasta_lines[!is_header], record_id[!is_header])
sequences <- unname(vapply(chunks, paste0, character(1), collapse = ""))
ids <- as.integer(names(chunks))

# Record k (k >= 1) is named by the k-th header line. Sequence data found
# before the first header (record 0) is labelled with the first line of the
# file, so malformed input still produces a name/sequence pair.
headers <- fasta_lines[is_header]
record_names <- c(fasta_lines[1], headers)[ids + 1L]

# Drop every record whose name (header with ">" removed) is listed as
# uncertain, then interleave the remaining names and sequences.
keep <- !(gsub(">", "", record_names, fixed = TRUE) %in% uncertains)
to_write_unc <- as.vector(rbind(record_names[keep], sequences[keep]))

# One element per line; an empty result produces an empty file.
writeLines(to_write_unc, "temp.fa")
q()