-
Notifications
You must be signed in to change notification settings - Fork 0
/
replace.r
79 lines (59 loc) · 2.42 KB
/
replace.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Script setup.
# NOTE(review): rm(list = ls()) removes every (non-hidden) object from the
# current environment, so running or sourcing this script discards whatever
# the session already held. Consider running it in a fresh R session instead.
rm(list = ls())
# dplyr is attached for the data-frame verbs and the %>% pipe used below.
library(dplyr)
# Presumably defines GetSubstitutions() and ReportTripleVariant(), which are
# called later in this script but not defined in it -- confirm.
source("replacer_functions.r")
# Load the annotated results and build the working table `replaceInput`.
#
# Steps:
#   1. Read the annotated results from the RDS file.
#   2. Keep only the columns needed downstream.
#   3. Collapse every run of whitespace in `info` to a single space so the
#      field can be split on " ".
#   4. Split `info` into ten named fields. With extra = "merge", any surplus
#      pieces stay attached to the last field (AA2). separate() removes the
#      original `info` column itself, so no explicit clean-up is needed.
#   5. Pre-create the result columns REPLACEMENT and NREPS, filled with NA;
#      they are populated later, row by row.
chiResults <- readRDS("data/output/annotated_5feb_chrom2.rds")

replaceInput <- chiResults |>
  dplyr::select(
    nucPosition, alleles,
    group1AltAllFreq,
    group2AltAllFreq,
    lod,
    totalHeteroz,
    group1Heteroz,
    group2Heteroz,
    info
  ) |>
  dplyr::mutate(info = gsub("\\s+", " ", info)) |>
  tidyr::separate(
    info,
    into = c(
      "LOC", "POL", "REFN", "GTYPE", "CODPOS",
      "RESID", "CODON1", "AA1", "CODON2", "AA2"
    ),
    sep = " ",
    extra = "merge"
  )

replaceInput$REPLACEMENT <- NA # placeholder column, filled in later
replaceInput$NREPS <- NA # placeholder column, filled in later
# replaceInput1000 <- head(replaceInput, n = 15000)
# nRows <- nrow(replaceInput1000)
# i <- 766 # one indel
# i <- 2735 # triple variant everything syn
# i <- 204 # triple variant double rep
# Annotate each coding-sequence (CDS) variant in replaceInput.
#
# For every row whose GTYPE is "CDS", GetSubstitutions() is called with the
# reference codon, amino acid and nucleotide, the position within the codon,
# the observed alleles and the polarity. The second row of the resulting
# table is written back into CODON2, AA2 and REPLACEMENT. When the table has
# more than two rows it is first passed through ReportTripleVariant(), and
# NREPS is filled in as well.
# Rows with any other (or missing) GTYPE are left untouched.
#
# seq_len() makes the loop a no-op on a zero-row table; 1:nrow() would
# iterate over c(1, 0) and fail on the empty subscript at i = 0.
for (i in seq_len(nrow(replaceInput))) {
  # isTRUE() treats a missing GTYPE (NA comparison) as "not CDS".
  if (!isTRUE(replaceInput$GTYPE[i] == "CDS")) {
    next # variation not in CDS; do nothing.
  }

  nucPos <- replaceInput$nucPosition[i]
  # print( paste("CDS mutation in nuc pos:", nucPos) )
  refCodon <- replaceInput$CODON1[i]
  refAmino <- replaceInput$AA1[i]
  refNuc <- replaceInput$REFN[i]
  varPosition <- as.numeric(replaceInput$CODPOS[i])
  variants <- replaceInput$alleles[i]
  polarity <- replaceInput$POL[i]

  variantsTable <- GetSubstitutions(
    refCodon = refCodon,
    refAmino = refAmino,
    refNuc = refNuc,
    varPosition = varPosition,
    variants = variants,
    polarity = polarity
  )

  # Presumably row 1 describes the reference and row 2 the alternative
  # allele -- confirm against GetSubstitutions() / ReportTripleVariant().
  if (nrow(variantsTable) > 2) { # more than 2 alleles
    temp <- ReportTripleVariant(variantsTable)
    replaceInput$CODON2[i] <- temp$codon[2]
    replaceInput$AA2[i] <- temp$amino[2]
    replaceInput$REPLACEMENT[i] <- temp$replacement[2]
    replaceInput$NREPS[i] <- temp$nRep[2]
  } else {
    # NOTE(review): NREPS is not written in this branch and stays NA --
    # confirm that this is intended for two-allele sites.
    replaceInput$CODON2[i] <- variantsTable$codon[2]
    replaceInput$AA2[i] <- variantsTable$amino[2]
    replaceInput$REPLACEMENT[i] <- variantsTable$replacement[2]
  }
}
#save merge results to file