1
+ library(dplyr )
2
+ library(stringr )
3
+ library(data.table )
4
+
5
### Segment processing
6
#' Derive consensus V and J segment assignments for CDR3 fragments
#'
#' Records with a positive `v.end` contribute the CDR3 prefix
#' `substr(cdr3, 1, v.end)` as a V fragment; records with a positive
#' `j.start` contribute the CDR3 suffix starting at `j.start` as a J fragment.
#' Segment names are cut at the first `*` or `,` (presumably stripping the
#' allele suffix / alternative calls -- confirm against the input format).
#' For each (species, gene, fragment) combination the most frequently
#' observed segment is kept; ties resolve to the first one in group order.
#'
#' @param .df Data frame with columns `species`, `gene`, `cdr3`, `v.segm`,
#'   `v.end`, `j.segm` and `j.start`.
#' @return A tibble with columns `species`, `gene`, `cdr3`, `type`
#'   (`"V"` or `"J"`) and `segm`; V rows come before J rows.
get_segment_parts <- function(.df) {
  # Shared V/J logic: count how often each segment is seen with each fragment
  # and keep the most frequent segment per (species, gene, fragment).
  consensus_segment <- function(.parts, .type) {
    .parts %>%
      mutate(segm = str_split_fixed(segm, "[*,]", 2)[, 1]) %>%
      group_by(species, gene, segm, cdr3) %>%
      summarise(count = n()) %>%
      group_by(species, gene, cdr3, type = .type) %>%
      # which.max() returns the first maximum, matching the former
      # which(count == max(count))[1] tie-breaking.
      summarise(segm = segm[which.max(count)])
  }

  v_parts <- .df %>%
    filter(v.end > 0) %>%
    transmute(species, gene, segm = v.segm, cdr3 = substr(cdr3, 1, v.end))

  j_parts <- .df %>%
    filter(j.start > 0) %>%
    transmute(species, gene, segm = j.segm,
              cdr3 = substr(cdr3, j.start, nchar(cdr3)))

  # bind_rows() builds a consistent tibble; base rbind() dispatch over
  # grouped tibbles is not reliable.
  bind_rows(
    consensus_segment(v_parts, "V"),
    consensus_segment(j_parts, "J")
  )
}
29
+
30
+
31
+
32
### VDJtools export
33
+
34
# Lookup table used for mock back-translation: exactly one fixed codon per
# amino acid, keyed by the one-letter residue code (20 entries).
mock_codons <- c(
  A = "GCT", C = "TGT", D = "GAT", E = "GAA", F = "TTT",
  G = "GGT", I = "ATT", H = "CAT", K = "AAA", L = "TTA",
  M = "ATG", N = "AAT", P = "CCT", Q = "CAA", R = "CGT",
  S = "TCT", T = "ACT", V = "GTT", W = "TGG", Y = "TAT"
)

#' Back-translate amino acid residues into a mock nucleotide sequence
#'
#' Each residue is replaced by its fixed codon from `mock_codons` and the
#' codons are concatenated without a separator.
#'
#' @param x Character vector of one-letter amino acid codes, one residue per
#'   element (e.g. `strsplit("CASS", "")[[1]]`).
#' @return A single string of `3 * length(x)` nucleotides; `""` for empty
#'   input. Residues that are not in `mock_codons` come out as the literal
#'   text `"NA"`, as `paste0()` renders missing lookups.
mock_back_translate <- function(x) {
  paste0(mock_codons[x], collapse = "")
}
47
+
48
+ # "CASS" %>% strsplit('') %>% lapply(mock_back_translate)
49
+
50
#' Convert a paired-chain table into a VDJtools-style clonotype table
#'
#' Copies the CDR3 / V / J columns of the requested chain into `cdr3aa`, `v`
#' and `j`, synthesises a nucleotide sequence for each CDR3 with
#' `mock_back_translate()`, and fills the remaining columns with
#' placeholders: `count = 1`, `freq = 1 / nrow`, empty `d`, and `-1` for all
#' segment boundary positions.
#'
#' @param .df Data frame holding `cdr3.<chain>`, `v.<chain>` and `j.<chain>`
#'   columns for the selected chain.
#' @param .chain Which chain to export: `"beta"` (default) or `"alpha"`.
#' @return `.df` reduced to the columns `count`, `freq`, `cdr3nt`, `cdr3aa`,
#'   `v`, `d`, `j`, `vend`, `dstart`, `dend`, `jstart`, in that order.
as.vdjtools.df <- function(.df, .chain = c("beta", "alpha")) {
  # Resolve the default vector to a single value; a length-2 condition in
  # `if` is an error in current R.
  .chain <- match.arg(.chain)

  source_cols <- paste0(c("cdr3.", "v.", "j."), .chain)
  absent <- setdiff(source_cols, names(.df))
  if (length(absent) > 0) {
    stop("Missing column(s): ", paste(absent, collapse = ", "), call. = FALSE)
  }

  .df$cdr3aa <- .df[[source_cols[1]]]
  .df$v <- .df[[source_cols[2]]]
  .df$j <- .df[[source_cols[3]]]

  # Back-translate the CDR3 of the *selected* chain and keep the result as a
  # plain character column (one string per row), not a list column.
  .df$cdr3nt <- vapply(
    strsplit(as.character(.df$cdr3aa), ""),
    mock_back_translate,
    character(1),
    USE.NAMES = FALSE
  )

  .df %>%
    mutate(count = 1, freq = 1 / n(), d = "",
           vend = -1, dstart = -1, dend = -1, jstart = -1) %>%
    select(count, freq, cdr3nt, cdr3aa, v, d, j, vend, dstart, dend, jstart)
}
0 commit comments