-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSuperposition of two Protein-Sequences
227 lines (181 loc) · 7.07 KB
/
Superposition of two Protein-Sequences
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#install library
if (!require('rgl')) {
install.packages('rgl')
library('rgl')
}
# Structure coordinate files in XYZ layout: line 1 holds the atom count,
# line 2 is a free-text comment, and every following line is one
# whitespace-separated "label x y z" record.

# Read one XYZ file.
#
# path: path of the file to read.
#
# Returns a list with
#   lines  - the raw file lines as a list of single strings,
#   n      - the atom count taken from the first line,
#   labels - character vector with the first field of every atom record,
#   coords - numeric n x 3 matrix with fields 2-4 of every atom record.
# Stops with an error when the atom count is unusable or the file holds
# fewer atom records than the count announces.
read_xyz <- function(path) {
  # readLines() already splits on newlines and closes the file itself
  raw_lines <- readLines(path)
  if (length(raw_lines) < 1) {
    stop("XYZ file is empty: ", path, call. = FALSE)
  }
  n_atoms <- as.numeric(raw_lines[1])
  if (is.na(n_atoms) || n_atoms < 0) {
    stop("First line of ", path, " is not a valid atom count", call. = FALSE)
  }
  if (length(raw_lines) < n_atoms + 2) {
    stop("XYZ file ", path, " announces ", n_atoms,
         " atoms but holds fewer records", call. = FALSE)
  }
  # seq_len() keeps an atom count of 0 from indexing lines 3 and 2
  records <- trimws(raw_lines[2 + seq_len(n_atoms)])
  fields <- strsplit(records, "[\t ]+")
  labels <- vapply(fields, function(f) f[1], character(1))
  # Coordinates are stored as numbers right away, not as strings
  coords <- matrix(as.numeric(unlist(lapply(fields, function(f) f[2:4]))),
                   ncol = 3, byrow = TRUE)
  list(lines = as.list(raw_lines), n = n_atoms, labels = labels,
       coords = coords)
}

structure1 <- read_xyz("1zta_ca_1.xyz")
structure2 <- read_xyz("1zta_ca_2.xyz")
# Raw lines of each file (header lines are reused when writing the output)
f1_lines <- structure1$lines
f2_lines <- structure2$lines
# Number of atoms announced on the first line of each file
n1 <- structure1$n
n2 <- structure2$n
# Coordinate matrices (one row per atom) and atom labels
m1 <- structure1$coords
m2 <- structure2$coords
l1 <- structure1$labels
l2 <- structure2$labels
# plotting structures:
# Set up the 3D plot window
par3d(windowRect = c(50, 50, 1000, 1000))
# Plot the first structure in red: points, connecting trace and atom labels
plot3d(m1, xlim = c(-20, 20), ylim = c(-20, 20), zlim = c(-20, 20),
       box = FALSE, axes = FALSE, xlab = "", ylab = "", zlab = "",
       col = "red")
lines3d(m1, col = "red")
text3d(m1, text = l1, col = "red", cex = 0.8)
# Plot the second structure in blue in the same scene
plot3d(m2, col = "blue", add = TRUE)
lines3d(m2, col = "blue")
# Label the second structure with its own labels (l2), not those of the first
text3d(m2, text = l2, col = "blue", cex = 0.8)
# Make sure both coordinate matrices hold numbers (three columns, filled
# column by column), then run a principal component analysis on each.
m1 <- matrix(as.numeric(m1), ncol = 3)
m2 <- matrix(as.numeric(m2), ncol = 3)
# Numeric copies used as PCA input
mn <- m1
mm <- m2
# PCA of the first structure
p <- prcomp(mn)
# PCA of the second structure
pp <- prcomp(mm)
# Inertia vectors of both models, entered by hand and rescaled to unit
# length, then drawn as arrows starting at the origin.

# Scale a vector to unit length
to_unit <- function(v) v / sqrt(sum(v^2))

# Inertia vectors of the first model
i1V1 <- to_unit(c(0.541059, -0.299595, 0.785810))
i1V2 <- to_unit(c(0.787317, -0.103191, -0.581439))
i1V3 <- to_unit(c(0.010829, 0.039588, 0.007637))
# Inertia vectors of the second model
i2V1 <- to_unit(c(0.027761, -0.103066, 0.994287))
i2V2 <- to_unit(c(-0.010364, -0.974455, -0.100720))
i2V3 <- to_unit(c(0.050197, -0.000385, -0.001441))

# Every arrow starts at the origin and is drawn ten units long
arrow_origin <- c(0, 0, 0)
arrow_scale <- 10
# Arrows for the first structure (explicit barb angle)
arrow3d(arrow_origin, arrow_scale * i1V1, theta = pi / 12, col = "red")
arrow3d(arrow_origin, arrow_scale * i1V2, theta = pi / 12, col = "blue")
arrow3d(arrow_origin, arrow_scale * i1V3, theta = pi / 12, col = "green")
# Arrows for the second structure (default barb angle)
arrow3d(arrow_origin, arrow_scale * i2V1, col = "purple")
arrow3d(arrow_origin, arrow_scale * i2V2, col = "black")
arrow3d(arrow_origin, arrow_scale * i2V3, col = "grey")
# Radius of gyration and centring of both models.
# The centroids are computed from the data instead of being pasted in as
# rounded constants, so the script stays correct for any input files.

# Centroid (column means) of Model 1; shown when run interactively
centroid1 <- colMeans(m1)
centroid1
# Model 1 translated so that its centroid sits at the origin
m1_centered <- sweep(m1, 2, centroid1)
# Radius of gyration: root mean squared distance of the atoms to the centroid
rg1 <- sqrt(sum(m1_centered^2) / nrow(m1))

# Centroid (column means) of Model 2; shown when run interactively
centroid2 <- colMeans(m2)
centroid2
# Model 2 translated so that its centroid sits at the origin
m2_centered <- sweep(m2, 2, centroid2)
# Radius of gyration of Model 2
rg2 <- sqrt(sum(m2_centered^2) / nrow(m2))
# Build the 3 x 3 matrix of a rotation by `angle` about the axis `vector`
# (axis-angle form).
#
# vector: numeric vector of length 3 giving the rotation axis. It is scaled
#         to unit length here, so any non-zero length is accepted.
# angle:  rotation angle in radians.
#
# Returns the matrix R for which `R %*% v` rotates a column vector v by
# `angle` about the axis. Multiplying row vectors from the left, as in
# `x %*% R`, therefore rotates them by `-angle`.
# A zero-length axis yields the identity matrix when the angle is zero and
# raises an error otherwise, instead of silently returning a NaN matrix.
rot.mt <- function(vector, angle) {
  axis_length <- sqrt(sum(vector^2))
  if (isTRUE(axis_length < .Machine$double.eps)) {
    # No axis can be derived; only "no rotation at all" is well defined
    if (is.finite(angle) && 1 - cos(angle) < 1e-12) {
      return(diag(3))
    }
    stop("rot.mt: rotation axis has zero length but the angle is not zero",
         call. = FALSE)
  }
  u <- vector / axis_length
  cos_a <- cos(angle)
  sin_a <- sin(angle)
  k <- 1 - cos_a
  # The three vectors below are the COLUMNS of the result, because matrix()
  # fills column by column
  col1 <- c(cos_a + u[1]^2 * k,
            u[2] * u[1] * k + u[3] * sin_a,
            u[3] * u[1] * k - u[2] * sin_a)
  col2 <- c(u[1] * u[2] * k - u[3] * sin_a,
            cos_a + u[2]^2 * k,
            u[3] * u[2] * k + u[1] * sin_a)
  col3 <- c(u[1] * u[3] * k + u[2] * sin_a,
            u[2] * u[3] * k - u[1] * sin_a,
            cos_a + u[3]^2 * k)
  matrix(c(col1, col2, col3), nrow = 3)
}
# Angle between two vectors.
#
# a, b: numeric vectors of equal length.
#
# Returns the angle in radians, in the range [0, pi]. The cosine is clamped
# to [-1, 1] before acos() is applied, because rounding can push it just
# outside that range for (anti)parallel vectors and acos() would then
# return NaN. A zero-length input still gives NaN.
angle <- function(a, b) {
  cos_theta <- sum(a * b) / (sqrt(sum(a * a)) * sqrt(sum(b * b)))
  cos_theta <- max(-1, min(1, cos_theta))
  acos(cos_theta)
}
# Normal vector of the plane spanned by a and b: the cross product a x b.
#
# a, b: numeric vectors with three components each.
#
# Returns a vector with three components that is perpendicular to both
# inputs. It is NOT scaled to unit length; its length depends on the
# inputs, and it is all zeros when a and b are parallel.
norm.vec <- function (a, b) {
  # Cyclic index shifts give all three components in one expression
  a[c(2, 3, 1)] * b[c(3, 1, 2)] - a[c(3, 1, 2)] * b[c(2, 3, 1)]
}
# Superpose Model 2 onto Model 1 with two successive rotations followed by
# a translation. Coordinates are row vectors, so every rotation is applied
# as `points %*% rotation_matrix`.

# Step 1: angle between the third inertia vectors i1V3 and i2V3
alpha <- angle(i1V3, i2V3)
# Rotation axis: perpendicular to both i1V3 and i2V3
nor1 <- norm.vec(i1V3, i2V3)
# Rotation matrix for that axis and angle
rotation_matrix1 <- rot.mt(nor1, alpha)
# Carry i2V1 through the first rotation so that step 2 works in the new frame
i2V1_model <- i2V1 %*% rotation_matrix1
# Rotate the centred Model 2 with the first rotation matrix
rotated_model1 <- m2_centered %*% rotation_matrix1

# Step 2: angle between i1V1 and the rotated i2V1
nalpha <- angle(i1V1, i2V1_model)
# Rotation axis: perpendicular to both i1V1 and the rotated i2V1
nor2 <- norm.vec(i1V1, i2V1_model)
# Rotation matrix for the second rotation
rotation_matrix2 <- rot.mt(nor2, nalpha)
# Apply the second rotation to the already rotated model
rotated_model2 <- rotated_model1 %*% rotation_matrix2

# Step 3: move the rotated Model 2 onto the centre of mass of Model 1
center_of_mass_model1 <- colMeans(m1)
center_of_mass_model2_rotated <- colMeans(rotated_model2)
translation_vector <- center_of_mass_model1 - center_of_mass_model2_rotated
# sweep() adds component j of the translation to column j. A plain
# `matrix + vector` would recycle the three values down the columns and
# shift every atom by a different, wrong offset.
rotated_model2 <- sweep(rotated_model2, 2, translation_vector, "+")
# Write both models, in the common (superimposed) frame, to new XYZ files.
# Define the filenames for the new structure data files
output_file1 <- "model1_superimposed.xyz"
output_file2 <- "model2_superimposed.xyz"

# The two header lines (atom count and comment) are copied from the input.
# An empty comment line may have been stored as a zero-length entry; write
# it back as an empty line so the atom records still start on line 3.
header1 <- c(f1_lines[[1]], if (length(f1_lines[[2]]) > 0) f1_lines[[2]] else "")
header2 <- c(f2_lines[[1]], if (length(f2_lines[[2]]) > 0) f2_lines[[2]] else "")

# Model 1 is the reference of the superposition and keeps its coordinates
writeLines(header1, output_file1)
# quote = FALSE: the records must stay plain "label x y z" text
write.table(cbind(l1, m1), file = output_file1, sep = "\t", quote = FALSE,
            col.names = FALSE, row.names = FALSE, append = TRUE)

# Model 2 is written with its own labels and its rotated, translated
# coordinates
writeLines(header2, output_file2)
write.table(cbind(l2, rotated_model2), file = output_file2, sep = "\t",
            quote = FALSE, col.names = FALSE, row.names = FALSE,
            append = TRUE)
# Root mean squared deviation (RMSD) between the two models, before and
# after the rotations. Both values are measured with each model's centre of
# mass at the origin, so they differ only through the rotations.
# RMSD pairs atom i of Model 1 with atom i of Model 2
stopifnot(nrow(m1_centered) == nrow(m2_centered))
n_atoms <- nrow(m1)

# RMSD between the initial models: centred, but not rotated yet
rmsd1 <- sqrt(sum((m1_centered - m2_centered)^2) / n_atoms)
print(paste("Initial RMSD:", rmsd1))

# RMSD between the superimposed models. The centred Model 2 is taken
# through both rotations here, so it is compared in the same frame as the
# centred Model 1 (no translation mixed in).
m2_aligned <- rotated_model1 %*% rotation_matrix2
rmsd2 <- sqrt(sum((m1_centered - m2_aligned)^2) / n_atoms)
print(paste("Second RMSD:", rmsd2))
# A lower RMSD indicates a better alignment between the two structures.
# The superposition has improved the alignment only if the second value is
# smaller than the first.
# Visualize the superimposed structures in a new rgl window
open3d()
# Model 1 (the reference) in red: points, connecting trace and atom labels
plot3d(m1, xlim = c(-20, 20), ylim = c(-20, 20), zlim = c(-20, 20),
       box = FALSE, axes = FALSE, xlab = "", ylab = "", zlab = "",
       col = "red")
lines3d(m1, col = "red")
text3d(m1, text = l1, col = "red", cex = 0.8)
# Superimposed Model 2 in blue, the colour used for it in the first plot,
# so the two traces can be told apart; labelled with its own labels (l2)
points3d(rotated_model2, col = "blue", size = 2)
lines3d(rotated_model2, col = "blue")
text3d(rotated_model2, text = l2, col = "blue", cex = 0.8)
#reference
#https://en.wikipedia.org/wiki/Rotation_matrix#:~:text=In%20three%20dimensions,-See%20also%3A%20Rotation&text=A%20positive%2090°%20rotation%20around%20the%20y%2Daxis%20(left,the%20origin%20in%20its%20center