# Superposition of two protein structures

#install library
if (!require('rgl')) {
  install.packages('rgl')
  library('rgl')
}

# Structure coordinate files. The parsing below expects the XYZ layout:
# line 1 = number of atoms, line 2 = comment, then one "label x y z" record
# per atom.
f1 <- "1zta_ca_1.xyz"
f2 <- "1zta_ca_2.xyz"

# Parsing the input:
# readLines() already returns one element per line, so no extra splitting is
# needed (and an empty comment line is kept as "" instead of being dropped).
f1_lines <- readLines(f1)
f2_lines <- readLines(f2)

# Extract the number of atoms from the first line of each file
n1 <- as.numeric(f1_lines[[1]])
n2 <- as.numeric(f2_lines[[1]])

# Fail early, with a clear message, on a malformed or truncated file.
stopifnot(
  !is.na(n1), !is.na(n2),
  length(f1_lines) >= n1 + 2,
  length(f2_lines) >= n2 + 2
)

# Parse the atom records of one XYZ file.
#
# Arguments:
#   lines    character vector holding every line of the file.
#   n_atoms  number of atom records, which start on line 3.
#
# Returns a list with
#   labels   character vector with the first field of every record;
#   coords   numeric n_atoms x 3 matrix built from fields 2-4 (x, y, z).
parse_xyz_atoms <- function(lines, n_atoms) {
  # seq_len() keeps this correct for n_atoms == 0, where 3:(n_atoms + 2)
  # would count backwards.
  records <- lines[2 + seq_len(n_atoms)]
  # trimws() stops leading blanks from producing an empty first field.
  fields <- strsplit(trimws(records), "[\t ]+")
  labels <- vapply(fields, function(x) x[1], character(1))
  xyz <- unlist(lapply(fields, function(x) x[2:4]))
  coords <- matrix(as.numeric(xyz), ncol = 3, byrow = TRUE)
  list(labels = labels, coords = coords)
}

# Coordinates (numeric matrices) and labels of both structures
atoms1 <- parse_xyz_atoms(f1_lines, n1)
atoms2 <- parse_xyz_atoms(f2_lines, n2)

m1 <- atoms1$coords
m2 <- atoms2$coords
l1 <- atoms1$labels
l2 <- atoms2$labels

# Plotting structures:
# Set up the 3D plot window
par3d(windowRect = c(50, 50, 1000, 1000))

# Plot the first structure in red: points, connecting line and atom labels
plot3d(m1, xlim = c(-20, 20), ylim = c(-20, 20), zlim = c(-20, 20),
       box = FALSE, axes = FALSE, xlab = "", ylab = "", zlab = "",
       col = "red")
lines3d(m1, col = "red")
text3d(m1, texts = l1, col = "red", cex = 0.8)

# Plot the second structure in blue into the same scene. It is labelled with
# its own labels (l2); using l1 here would mislabel the atoms whenever the
# two files differ.
plot3d(m2, col = "blue", add = TRUE)
lines3d(m2, col = "blue")
text3d(m2, texts = l2, col = "blue", cex = 0.8)

# Principal component analysis of both structures.
# The coordinate matrices are first coerced to numeric three-column form;
# `mn` and `mm` are the copies handed to prcomp().
m1 <- matrix(as.numeric(m1), ncol = 3)
m2 <- matrix(as.numeric(m2), ncol = 3)

# PCA of the first structure
mn <- m1
p <- prcomp(mn)

# PCA of the second structure
mm <- m2
pp <- prcomp(mm)

# Inertia vectors of both models, given as fixed numbers and rescaled to
# unit length before use.
to_unit_length <- function(v) {
  v / sqrt(sum(v^2))
}

# Model 1
i1V1 <- to_unit_length(c(0.541059, -0.299595, 0.785810))
i1V2 <- to_unit_length(c(0.787317, -0.103191, -0.581439))
i1V3 <- to_unit_length(c(0.010829, 0.039588, 0.007637))

# Model 2
i2V1 <- to_unit_length(c(0.027761, -0.103066, 0.994287))
i2V2 <- to_unit_length(c(-0.010364, -0.974455, -0.100720))
i2V3 <- to_unit_length(c(0.050197, -0.000385, -0.001441))

# Draw each inertia vector as an arrow from the origin, ten units long.
origin <- c(0, 0, 0)

# Arrows of the first structure
arrow3d(origin, 10 * i1V1, theta = pi / 12, col = "red")
arrow3d(origin, 10 * i1V2, theta = pi / 12, col = "blue")
arrow3d(origin, 10 * i1V3, theta = pi / 12, col = "green")

# Arrows of the second structure
arrow3d(origin, 10 * i2V1, col = "purple")
arrow3d(origin, 10 * i2V2, col = "black")
arrow3d(origin, 10 * i2V3, col = "grey")

# Centroid (unweighted mean position) of each model. It is computed from the
# data rather than pasted in as rounded constants, so the script stays
# correct for other input files and loses no precision.
com1 <- colMeans(m1)
com2 <- colMeans(m2)
com1 # shown when the script is run at top level
com2

# Translate each model so that its centroid sits at the origin
m1_centered <- sweep(m1, 2, com1, FUN = "-")
m2_centered <- sweep(m2, 2, com2, FUN = "-")

# Radius of gyration: root-mean-square distance of the atoms from the
# centroid of their own model
rg1 <- sqrt(sum(m1_centered^2) / nrow(m1))
rg2 <- sqrt(sum(m2_centered^2) / nrow(m2))

# Build the 3x3 rotation matrix for a rotation of `angle` radians about the
# axis `vector` (axis-angle form).
#
# Arguments:
#   vector  numeric vector of length 3 giving the rotation axis. It is
#           normalised internally, so any non-zero length is accepted.
#   angle   rotation angle in radians.
#
# Returns:
#   A 3x3 numeric matrix R. `R %*% v` rotates the column vector v by +angle
#   about the axis; `v %*% R` (row vector on the left) applies the transpose,
#   i.e. the rotation by -angle.
#
# A zero-length axis has no direction. It is accepted only when the angle
# describes no rotation at all (the identity matrix is returned); otherwise
# an error is raised instead of silently returning a matrix of NaN.
rot.mt <- function (vector, angle) {
  axis_length <- sqrt(sum(vector^2))
  if (isTRUE(axis_length == 0)) {
    if (abs(sin(angle)) < 1e-6 && cos(angle) > 0) {
      return(diag(3))
    }
    stop("rot.mt: rotation axis has zero length, rotation is undefined",
         call. = FALSE)
  }

  u <- vector / axis_length
  cos_a <- cos(angle)
  sin_a <- sin(angle)
  k <- 1 - cos_a

  # Columns of the rotation matrix; matrix() fills column by column.
  col1 <- c(cos_a + u[1]^2 * k,
            u[2] * u[1] * k + u[3] * sin_a,
            u[3] * u[1] * k - u[2] * sin_a)
  col2 <- c(u[1] * u[2] * k - u[3] * sin_a,
            cos_a + u[2]^2 * k,
            u[3] * u[2] * k + u[1] * sin_a)
  col3 <- c(u[1] * u[3] * k + u[2] * sin_a,
            u[2] * u[3] * k - u[1] * sin_a,
            cos_a + u[3]^2 * k)
  matrix(c(col1, col2, col3), nrow = 3)
}

# Angle between two vectors.
#
# Arguments:
#   a, b  numeric vectors of equal length.
#
# Returns:
#   The angle between a and b in radians, in [0, pi]. The result is NaN when
#   either vector has zero length.
angle <- function (a, b) {
  cos_alpha <- sum(a * b) / (sqrt(sum(a * a)) * sqrt(sum(b * b)))
  # Round-off can push the cosine of (anti)parallel vectors marginally
  # outside [-1, 1], where acos() returns NaN; clamp it back into range.
  acos(min(1, max(-1, cos_alpha)))
}

# Cross product of two 3-vectors: a vector perpendicular to both a and b.
# The result is NOT rescaled to unit length; its length depends on the
# lengths of a and b and on the angle between them.
norm.vec <- function (a, b) {
  # Cyclic index shifts: component k is a[k+1] * b[k+2] - a[k+2] * b[k+1]
  nxt <- c(2, 3, 1)
  prv <- c(3, 1, 2)
  a[nxt] * b[prv] - a[prv] * b[nxt]
}

# Step 1: bring the third inertia vector of model 2 onto that of model 1.
# Angle between i1V3 and i2V3
alpha <- angle(i1V3, i2V3)
# Rotation axis: vector perpendicular to i1V3 and i2V3 (their cross product)
nor1 <- norm.vec(i1V3, i2V3)

# Rotation matrix about that axis
rotation_matrix1 <- rot.mt(nor1, alpha)

# Apply the rotation to i2V1 so it can be used in the second step.
# Coordinates are row vectors multiplied on the left of the matrix.
i2V1_model <- i2V1 %*% rotation_matrix1

# Rotate the centered model 2 with the same matrix
rotated_model1 <- m2_centered %*% rotation_matrix1


# Step 2: bring the (already rotated) first inertia vector of model 2 onto
# that of model 1.
# Angle between i1V1 and i2V1_model
nalpha <- angle(i1V1, i2V1_model)
# Rotation axis: vector perpendicular to i1V1 and i2V1_model
nor2 <- norm.vec(i1V1, i2V1_model)

# Rotation matrix for the second step
rotation_matrix2 <- rot.mt(nor2, nalpha)
# Rotate the previously rotated model using the second rotation matrix
rotated_model2 <- rotated_model1 %*% rotation_matrix2

# Superimpose the models by moving the rotated model 2 onto the centroid of
# model 1.
center_of_mass_model1 <- colMeans(m1)
center_of_mass_model2_rotated <- colMeans(rotated_model2)
translation_vector <- center_of_mass_model1 - center_of_mass_model2_rotated
# sweep() adds translation_vector[j] to column j. A plain `matrix + vector`
# recycles the vector down the columns, which would add the x/y/z offsets to
# the wrong coordinates.
rotated_model2 <- sweep(rotated_model2, 2, translation_vector, FUN = "+")

# Define the filenames for the new structure data files
output_file1 <- "model1_superimposed.xyz"
output_file2 <- "model2_superimposed.xyz"

# Header of an XYZ file: the atom-count line followed by the comment line.
# An empty comment line is written back as "" so that the atom records
# always start on line 3.
xyz_header <- function(lines) {
  comment_line <- lines[[2]]
  if (length(comment_line) == 0) {
    comment_line <- ""
  }
  c(lines[[1]], comment_line)
}

# Model 1 is the reference of the superposition and is written with its
# original coordinates. quote = FALSE keeps the labels free of quote
# characters.
writeLines(xyz_header(f1_lines), output_file1)
write.table(data.frame(l1, m1), file = output_file1, sep = "\t",
            quote = FALSE, col.names = FALSE, row.names = FALSE,
            append = TRUE)

# Model 2 is written with its rotated and translated (superimposed)
# coordinates and its own labels.
writeLines(xyz_header(f2_lines), output_file2)
write.table(data.frame(l2, rotated_model2), file = output_file2, sep = "\t",
            quote = FALSE, col.names = FALSE, row.names = FALSE,
            append = TRUE)

# RMSD needs a one-to-one pairing of atoms, so both models must have the
# same number of rows.
stopifnot(nrow(m1_centered) == nrow(m2_centered),
          nrow(m1_centered) == nrow(rotated_model2))
n_atoms <- nrow(m1)

# RMSD between the initial models: both centered on their own centroid,
# before any rotation has been applied to model 2.
rmsd1 <- sqrt(sum((m1_centered - m2_centered)^2) / n_atoms)
print(paste("Initial RMSD:", rmsd1))

# RMSD between the superimposed models. The rotated model 2 is re-centered
# on its own centroid first, so that both coordinate sets are compared in
# the same (centroid-at-origin) frame.
rotated_model2_centered <- sweep(rotated_model2, 2, colMeans(rotated_model2),
                                 FUN = "-")
rmsd2 <- sqrt(sum((m1_centered - rotated_model2_centered)^2) / n_atoms)
print(paste("Second RMSD:", rmsd2))

# A lower RMSD indicates a better alignment between the two structures.
# The superposition has improved the alignment only if the second RMSD is
# smaller than the initial one.

# Visualize the superimposed structures in a new rgl window
open3d()

# Model 1 (reference) in red
plot3d(m1, xlim = c(-20, 20), ylim = c(-20, 20), zlim = c(-20, 20),
       box = FALSE, axes = FALSE, xlab = "", ylab = "", zlab = "",
       col = "red")
lines3d(m1, col = "red")
text3d(m1, texts = l1, col = "red", cex = 0.8)

# Superimposed model 2 in blue, with its own labels, so the two structures
# can be told apart (same colours as in the first plot).
points3d(rotated_model2, col = "blue", size = 2)
lines3d(rotated_model2, col = "blue")
text3d(rotated_model2, texts = l2, col = "blue", cex = 0.8)

#reference
#https://en.wikipedia.org/wiki/Rotation_matrix#:~:text=In%20three%20dimensions,-See%20also%3A%20Rotation&text=A%20positive%2090°%20rotation%20around%20the%20y%2Daxis%20(left,the%20origin%20in%20its%20center