-
Notifications
You must be signed in to change notification settings - Fork 3
/
scoring-xlinks.py
160 lines (135 loc) · 4.8 KB
/
scoring-xlinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# ScoringXlinks.py
# This script was implemented to run within IMP software package
# Documentation and information on how to run it can be found here:
# https://github.com/integrativemodeling/hybrid_ms_method
# This script scores the violation of cross-links in the
# candidate model structures
import re
#import os, numarray
import operator
from operator import itemgetter
from math import *
from Numeric import * # imports numerical python
import csv, sys, os
# Command-line arguments:
#   1) the list of theoretical X-links (vxl type of text file)
#   2) the reference (cut-off) distance that cross-link distances are
#      compared against when counting violations
if len(sys.argv) != 3:
    # Both arguments are required, so the message names both of them.
    sys.exit(" Must provide the input text file and the reference distance\n"
             " usage: scoring-xlinks.py <xlinks_file> <reference_distance>")
# Convert the arguments into a string and a number
InFname = str(sys.argv[1])
try:
    RefD = float(sys.argv[2])
except ValueError:
    # Report a non-numeric distance as a usage error instead of a traceback.
    sys.exit(" The reference distance must be a number, got: " + sys.argv[2])
# This function reads the input txt file
def ReadList_Xlinks(InFname):
    """Return the lines of the text file ``InFname`` as a list of strings.

    The whole file is read and split with ``str.splitlines()``, so the
    returned strings carry no line terminators.  An empty file yields an
    empty list.

    The file is opened with a context manager so the handle is closed even
    if reading raises.
    """
    with open(InFname, 'r') as InFile:
        return InFile.read().splitlines()
# Two lists are generated: one with the first linker of every cross-link and
# another with the second linker.
# For inter-protein crosslinks, the links from two different chains (subunits)
# of the same protein or from two different proteins are used.
def getList_Xlinks0(lines):
    """Return the integer found in the 5th column of every non-blank line.

    Each line is split on whitespace and the token at index 4 is converted
    with ``int``.  Blank or whitespace-only lines (for example a trailing
    empty line in the input file) are skipped instead of raising IndexError.

    Raises IndexError if a non-blank line has fewer than 5 columns and
    ValueError if the 5th column is not an integer.
    """
    outList0 = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        outList0.append(int(fields[4]))
    return outList0
def getList_Xlinks1(lines):
    """Return the integer found in the 9th column of every non-blank line.

    Each line is split on whitespace and the token at index 8 is converted
    with ``int``.  Blank or whitespace-only lines (for example a trailing
    empty line in the input file) are skipped instead of raising IndexError.

    Raises IndexError if a non-blank line has fewer than 9 columns and
    ValueError if the 9th column is not an integer.
    """
    outList1 = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        outList1.append(int(fields[8]))
    return outList1
ls0= getList_Xlinks0(ReadList_Xlinks(InFname))
ls1= getList_Xlinks1(ReadList_Xlinks(InFname))
print ls0, ls1
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# The user is prompted to enter the first and the last number of structures
NAstrs=raw_input("enter the first number of structures: ")
NBstrs=raw_input("enter the last number of structures: ")
NA=int(NAstrs)
NB=int(NBstrs)
list0 = []
list1 = []
list2 = []
list3 = []
list4 = []
list5 = []
list6 = []
listNames=[]
##Read input file *.mfj type od structures
for i in range(NA, NB):
#InputFileName="module1_20k_15Dec."+str(i)+".mfj"
InputFileName="models."+str(i)+".mfj"
def ReadFile(InputFile):
InputFile = open(InputFileName,'r') # Open file to read
InputFileLines = InputFile.read().splitlines() # string of lines from pdb file
InputFile.close()
return InputFileLines
# Store only the coordinates from the files
def coordinates(lines):
outX=[]
outY=[]
outZ=[]
outR=[]
for i in lines[6:]:
l = re.findall ("(\S+)", i)
X0 = float(l[0])
Y0 = float(l[1])
Z0 = float(l[2])
R0 = float(l[3])
outX.append(X0)
outY.append(Y0)
outZ.append(Z0)
outR.append(R0)
#print Scr2
return outX, outY, outZ, outR
b= coordinates(ReadFile(InputFileName))
# get the euclidean distances in the stored coordinates that are specified in the input *txt file
listDlink = []
def getDistances(lines):
for i,j in zip(ls0, ls1):
Dlink = sqrt(pow(b[0][512+i-6]-b[0][j-1], 2) + pow(b[1][512+i-6]-b[1][j-1], 2) + pow(b[2][512+i-6]-b[2][j-1], 2))
listDlink.append(Dlink)
return listDlink
cA = getDistances(ReadFile(InputFileName))
# Count the violation of the cross-link restraints for a given cut-off distance
def getScoreXlinks(lines, cLink):
if cLink<RefD:
score1=0.0
else:
score1=1.0
Score_Xlinks= score1
return Score_Xlinks
# append the file names into a lit
listNames.append(InputFileName)
listS = []
#calculate the sum of violations
def getScores(lines):
for i in range(len(ls0)):
S = getScoreXlinks(ReadFile(InputFileName), cA[i])
listS.append(S)
return listS
# print overall violation score
Sc = getScores(ReadFile(InputFileName))
print Sc
TotalScore = sum(Sc)
print TotalScore
#%%%%%%%%%%%%%%%%%%%%%%%
eSF=str(TotalScore)
list3.append(eSF)
# Write the scores into a summary file (named *.csv).  The first line is the
# printed form of the list of model file names and the second line is the
# printed form of the list of total scores; both are written with str(), so
# the content is Python list syntax rather than comma-separated values.
SummaryFileName = 'outList-Xlinks_Scores.csv'
# The context manager flushes and closes the file even if a write fails.
with open(SummaryFileName, 'w') as SummaryFile:
    SummaryFile.write(str(listNames) + '\n')
    SummaryFile.write(str(list3) + '\n')