-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmol_back.py
132 lines (120 loc) · 4.04 KB
/
mol_back.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from collections import defaultdict, Counter
class MoleculeParser:
    """Parse a chemical formula string into per-atom counts.

    The pipeline is: tokenise the string (``parse_lower_and_int``), nest the
    tokens by bracket (``parse_parentheses``), expand multipliers
    (``parse_mol``) and count the resulting atoms (``parse``).  ``output``
    wraps the whole thing and returns a human-readable sentence.

    Attributes set in ``__init__``:
        left_brackets / right_brackets: opening and closing bracket
            characters; the bracket at index ``k`` of one list pairs with
            index ``k`` of the other.
        error_message: text returned for any formula judged invalid.
        succes_message: prefix of the sentence returned for a valid formula.
    """

    def __init__(self):
        super().__init__()
        self.left_brackets = ['(', '[', '{']
        self.right_brackets = [')', ']', '}']
        self.error_message = 'it seems that your molecule is not valid. Use regular atoms, indexing and avoid parentheses mismatch'
        self.succes_message = 'it seems that your molecule contains '

    def push(self, obj, l, depth):
        """Append ``obj`` to the list found ``depth`` levels down in ``l``.

        At each level the last element is followed, so the target is the
        most recently opened nested list.

        Raises:
            IndexError: if ``depth`` is negative, or a level is empty.
        """
        # A negative depth would never reach zero in the loop below and could
        # spin forever once it lands on a one-character string.
        if depth < 0:
            raise IndexError('negative nesting depth')
        while depth:
            l = l[-1]
            depth -= 1
        l.append(obj)

    def parse_lower_and_int(self, mol):
        """Split ``mol`` into tokens.

        A lowercase letter directly preceded by an uppercase letter is merged
        with it (``'Mg'``), consecutive decimal digits are merged into one
        number token (``'12'``), and every other character is its own token.

        Args:
            mol: the formula as a string.

        Returns:
            The list of tokens in left-to-right order.
        """
        parsed = []
        i = len(mol) - 1
        # Scan right to left so a lowercase letter can pull in the character
        # before it; the list is reversed at the end.
        while i >= 0:
            char = mol[i]
            # ``i > 0`` keeps mol[i - 1] from wrapping around to the end of
            # the string when the formula starts with a lowercase letter.
            if char.islower() and i > 0 and mol[i - 1].isupper():
                parsed.append(mol[i - 1] + char)
                i -= 1
            elif char.isdecimal():
                # isdecimal (not isdigit) so the token is always accepted by
                # int() later on.
                start = i
                while start > 0 and mol[start - 1].isdecimal():
                    start -= 1
                parsed.append(mol[start:i + 1])
                i = start
            else:
                parsed.append(char)
            i -= 1
        parsed.reverse()
        return parsed

    def parse_parentheses(self, mol):
        """Nest tokens into lists according to the brackets they sit in.

        Args:
            mol: an iterable of tokens (or characters).

        Returns:
            A (possibly nested) list of the non-bracket tokens, or ``False``
            when a closing bracket has no matching opener, an opener is never
            closed, or the closing bracket is of a different kind than the
            opener.
        """
        groups = []
        open_groups = [groups]  # innermost open group is last
        expected_closers = []   # closer required by each open bracket
        for token in mol:
            if token in self.left_brackets:
                new_group = []
                open_groups[-1].append(new_group)
                open_groups.append(new_group)
                expected_closers.append(
                    self.right_brackets[self.left_brackets.index(token)])
            elif token in self.right_brackets:
                if not expected_closers or expected_closers.pop() != token:
                    return False
                open_groups.pop()
            else:
                open_groups[-1].append(token)
        if expected_closers:
            return False
        return groups

    def parse_mol(self, mol):
        """Expand multipliers and flatten nested groups into a list of atoms.

        Args:
            mol: a token string, or a nested token list as produced by
                ``parse_parentheses``.

        Returns:
            A flat list with one entry per atom occurrence.  The list is built
            scanning right to left, so atoms appear in reverse formula order.
            A number token with nothing before it in its group has nothing to
            multiply; it is kept as a literal entry so that ``is_valid``
            rejects the formula instead of it being silently miscounted.
        """
        if isinstance(mol, str):
            return [mol]
        expanded_mol = []
        i = len(mol) - 1
        while i >= 0:
            item = mol[i]
            if isinstance(item, list):
                expanded_mol += self.parse_mol(item)
            elif item.isdecimal() and i > 0:
                # The multiplier applies to the token or group just before it,
                # which is consumed here together with the number.
                expanded_mol += self.parse_mol(mol[i - 1]) * int(item)
                i -= 1
            else:
                expanded_mol.append(item)
            i -= 1
        return expanded_mol

    def is_valid(self, mol, mol_dic):
        """Tell whether ``mol`` / ``mol_dic`` describe a well-formed molecule.

        Args:
            mol: the original formula string.
            mol_dic: the parsed atoms (iterating it yields atom symbols).

        Returns:
            ``False`` when the formula is empty, starts with a digit, contains
            no atoms, or contains a symbol that is not one uppercase letter
            optionally followed by one lowercase letter; ``True`` otherwise.
        """
        if not mol or mol[0].isdigit():
            return False
        if not mol_dic:
            return False
        for element in mol_dic:
            if len(element) == 1:
                well_formed = element.isupper()
            elif len(element) == 2:
                well_formed = element[0].isupper() and element[1].islower()
            else:
                well_formed = False
            if not well_formed:
                return False
        return True

    def nice_output(self, mol_dic):
        """Format atom counts as ``'<prefix>2 H, 1 S, and 4 O'``.

        Args:
            mol_dic: mapping of atom symbol to count.

        Returns:
            ``succes_message`` followed by the ``'<count> <atom>'`` entries,
            separated by ``', '`` with ``', and '`` before the last one.
        """
        parts = ['{} {}'.format(count, atom)
                 for atom, count in mol_dic.items()]
        if len(parts) > 1:
            listing = ', '.join(parts[:-1]) + ', and ' + parts[-1]
        else:
            listing = ''.join(parts)
        return self.succes_message + listing

    def parse(self, mol):
        """Count the atoms of the formula ``mol``.

        Returns:
            A dict mapping each token left after expansion to its number of
            occurrences, or ``error_message`` when the brackets do not match.
            Symbols are not checked here; see ``is_valid``.
        """
        tokens = self.parse_lower_and_int(mol)
        groups = self.parse_parentheses(tokens)
        # Identity check: an empty group list is a legitimate result and must
        # not be confused with the False failure marker.
        if groups is False:
            return self.error_message
        return dict(Counter(self.parse_mol(groups)))

    def output(self, mol):
        """Return a sentence describing ``mol``, or ``error_message``."""
        mol_dic = self.parse(mol)
        # parse() hands back the error string on bracket mismatch; only a real
        # dict is worth validating.
        if isinstance(mol_dic, dict) and self.is_valid(mol, mol_dic):
            return self.nice_output(mol_dic)
        return self.error_message
if __name__ == "__main__":
    # Manual smoke test: parse one sample formula and print the raw result.
    # Other inputs tried during development: 'H2Ol982', 'Mg2[(OH]2]3H2'.
    sample = 'K4[(ON(S(Ol)23)2]2'
    result = MoleculeParser().parse(sample)
    print('Output', result)