-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHMMParamEstimator.py
80 lines (70 loc) · 4.07 KB
/
HMMParamEstimator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python3
# HMM Set 2
# Estimate the Parameters of an HMM
# University of California, Santa Cruz - BME 205
# Biomolecular Engineering and Bioinformatics
# Name: (zmmason)
# Group Members: NONE
import sys
import numpy as np # numpy version 1.19.3
class ParamHMM:
    """
    Estimate the Parameters of an HMM.

    Input: A sequence of emitted symbols x = x1 . . . xn in an alphabet ∑ and a path π = π1 . . . πn generated by a
    k-state HMM with unknown transition and emission probabilities.
    Output: A matrix of transition probabilities Transition and a matrix of emission probabilities Emission that
    maximize Pr(x,π) over all possible matrices of transition and emission probabilities.
    """

    def __init__(self, emission, alphabet, pi, states):
        """
        Constructor: encode the emitted string and the hidden path as index lists.

        emission: sequence of emitted symbols; every symbol must occur in alphabet.
        alphabet: ordered symbols; their order is the column order of the emission matrix.
        pi: hidden path, one state per emitted symbol; every state must occur in states.
        states: ordered state names; their order is the row order of both matrices.
        Raises ValueError for an unknown symbol/state or when emission and pi differ in length.
        """
        self.emission = [alphabet.index(symbol) for symbol in emission]  # column index of each emitted symbol
        self.states = states
        self.pi = [states.index(state) for state in pi]  # row index of each state on the path
        self.alphabet = alphabet
        if len(self.emission) != len(self.pi):
            # every emitted symbol needs exactly one hidden state, otherwise the counts are meaningless
            raise ValueError('emission and pi must have the same length '
                             '({} != {})'.format(len(self.emission), len(self.pi)))
        # initializing the matrix for emission and transition
        self.transitionMatrix = np.zeros((len(states), len(states)))
        self.emissionMatrix = np.zeros((len(states), len(alphabet)))

    @staticmethod
    def _normalizeRows(counts):
        """Return a copy of counts with each row scaled to sum to 1; all-zero rows become uniform."""
        probs = counts.copy()
        probs[probs.sum(axis=1) == 0] = 1.0  # accounts for 0 frequency: unseen state -> uniform row
        return probs / probs.sum(axis=1, keepdims=True)

    @staticmethod
    def _labelRows(labels, rows):
        """Return one tab-separated line per row, prefixed with its label, without touching rows."""
        return ['\t'.join([str(label)] + [str(value) for value in row]) for label, row in zip(labels, rows)]

    def paramEst(self):
        """
        Compute the transition and emission matrices that maximize Pr(x,π).

        Counts are rebuilt from scratch on every call, so calling this method repeatedly gives the same
        answer. The unrounded probabilities are stored in self.emissionMatrix and self.transitionMatrix.
        A state with no observed emissions (or no outgoing transitions) gets a uniform row; with an empty
        sequence every row is uniform.
        Returns (roundEmis, roundTrans): both matrices as nested lists rounded to 3 decimal places.
        """
        emisCounts = np.zeros((len(self.states), len(self.alphabet)))
        transCounts = np.zeros((len(self.states), len(self.states)))
        for state, symbol in zip(self.pi, self.emission):
            emisCounts[state, symbol] += 1  # counts frequency of occurrences
        for prev, cur in zip(self.pi, self.pi[1:]):
            transCounts[prev, cur] += 1  # counts each consecutive pair of states on the path
        self.emissionMatrix = self._normalizeRows(emisCounts)  # get Pr(emissions)
        self.transitionMatrix = self._normalizeRows(transCounts)  # get Pr(transitions)
        roundEmis = np.round(self.emissionMatrix, 3).tolist()  # rounded to 3rd dec place for formatting
        roundTrans = np.round(self.transitionMatrix, 3).tolist()
        return roundEmis, roundTrans

    def dataPrint(self, roundEmis, roundTrans):
        """
        Using the data from transition and emission matrices, build the lines of the formatted report.

        roundEmis / roundTrans: nested lists as returned by paramEst(); they are not modified.
        Returns a list of strings: a tab-led header of state names, one labelled row per state of the
        transition matrix, a '--------' separator, a tab-led header of alphabet symbols, and one labelled
        row per state of the emission matrix.
        """
        # both headers start with a tab so the column names sit above the values, not above the row labels
        maxMatrix = ['\t' + '\t'.join([str(x) for x in self.states])]
        maxMatrix.extend(self._labelRows(self.states, roundTrans))  # transition matrix info
        maxMatrix.append('--------')
        maxMatrix.append('\t' + '\t'.join([str(x) for x in self.alphabet]))
        maxMatrix.extend(self._labelRows(self.states, roundEmis))  # emission matrix info
        return maxMatrix
def main():
    """Read an HMM dataset from STDIN, estimate its parameters and print both matrices."""
    lines = [raw.strip() for raw in sys.stdin]  # takes STDIN only
    # Data is read from lines 0, 2, 4 and 6; lines 1, 3 and 5 are skipped (presumably separator lines).
    sequence, symbols, path, stateNames = lines[0], lines[2].split(), lines[4], lines[6].split()
    estimator = ParamHMM(sequence, symbols, path, stateNames)
    # paramEst() returns (emission, transition) in the order dataPrint() expects them.
    for row in estimator.dataPrint(*estimator.paramEst()):
        print(row)


if __name__ == '__main__':
    main()