diff --git a/prob.py b/prob.py
index cf4a36057bf48c11d6787512d3edd0cdc216b80e..52b15a3a32e86ea08cf22d23cd4b9aa0eba74595 100644
--- a/prob.py
+++ b/prob.py
@@ -865,24 +865,30 @@ class HMM():
> states = cpg_hmm.viterbi(X)
> print(states)
"""
- X = _terminate(symseq, 1, self.startsym, self.endsym)
+ X = _terminate(symseq, 1, self.startsym, self.endsym) # put start and end symbols on sequence
# Initialise state scores for each index in X
for state in self.mystates:
- # Fill in emission probabilities for each index in X
+ # Fill in emission probabilities in V for each index of X
+ # (only the first position is really needed)
V[state] = [self.e[state][x] for x in X]
trace[state] = []
+ # Next loop through the sequence
for j in range(len(X) - 1):
- i = j + 1 # sequence index that we're processing
- for tostate in self.mystates:
+ i = j + 1 # sequence index that we're processing; starts at 1, not 0
+ for tostate in self.mystates: # check each state for i = 1, ...
tracemax = 0
- beststate = None
+ beststate = None # the state v with max[Vv(i-1) * t(v,u)]
for fromstate in self.mystates:
+ # determine the best score propagated forward from previous state
score = V[fromstate][i - 1] * self.a[fromstate][tostate]
if score > tracemax:
beststate = fromstate
tracemax = score
+ # record the transition that will appear in the traceback
trace[tostate].append(beststate)
+ # finalise the dynamic programming score for current i in state u
V[tostate][i] = self.e[tostate][X[i]] * tracemax
+ # finally, assemble the string that describes the most probable path
ret = ''
traced = '$'
for j in range(len(X)):