""" Find the position of the specified symbol or sub-sequence """
ifgappy==Falseorself.gappy==False:
return''.join(self.sequence).find(findme)
else:# if the sequence is gappy AND the function is called with gappy = True THEN run the find on the de-gapped sequence
degapped,idxs=self.getDegapped()
idx=''.join(degapped).find(findme)
returnidxs[idx]ifidx>=0else-1
"""
Below are some useful methods for loading data from strings and files.
...
...
@@ -590,6 +612,51 @@ class Alignment():
s.set(a,b,int(round(sab)))
returns
defoutliers(self,cap=None):
"""
Score the extent to which each sequence in the alignment is an outlier
:param cap: the number of sequences that need to share the state of a position for it to be optimally aligned
:return: a tuple of three lists, each with a score for each sequence, in order of the alignment;
the first list contains an entropy-based score accumulated over the whole sequence;
the second list has a gap-continuity score (the greatest entropy-based score collated for a single, continuous gap, most probably a "deletion");
the third list has a character-continuity score (the greatest entropy-based score collated for a single, continuous character string, most probably an "insertion");
for all three scores, higher means outlier, zero means it is optimally aligned
"""
nseqs=len(self.seqs)
ifnotcap:
cap=nseqs
gapmat=numpy.zeros((nseqs,self.alignlen))
ngaps=numpy.zeros((self.alignlen))
entscore=[0for_inrange(nseqs)]# cumulative entropy based score