Commit 6ca6ac3d authored by Mikael Boden

EDITOR=emacsclient

parent bd07c60d
...@@ -258,7 +258,8 @@ class BedFile: ...@@ -258,7 +258,8 @@ class BedFile:
self.chromqueue = ival.Stack() self.chromqueue = ival.Stack()
for c in sorted(self.chroms.keys())[::-1]: for c in sorted(self.chroms.keys())[::-1]:
self.chromqueue.push(self.generate(c)) self.chromqueue.push(self.generate(c))
self.current = self.chromqueue.pop() if not self.chromqueue.isEmpty():
self.current = self.chromqueue.pop()
return self return self
def __next__(self): def __next__(self):
......
...@@ -55,7 +55,23 @@ class GibbsMotif(): ...@@ -55,7 +55,23 @@ class GibbsMotif():
""" background that will be used as pseudo-counts """ """ background that will be used as pseudo-counts """
pseudocount = pseudocount or prob.Distrib(self.alphabet, 1.0) pseudocount = pseudocount or prob.Distrib(self.alphabet, 1.0)
""" q: the foreground distribution (specifying the W distributions in aligned columns) """ q: the foreground distribution (specifying the W distributions in aligned columns)
p: the background distribution (for non-aligned positions in all sequences) """
columns 0 1 2 3 4 ... W
Rows
A .1
C .5
G .2
T .2
p: the background distribution (for non-aligned positions in all sequences)
columns 0 1 2 3 4 ... W
Rows
A .25
C .25
G .25
T .25
"""
q = [ prob.Distrib(self.alphabet, pseudocount) for _ in range(W) ] q = [ prob.Distrib(self.alphabet, pseudocount) for _ in range(W) ]
p = prob.Distrib(self.alphabet, pseudocount) p = prob.Distrib(self.alphabet, pseudocount)
a = self.alignment a = self.alignment
......
...@@ -603,9 +603,9 @@ if __name__ == '__main__1': ...@@ -603,9 +603,9 @@ if __name__ == '__main__1':
print(tree) print(tree)
if __name__ == '__main__': if __name__ == '__main__':
aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.aln', sequence.Protein_Alphabet) aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/2020/dnaexamq.aln', sequence.Protein_Alphabet)
tree = runUPGMA(sequence.Alignment(aln), "fractional") tree = runUPGMA(sequence.Alignment(aln), "fractional")
writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.nwk', tree) writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/2021/examq_pdist.nwk', tree)
if __name__ == '__main__3': if __name__ == '__main__3':
aln = sequence.readClustalFile('/Users/mikael/simhome/ASR/dp16_example.aln', sequence.Protein_Alphabet) aln = sequence.readClustalFile('/Users/mikael/simhome/ASR/dp16_example.aln', sequence.Protein_Alphabet)
......
...@@ -501,6 +501,10 @@ class Joint(object): ...@@ -501,6 +501,10 @@ class Joint(object):
return 0.0 return 0.0
return float(score) / float(self.totalCnt) return float(score) / float(self.totalCnt)
# def __setitem__(self, key, value):
# key = _getMeTuple(self.alphas, key)
# self.store[key] = value
def __str__(self): def __str__(self):
""" Return a textual representation of the JP. """ """ Return a textual representation of the JP. """
str = '< ' str = '< '
......
...@@ -191,6 +191,23 @@ class Sequence(object): ...@@ -191,6 +191,23 @@ class Sequence(object):
idx = ''.join(degapped).find(findme) idx = ''.join(degapped).find(findme)
return idxs[idx] if idx >= 0 else -1 return idxs[idx] if idx >= 0 else -1
def getKmers(self, k):
    """ Count every k-mer (window of k consecutive symbols) in the sequence.

    When the sequence is flagged as gappy, the windows are taken from the
    result of self.getDegapped() instead of from self.sequence.

    k: the width of the windows to count
    returns: a list of len(self.alphabet) ** k counts. The slot of a k-mer is
        sum(self.alphabet.index(sym_j) * len(self.alphabet) ** j) for j = 0..k-1,
        i.e. the FIRST symbol of the k-mer is the least significant digit.
        If k is larger than the sequence, all counts are zero.
    """
    # Explicit comparison kept on purpose so the branch taken is the same as before.
    if self.gappy == False:
        myseq = self.sequence
    else:  # gappy sequence: count over the de-gapped symbols (index map is not needed here)
        myseq, _ = self.getDegapped()
    base = len(self.alphabet)
    counts = [0] * (base ** k)
    # A sequence of length n contains n - k + 1 windows of width k; without
    # the "+ 1" the final k-mer would never be counted.
    for start in range(len(myseq) - k + 1):
        idx = 0
        multiplier = 1
        for sym in myseq[start:start + k]:
            idx += self.alphabet.index(sym) * multiplier
            multiplier *= base
        counts[idx] += 1
    return counts
""" """
Below are some useful methods for loading data from strings and files. Below are some useful methods for loading data from strings and files.
Recognize the FASTA format (nothing fancy). Recognize the FASTA format (nothing fancy).
......
...@@ -175,9 +175,13 @@ def getGODef(goterm): ...@@ -175,9 +175,13 @@ def getGODef(goterm):
Retrieve information about a GO term Retrieve information about a GO term
goterm: the identifier, e.g. 'GO:0002080' goterm: the identifier, e.g. 'GO:0002080'
""" """
# first turn off server certificate verification # to turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): #if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context # ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
# Construct URL with query term # Construct URL with query term
url = __ebiGOUrl__ + 'ontology/go/search?query=' + goterm url = __ebiGOUrl__ + 'ontology/go/search?query=' + goterm
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
...@@ -224,9 +228,13 @@ def getGOTerms(genes): ...@@ -224,9 +228,13 @@ def getGOTerms(genes):
uri_string += gene + "," if i < len(genebatch) - 1 else gene uri_string += gene + "," if i < len(genebatch) - 1 else gene
# Construct URL # Construct URL
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
# installing Python doesn't always install security certificates, and the below code is a workaround...
# first turn off server certificate verification # first turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): # if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context # ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
page = 1 page = 1
try: try:
while (True): while (True):
...@@ -280,13 +288,17 @@ def getGenes(goterms, taxo=None): ...@@ -280,13 +288,17 @@ def getGenes(goterms, taxo=None):
break break
termcnt += 1 termcnt += 1
uri_string = 'annotation/search?limit=' + str( uri_string = 'annotation/search?limit=' + str(
limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId=' limitpage) + '&taxonId=' + str(taxo) + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)): for i in range(len(termbatch)):
term = termbatch[i] term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term uri_string += term + "," if i < len(termbatch) - 1 else term
# installing Python doesn't always install security certificates, and the below code is a workaround...
# first turn off server certificate verification # first turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): # if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context # ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
page = 1 page = 1
try: try:
while (True): while (True):
...@@ -534,7 +546,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None): ...@@ -534,7 +546,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
request = urllib.request.Request(url, data) request = urllib.request.Request(url, data)
opener = urllib.request.build_opener() opener = urllib.request.build_opener()
response = opener.open(request) response = opener.open(request)
page = response.read(20000000).decode('utf-8') page = response.read(2000000000).decode('utf-8')
up_dict = {} up_dict = {}
# For each record we retrieve, split the line by tabs and build up the UniProt dict # For each record we retrieve, split the line by tabs and build up the UniProt dict
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment