Commit 6ca6ac3d authored by Mikael Boden

EDITOR=emacsclient

parent bd07c60d
......@@ -258,7 +258,8 @@ class BedFile:
self.chromqueue = ival.Stack()
for c in sorted(self.chroms.keys())[::-1]:
self.chromqueue.push(self.generate(c))
self.current = self.chromqueue.pop()
if not self.chromqueue.isEmpty():
self.current = self.chromqueue.pop()
return self
def __next__(self):
......
......@@ -55,7 +55,23 @@ class GibbsMotif():
""" background that will be used as pseudo-counts """
pseudocount = pseudocount or prob.Distrib(self.alphabet, 1.0)
""" q: the foreground distribution (specifying the W distributions in aligned columns)
p: the background distribution (for non-aligned positions in all sequences) """
columns 0 1 2 3 4 ... W
Rows
A .1
C .5
G .2
T .2
p: the background distribution (for non-aligned positions in all sequences)
columns 0 1 2 3 4 ... W
Rows
A .25
C .25
G .25
T .25
"""
q = [ prob.Distrib(self.alphabet, pseudocount) for _ in range(W) ]
p = prob.Distrib(self.alphabet, pseudocount)
a = self.alignment
......
......@@ -603,9 +603,9 @@ if __name__ == '__main__1':
print(tree)
if __name__ == '__main__':
aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.aln', sequence.Protein_Alphabet)
aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/2020/dnaexamq.aln', sequence.Protein_Alphabet)
tree = runUPGMA(sequence.Alignment(aln), "fractional")
writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.nwk', tree)
writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/2021/examq_pdist.nwk', tree)
if __name__ == '__main__3':
aln = sequence.readClustalFile('/Users/mikael/simhome/ASR/dp16_example.aln', sequence.Protein_Alphabet)
......
......@@ -501,6 +501,10 @@ class Joint(object):
return 0.0
return float(score) / float(self.totalCnt)
# def __setitem__(self, key, value):
# key = _getMeTuple(self.alphas, key)
# self.store[key] = value
def __str__(self):
""" Return a textual representation of the JP. """
str = '< '
......
......@@ -191,6 +191,23 @@ class Sequence(object):
idx = ''.join(degapped).find(findme)
return idxs[idx] if idx >= 0 else -1
def getKmers(self, k):
    """ Count the k-mers of the sequence, returned in canonical (alphabet-based) order.

    k: the length of the sub-sequences (k-mers) to count

    Returns a list of len(alphabet) ** k counts. The slot of a k-mer is
        sum over positions j of alphabet.index(symbol_j) * len(alphabet) ** j
    i.e. the FIRST symbol of the k-mer is the least significant digit.
    If the sequence is flagged as gappy, the k-mers are taken from the
    result of getDegapped() rather than from the raw sequence.
    A sequence shorter than k yields all-zero counts.
    """
    if self.gappy == False:
        myseq = self.sequence
    else:  # gappy sequence: count on what getDegapped() returns; its index map is not needed here
        myseq, _ = self.getDegapped()
    nsyms = len(self.alphabet)  # loop-invariant, so look it up once
    counts = [0] * (nsyms ** k)
    # A sequence of length n has n - k + 1 windows of width k; the "+ 1" makes sure
    # the final k-mer (starting at n - k) is counted too.
    for i in range(len(myseq) - k + 1):
        idx = 0
        multiplier = 1
        for s in myseq[i:i + k]:
            idx += self.alphabet.index(s) * multiplier
            multiplier *= nsyms
        counts[idx] += 1
    return counts
"""
Below are some useful methods for loading data from strings and files.
Recognize the FASTA format (nothing fancy).
......
......@@ -175,9 +175,13 @@ def getGODef(goterm):
Retrieve information about a GO term
goterm: the identifier, e.g. 'GO:0002080'
"""
# first turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
# to turn off server certificate verification
#if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
# ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
# Construct URL with query term
url = __ebiGOUrl__ + 'ontology/go/search?query=' + goterm
# Get the entry: fill in the fields specified below
......@@ -224,9 +228,13 @@ def getGOTerms(genes):
uri_string += gene + "," if i < len(genebatch) - 1 else gene
# Construct URL
# Get the entry: fill in the fields specified below
# installing Python doesn't always install security certificates, and the below code is a workaround...
# first turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
# if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
# ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
page = 1
try:
while (True):
......@@ -280,13 +288,17 @@ def getGenes(goterms, taxo=None):
break
termcnt += 1
uri_string = 'annotation/search?limit=' + str(
limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
limitpage) + '&taxonId=' + str(taxo) + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)):
term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term
# installing Python doesn't always install security certificates, and the below code is a workaround...
# first turn off server certificate verification
if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
# if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
# ssl._create_default_https_context = ssl._create_unverified_context
# The better solution is to install network security certificates from the command line (MacOS below),
# which should render the above unnecessary:
# bash /Applications/Python*/Install\ Certificates.command
page = 1
try:
while (True):
......@@ -534,7 +546,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
request = urllib.request.Request(url, data)
opener = urllib.request.build_opener()
response = opener.open(request)
page = response.read(20000000).decode('utf-8')
page = response.read(2000000000).decode('utf-8')
up_dict = {}
# For each record we retrieve, split the line by tabs and build up the UniProt dict
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment