Commit 881688af authored by Mikael Boden's avatar Mikael Boden

webservice_in_Python_3

parent de69764a
...@@ -118,6 +118,7 @@ class GO(): ...@@ -118,6 +118,7 @@ class GO():
if line.startswith('!'): if line.startswith('!'):
continue continue
(gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns) (gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns)
print(gene, symb, qual, term, evid, onto, taxa)
try: try:
(taxa_q, terms_map) = self.annots[gene] (taxa_q, terms_map) = self.annots[gene]
terms_map[term] = (evid, qual != 'NOT') terms_map[term] = (evid, qual != 'NOT')
......
...@@ -48,7 +48,7 @@ class PhyloTree: ...@@ -48,7 +48,7 @@ class PhyloTree:
If node does not exist, None is returned. If node does not exist, None is returned.
If node has no descendants, an empty list will be returned.""" If node has no descendants, an empty list will be returned."""
if not isinstance(node, PhyloNode): if not isinstance(node, PhyloNode):
node = self.root.findLabel(node) node = self.findLabel(node)
if node: if node:
return node.getDescendants(transitive) return node.getDescendants(transitive)
return None return None
...@@ -60,22 +60,24 @@ class PhyloTree: ...@@ -60,22 +60,24 @@ class PhyloTree:
If node does not exist, None is returned. If node does not exist, None is returned.
If node is the root of the tree, None is returned.""" If node is the root of the tree, None is returned."""
if not isinstance(node, PhyloNode): if not isinstance(node, PhyloNode):
node = self.root.findLabel(node) node = self.findLabel(node)
if node: if node:
myroot = self.root myroot = self.root
found = False found = False
branching = [] branching = []
while not found and myroot != None: while not found and myroot != None:
branching.append(myroot) branching.append(myroot)
# check if "myroot" is a leaf node, i.e. does not have children
if myroot.left == node or myroot.right == node: if myroot.left == node or myroot.right == node:
found = True found = True
break break
if myroot.left: if myroot.left != None: # myroot has a "left" child
if myroot.left.isAncestorOf(node, transitive = True): # check if the "left" child of "myroot" is the ancestor of "node"
myroot = myroot.left if myroot.left.isAncestorOf(node, transitive = True): # if yes,
else: # must be right branch then... myroot = myroot.left # move to the "left" child
myroot = myroot.right else: # if not,
else: # must be right branch then... myroot = myroot.right # move to the "right" child
else: # myroot does NOT have a "left" child, so let's move "right"
myroot = myroot.right myroot = myroot.right
if found and transitive: if found and transitive:
return branching return branching
...@@ -91,6 +93,8 @@ class PhyloTree: ...@@ -91,6 +93,8 @@ class PhyloTree:
self.root._backwardParsimony(self.aln) # use scores to determine sequences self.root._backwardParsimony(self.aln) # use scores to determine sequences
return self.root.getSequence() # return the sequence found at the root return self.root.getSequence() # return the sequence found at the root
def canonise(self):
self.root._canonise()
class PhyloNode: class PhyloNode:
""" A class for a node in a rooted, binary (bifurcating) tree. """ A class for a node in a rooted, binary (bifurcating) tree.
...@@ -212,6 +216,18 @@ class PhyloNode: ...@@ -212,6 +216,18 @@ class PhyloNode:
self.sequence = seq self.sequence = seq
break break
def _canonise(self):
if self.left == None and self.right == None: # at leaf
return self.label
myleft = self.left._canonise()
myright = self.right._canonise();
if myleft > myright:
tmpnode = self.left
self.left = self.right
self.right = tmpnode
return myright
return myleft
def _forwardParsimony(self, aln): def _forwardParsimony(self, aln):
""" Internal function that operates recursively to first initialise each node (forward), """ Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node, stopping only once a sequence has been assigned to the node,
...@@ -459,3 +475,6 @@ def readNewick(filename): ...@@ -459,3 +475,6 @@ def readNewick(filename):
string = ''.join(f) string = ''.join(f)
return parseNewick(string) return parseNewick(string)
def writeNewickFile(filename, my_tree):
with open(filename, 'w') as fh:
print(my_tree, end="", file=fh)
...@@ -33,17 +33,17 @@ from prob import * ...@@ -33,17 +33,17 @@ from prob import *
# Sequence ------------------**** # Sequence ------------------****
class Sequence(object): class Sequence(object):
""" A biological sequence. Stores the sequence itself (as a compact array), """ A biological sequence. Stores the sequence itself (as a compact array),
the alphabet (i.e., type of sequence it is), and optionally a name and further the alphabet (i.e., type of sequence it is), and optionally a name and further
information. """ information. """
sequence = None # The array of symbols that make up the sequence sequence = None # The array of symbols that make up the sequence
alphabet = None # The alphabet from which symbols come alphabet = None # The alphabet from which symbols come
name = None # The name (identifier) of a sequence name = None # The name (identifier) of a sequence
info = None # Other information (free text; e.g. annotations) info = None # Other information (free text; e.g. annotations)
length = None # The number of symbols that the sequence is composed of length = None # The number of symbols that the sequence is composed of
gappy = None # True if the sequence has "gaps", i.e. positions that represent deletions relative another sequence gappy = None # True if the sequence has "gaps", i.e. positions that represent deletions relative another sequence
def __init__(self, sequence, alphabet = None, name = '', info = '', gappy = False): def __init__(self, sequence, alphabet = None, name = '', info = '', gappy = False):
""" Create a sequence with the sequence data. Specifying the alphabet, """ Create a sequence with the sequence data. Specifying the alphabet,
name and other information about the sequence are all optional. name and other information about the sequence are all optional.
...@@ -56,14 +56,11 @@ class Sequence(object): ...@@ -56,14 +56,11 @@ class Sequence(object):
will output the standard protein alphabet: will output the standard protein alphabet:
['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q',
'R', 'S', 'T', 'V', 'W', 'Y'] """ 'R', 'S', 'T', 'V', 'W', 'Y'] """
#try: # convert sequence data into a compact array representation
# self.sequence = sequence.encode("utf-8") #array.array('b', ''.join([s.upper() for s in sequence]))
#except TypeError:
# raise RuntimeError('S"""""""""""""""""""""""""""""""equence data is not specified correctly: must be iterable')
self.sequence = sequence self.sequence = sequence
# Assign an alphabet # Assign an alphabet
# If no alphabet is provided, attempts to identify the alphabet from sequence
self.alphabet = None self.alphabet = None
if not alphabet is None: if not alphabet is None:
for sym in self.sequence: for sym in self.sequence:
...@@ -75,7 +72,7 @@ class Sequence(object): ...@@ -75,7 +72,7 @@ class Sequence(object):
alpha = predefAlphabets[alphaName] alpha = predefAlphabets[alphaName]
valid = True valid = True
for sym in self.sequence: for sym in self.sequence:
if not sym in alpha and (sym != '-' or not gappy): if not sym in alpha and (sym != '-' or not gappy):
valid = False valid = False
break break
if valid: if valid:
...@@ -83,17 +80,17 @@ class Sequence(object): ...@@ -83,17 +80,17 @@ class Sequence(object):
break break
if self.alphabet is None: if self.alphabet is None:
raise RuntimeError('Could not identify alphabet from sequence: %s' % name) raise RuntimeError('Could not identify alphabet from sequence: %s' % name)
# Store other information # Store other information
self.name = name self.name = name
self.info = info self.info = info
self.length = len(self.sequence) self.length = len(self.sequence)
self.gappy = gappy self.gappy = gappy
def __len__(self): def __len__(self):
""" Defines what the "len" operator returns for an instance of Sequence, e.g. """ Defines what the "len" operator returns for an instance of Sequence, e.g.
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet) >>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> print(len(seq)) >>> print (len(seq))
9 9
""" """
return len(self.sequence) return len(self.sequence)
...@@ -104,42 +101,42 @@ class Sequence(object): ...@@ -104,42 +101,42 @@ class Sequence(object):
for sym in self: for sym in self:
str += sym str += sym
return str return str
def __iter__(self): def __iter__(self):
""" Defines how a Sequence should be "iterated", i.e. what its elements are, e.g. """ Defines how a Sequence should be "iterated", i.e. what its elements are, e.g.
>>> seq = Sequence('AGGAT', DNA_Alphabet) >>> seq = Sequence('AGGAT', DNA_Alphabet)
>>> for sym in seq: >>> for sym in seq:
print(sym) print (sym)
will print A, G, G, A, T (each on a separate row) will print A, G, G, A, T (each on a separate row)
""" """
tsyms = tuple(self.sequence) tsyms = tuple(self.sequence)
return tsyms.__iter__() return tsyms.__iter__()
def __contains__(self, item): def __contains__(self, item):
""" Defines what is returned when the "in" operator is used on a Sequence, e.g. """ Defines what is returned when the "in" operator is used on a Sequence, e.g.
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet) >>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> print('T' in seq) >>> print ('T' in seq)
True True
which is equivalent to which is equivalent to
>>> print(seq.__contains__('T')) >>> print (seq.__contains__('T'))
True True
>>> print('X' in seq) >>> print ('X' in seq)
False False
""" """
for sym in self.sequence: for sym in self.sequence:
if sym == item: if sym == item:
return True return True
return False return False
def __getitem__(self, ndx): def __getitem__(self, ndx):
""" Retrieve a specified index (or a "slice" of indices) of the sequence data. """ Retrieve a specified index (or a "slice" of indices) of the sequence data.
Calling self.__getitem__(3) is equivalent to self[3] Calling self.__getitem__(3) is equivalent to self[3]
""" """
if type(ndx) is slice: if type(ndx) is slice:
return ''.join(self.sequence[ndx]) return ''.join(self.sequence[ndx])
else: else:
return self.sequence[ndx] return self.sequence[ndx]
def writeFasta(self): def writeFasta(self):
""" Write one sequence in FASTA format to a string and return it. """ """ Write one sequence in FASTA format to a string and return it. """
fasta = '>' + self.name + ' ' + self.info + '\n' fasta = '>' + self.name + ' ' + self.info + '\n'
...@@ -149,7 +146,7 @@ class Sequence(object): ...@@ -149,7 +146,7 @@ class Sequence(object):
lineofseq = ''.join(data[i*60 : (i+1)*60]) + '\n' lineofseq = ''.join(data[i*60 : (i+1)*60]) + '\n'
fasta += lineofseq fasta += lineofseq
return fasta return fasta
def count(self, findme = None): def count(self, findme = None):
""" Get the number of occurrences of specified symbol findme OR """ Get the number of occurrences of specified symbol findme OR
if findme = None, return a dictionary of counts of all symbols in alphabet """ if findme = None, return a dictionary of counts of all symbols in alphabet """
...@@ -318,13 +315,12 @@ def getCount(seqs, findme = None): ...@@ -318,13 +315,12 @@ def getCount(seqs, findme = None):
class Alignment(): class Alignment():
""" A sequence alignment class. Stores two or more sequences of equal length where """ A sequence alignment class. Stores two or more sequences of equal length where
one symbol is gap '-' one symbol is gap '-'
Example usage: Example usage:
>>> seqs = [Sequence('THIS-LI-NE', Protein_Alphabet, gappy = True), Sequence('--ISALIGNED', Protein_Alphabet, gappy = True)] >>> seqs = [Sequence('THIS-LI-NE-', Protein_Alphabet, gappy = True), Sequence('--ISALIGNED', Protein_Alphabet, gappy = True)]
>>> print(Alignment(seqs)) >>> print (Alignment(seqs))
THIS-LI-NE- THIS-LI-NE-
--ISALIGNED --ISALIGNED """
"""
alignlen = None alignlen = None
seqs = None seqs = None
...@@ -381,7 +377,7 @@ class Alignment(): ...@@ -381,7 +377,7 @@ class Alignment():
maxNameLength = self.getnamelen() + 1 maxNameLength = self.getnamelen() + 1
string = '' string = ''
wholeRows = self.alignlen / symbolsPerLine wholeRows = self.alignlen / symbolsPerLine
for i in range(wholeRows): for i in range(int(wholeRows)):
for j in range(len(self.seqs)): for j in range(len(self.seqs)):
string += self.seqs[j].name.ljust(maxNameLength) + ' ' string += self.seqs[j].name.ljust(maxNameLength) + ' '
string += self.seqs[j][i*symbolsPerLine:(i+1)*symbolsPerLine] + '\n' string += self.seqs[j][i*symbolsPerLine:(i+1)*symbolsPerLine] + '\n'
...@@ -646,49 +642,65 @@ class Alignment(): ...@@ -646,49 +642,65 @@ class Alignment():
distmat[i, j] = distmat[j, i] = dist distmat[i, j] = distmat[j, i] = dist
return distmat return distmat
def writeHTML(self, filename=None): def writeHTML(self, filename = None):
""" Generate HTML that displays the alignment in color. """ Generate HTML that displays the alignment in color.
Requires that the alphabet is annotated with the label 'html-color' (see Sequence.annotateSym) Requires that the alphabet is annotated with the label 'html-color' (see Sequence.annotateSym)
and that each symbol maps to a text string naming the color, e.g. 'blue' and that each symbol maps to a text string naming the color, e.g. 'blue'
""" """
if filename == None: html = '''<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">\n<title>Sequence Alignment</title>\n</head><body><pre>\n'''
htmlstr = '<pre>\n'
else:
htmlstr = '<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">\n<title>Sequence Alignment</title>\n</head><body><pre>\n'
maxNameLength = self.getnamelen() maxNameLength = self.getnamelen()
html = ''.ljust(maxNameLength) + ' ' html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1): for i in range(self.alignlen - 1):
if (i+1) % 10 == 0: if (i+1) % 10 == 0:
html += str(i/10+1)[-1] html += str(i/10+1)[0]
else: else:
html += ' ' html += ' '
html += '%s\n' % (self.alignlen) html += '%s\n' % (self.alignlen)
htmlstr += html
if self.alignlen > 10: if self.alignlen > 10:
html = ''.ljust(maxNameLength) + ' ' html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1): for i in range(self.alignlen - 1):
if (i+1) % 10 == 0: if (i+1) % 10 == 0:
html += '0' index = len(str(i/10 + 1).split('.')[0])
html += str(i / 10 + 1).split('.')[0][(index * -1) + 1 ] if (len(str(i / 10 + 1).split('.')[0]) > 1) else '0'
else:
html += ' '
html += '\n'
if self.alignlen > 100:
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0 and i >= 99:
index = len(str(i/10 + 1).split('.')[0])
html += str(i / 10 + 1).split('.')[0][-1] if (len(str(i / 10 + 1).split('.')[0]) >2) else '0'
else:
html += ' '
html += '\n'
if self.alignlen > 1000:
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0:
html += '0' if (len(str(i / 10 + 1).split('.')[0]) > 2) else ' '
else: else:
html += ' ' html += ' '
html += '\n' html += '\n'
htmlstr += html
for seq in self.seqs: for seq in self.seqs:
html = seq.name.ljust(maxNameLength) + ' ' html += seq.name.ljust(maxNameLength) + ' '
for sym in seq: for sym in seq:
color = self.alphabet.getAnnotation('html-color', sym) color = self.alphabet.getAnnotation('html-color', sym)
if not color: if not color:
color = 'white' color = 'white'
html += '<font style="BACKGROUND-COLOR: %s">%s</font>' % (color, sym) html += '<font style="BACKGROUND-COLOR: %s">%s</font>' % (color, sym)
html += '\n' html += '\n'
htmlstr += html html += '</pre></body></html>'
htmlstr += '<pre>'
if filename: if filename:
with open(filename, 'w+') as fh: fh = open(filename, 'w')
fh.write(htmlstr) fh.write(html)
fh.write('</body></html>\n') fh.close()
else: return html
return htmlstr
def saveConsensus(aln, theta1 = 0.99, theta2 = 0.01, countgaps = False, consensus = True, filename = None): def saveConsensus(aln, theta1 = 0.99, theta2 = 0.01, countgaps = False, consensus = True, filename = None):
""" Display a table with rows for each alignment column, showing """ Display a table with rows for each alignment column, showing
...@@ -1239,7 +1251,7 @@ def getSequence(id, database = 'uniprotkb', start=None, end=None): ...@@ -1239,7 +1251,7 @@ def getSequence(id, database = 'uniprotkb', start=None, end=None):
for i in range(MAX_TRY): for i in range(MAX_TRY):
try: try:
fastaData = fetch(id, database).decode("utf-8") fastaData = fetch(id, database)
seq = readFasta(fastaData)[0] seq = readFasta(fastaData)[0]
break break
except: except:
......
...@@ -121,22 +121,27 @@ this module is imported """ ...@@ -121,22 +121,27 @@ this module is imported """
Bool_Alphabet = Alphabet('TF') Bool_Alphabet = Alphabet('TF')
DNA_Alphabet = Alphabet('ACGT') DNA_Alphabet = Alphabet('ACGT')
DNA_Alphabet_wN = Alphabet('ACGTN') DNA_Alphabet_wN = Alphabet('ACGTN')
RNA_Alphabet_wN = Alphabet('ACGUN')
RNA_Alphabet = Alphabet('ACGU') RNA_Alphabet = Alphabet('ACGU')
Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY') Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX') Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX')
Protein_Alphabet_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*') Protein_Alphabet_wSTOP = Protein_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
DSSP_Alphabet = Alphabet('GHITEBSC') DSSP_Alphabet = Alphabet('GHITEBSC')
DSSP3_Alphabet = Alphabet('HEC') DSSP3_Alphabet = Alphabet('HEC')
predefAlphabets = {'DNA': DNA_Alphabet, predefAlphabets = {'Bool_Alphabet': Bool_Alphabet,
'DNA': DNA_Alphabet,
'RNA': RNA_Alphabet, 'RNA': RNA_Alphabet,
'DNAwN': Alphabet('ACGTN'), 'DNAwN': RNA_Alphabet_wN,
'RNAwN': Alphabet('ACGUN'), 'RNAwN': DNA_Alphabet_wN,
'Protein': Protein_Alphabet, 'Protein': Protein_Alphabet,
'ProteinwX': Protein_wX} 'ProteinwX': Protein_wX,
'ProteinwSTOP' : Protein_wSTOP,
'DSSP_Alphabet' : DSSP_Alphabet,
'DSSP3_Alphabet' : DSSP3_Alphabet}
# The preferred order in which a predefined alphabet is assigned to a sequence # The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid) # (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder = ['DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX'] preferredOrder = ['Bool_Alphabet', 'DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX', 'ProteinwSTOP', 'DSSP_Alphabet', 'DSSP3_Alphabet']
# Useful annotations # Useful annotations
DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'}) DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'})
RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'}) RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'})
......
...@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'): ...@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId
# Get the entry # Get the entry
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
if data.startswith(b'ERROR'): print (type(data))
if data.startswith("ERROR"):
raise RuntimeError(data) raise RuntimeError(data)
return data return data
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
def search(query, dbName='uniprot', format='list', limit=100): def search(query, dbName='uniprot', format='list', limit=100):
...@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query
# Get the entries # Get the entries
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
if format == 'list': if format == 'list':
return data.splitlines() return data.splitlines()
else: else:
return data return data
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
elif dbName.startswith('refseq'): elif dbName.startswith('refseq'):
dbs = dbName.split(":") dbs = dbName.split(":")
...@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit) url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit)
# Get the entries # Get the entries
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
words = data.split("</Id>") words = data.split("</Id>")
words = [w[w.find("<Id>")+4:] for w in words[:-1]] words = [w[w.find("<Id>")+4:] for w in words[:-1]]
if format == 'list': if format == 'list':
...@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id=" url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id="
for w in words: for w in words:
url += w + "," url += w + ","
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
return data return data
else: else:
return '' return ''
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
return return
...@@ -199,7 +201,7 @@ def getGODef(goterm): ...@@ -199,7 +201,7 @@ def getGODef(goterm):
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
try: try:
entry={'id': None, 'name': None, 'def': None} entry={'id': None, 'name': None, 'def': None}
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines(): for row in data.splitlines():
index = row.find(':') index = row.find(':')
if index > 0 and len(row[index:]) > 1: if index > 0 and len(row[index:]) > 1:
...@@ -209,7 +211,7 @@ def getGODef(goterm): ...@@ -209,7 +211,7 @@ def getGODef(goterm):
if entry[field] == None: # check if not yet assigned if entry[field] == None: # check if not yet assigned
entry[field] = value entry[field] = value
return entry return entry
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
def getGOTerms(genes, database='UniProtKB', completeAnnot = False): def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
...@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False): ...@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
if response.info().get('Content-Encoding') == 'gzip': if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO(response.read()) buf = StringIO(response.read())
f = gzip.GzipFile(fileobj=buf) f = gzip.GzipFile(fileobj=buf)
data = f.read() data = f.read().decode("utf-8")
else: else:
data = response.read() data = response.read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t') values = row.split('\t')
if len(values) >= 7: if len(values) >= 7:
...@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False): ...@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
else: else:
termsmap[key] = set([values[6]]) termsmap[key] = set([values[6]])
taxonmap[key] = int(values[4]) taxonmap[key] = int(values[4])
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
if completeAnnot: if completeAnnot:
if len(genes) == 1: if len(genes) == 1:
...@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None): ...@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
url = __ebiGOUrl__ + uri_string + goterm.strip() url = __ebiGOUrl__ + uri_string + goterm.strip()
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t') values = row.split('\t')
if len(values) >= 7: if len(values) >= 7:
genes.add(values[1]) genes.add(values[1])
map[goterm] = list(genes) map[goterm] = list(genes)
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
if len(goterms) == 1: if len(goterms) == 1:
return map[goterms[0]] return map[goterms[0]]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment