Commit 881688af authored by Mikael Boden

webservice_in_Python_3

parent de69764a
......@@ -118,6 +118,7 @@ class GO():
if line.startswith('!'):
continue
(gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns)
print(gene, symb, qual, term, evid, onto, taxa)
try:
(taxa_q, terms_map) = self.annots[gene]
terms_map[term] = (evid, qual != 'NOT')
......
......@@ -48,7 +48,7 @@ class PhyloTree:
If node does not exist, None is returned.
If node has no descendants, an empty list will be returned."""
if not isinstance(node, PhyloNode):
node = self.root.findLabel(node)
node = self.findLabel(node)
if node:
return node.getDescendants(transitive)
return None
......@@ -60,22 +60,24 @@ class PhyloTree:
If node does not exist, None is returned.
If node is the root of the tree, None is returned."""
if not isinstance(node, PhyloNode):
node = self.root.findLabel(node)
node = self.findLabel(node)
if node:
myroot = self.root
found = False
branching = []
while not found and myroot != None:
branching.append(myroot)
# check if "myroot" is a leaf node, i.e. does not have children
if myroot.left == node or myroot.right == node:
found = True
break
if myroot.left:
if myroot.left.isAncestorOf(node, transitive = True):
myroot = myroot.left
else: # must be right branch then...
myroot = myroot.right
else: # must be right branch then...
if myroot.left != None: # myroot has a "left" child
# check if the "left" child of "myroot" is the ancestor of "node"
if myroot.left.isAncestorOf(node, transitive = True): # if yes,
myroot = myroot.left # move to the "left" child
else: # if not,
myroot = myroot.right # move to the "right" child
else: # myroot does NOT have a "left" child, so let's move "right"
myroot = myroot.right
if found and transitive:
return branching
......@@ -91,6 +93,8 @@ class PhyloTree:
self.root._backwardParsimony(self.aln) # use scores to determine sequences
return self.root.getSequence() # return the sequence found at the root
def canonise(self):
    """ Canonise the tree by invoking the recursive _canonise on the root node.
        Whatever the root's _canonise returns is discarded; nothing is returned here. """
    top = self.root
    top._canonise()
class PhyloNode:
""" A class for a node in a rooted, binary (bifurcating) tree.
......@@ -212,6 +216,18 @@ class PhyloNode:
self.sequence = seq
break
def _canonise(self):
    """ Recursively order the children of every node below (and including) this one,
        so that the child whose subtree holds the smallest leaf label ends up on the left.
        Labels are compared with '>' (assumes leaf labels are mutually comparable, e.g. strings).
        A node with a single child is left as is; the label reported is that of its only subtree.
        Returns the smallest leaf label found in the subtree rooted at this node. """
    if self.left is None and self.right is None:  # at leaf
        return self.label
    # tolerate a node with only one child instead of failing on None
    if self.left is None:
        return self.right._canonise()
    if self.right is None:
        return self.left._canonise()
    myleft = self.left._canonise()
    myright = self.right._canonise()
    if myleft > myright:
        # smaller label is on the right: swap the two children
        self.left, self.right = self.right, self.left
        return myright
    return myleft
def _forwardParsimony(self, aln):
""" Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node,
......@@ -459,3 +475,6 @@ def readNewick(filename):
string = ''.join(f)
return parseNewick(string)
def writeNewickFile(filename, my_tree):
    """ Write the string form of my_tree to the file named filename.
        The file is opened in 'w' mode (existing content is replaced) and
        no trailing newline is appended. """
    with open(filename, 'w') as out:
        out.write(str(my_tree))
This diff is collapsed.
......@@ -121,22 +121,27 @@ this module is imported """
Bool_Alphabet = Alphabet('TF')
DNA_Alphabet = Alphabet('ACGT')
DNA_Alphabet_wN = Alphabet('ACGTN')
RNA_Alphabet_wN = Alphabet('ACGUN')
RNA_Alphabet = Alphabet('ACGU')
Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX')
Protein_Alphabet_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
Protein_Alphabet_wSTOP = Protein_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
DSSP_Alphabet = Alphabet('GHITEBSC')
DSSP3_Alphabet = Alphabet('HEC')
predefAlphabets = {'DNA': DNA_Alphabet,
predefAlphabets = {'Bool_Alphabet': Bool_Alphabet,
'DNA': DNA_Alphabet,
'RNA': RNA_Alphabet,
'DNAwN': Alphabet('ACGTN'),
'RNAwN': Alphabet('ACGUN'),
'DNAwN': RNA_Alphabet_wN,
'RNAwN': DNA_Alphabet_wN,
'Protein': Protein_Alphabet,
'ProteinwX': Protein_wX}
'ProteinwX': Protein_wX,
'ProteinwSTOP' : Protein_wSTOP,
'DSSP_Alphabet' : DSSP_Alphabet,
'DSSP3_Alphabet' : DSSP3_Alphabet}
# The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder = ['DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX']
preferredOrder = ['Bool_Alphabet', 'DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX', 'ProteinwSTOP', 'DSSP_Alphabet', 'DSSP3_Alphabet']
# Useful annotations
DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'})
RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'})
......
......@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId
# Get the entry
try:
data = urllib.request.urlopen(url).read()
if data.startswith(b'ERROR'):
data = urllib.request.urlopen(url).read().decode("utf-8")
print (type(data))
if data.startswith("ERROR"):
raise RuntimeError(data)
return data
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
def search(query, dbName='uniprot', format='list', limit=100):
......@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query
# Get the entries
try:
data = urllib.request.urlopen(url).read()
data = urllib.request.urlopen(url).read().decode("utf-8")
if format == 'list':
return data.splitlines()
else:
return data
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
elif dbName.startswith('refseq'):
dbs = dbName.split(":")
......@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit)
# Get the entries
try:
data = urllib.request.urlopen(url).read()
data = urllib.request.urlopen(url).read().decode("utf-8")
words = data.split("</Id>")
words = [w[w.find("<Id>")+4:] for w in words[:-1]]
if format == 'list':
......@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id="
for w in words:
url += w + ","
data = urllib.request.urlopen(url).read()
data = urllib.request.urlopen(url).read().decode("utf-8")
return data
else:
return ''
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
return
......@@ -199,7 +201,7 @@ def getGODef(goterm):
# Get the entry: fill in the fields specified below
try:
entry={'id': None, 'name': None, 'def': None}
data = urllib.request.urlopen(url).read()
data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines():
index = row.find(':')
if index > 0 and len(row[index:]) > 1:
......@@ -209,7 +211,7 @@ def getGODef(goterm):
if entry[field] == None: # check if not yet assigned
entry[field] = value
return entry
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
......@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO(response.read())
f = gzip.GzipFile(fileobj=buf)
data = f.read()
data = f.read().decode("utf-8")
else:
data = response.read()
data = response.read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t')
if len(values) >= 7:
......@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
else:
termsmap[key] = set([values[6]])
taxonmap[key] = int(values[4])
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
if completeAnnot:
if len(genes) == 1:
......@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
url = __ebiGOUrl__ + uri_string + goterm.strip()
# Get the entry: fill in the fields specified below
try:
data = urllib.request.urlopen(url).read()
data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t')
if len(values) >= 7:
genes.add(values[1])
map[goterm] = list(genes)
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
if len(goterms) == 1:
return map[goterms[0]]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment