Commit 881688af authored by Mikael Boden's avatar Mikael Boden

webservice_in_Python_3

parent de69764a
...@@ -118,6 +118,7 @@ class GO(): ...@@ -118,6 +118,7 @@ class GO():
if line.startswith('!'): if line.startswith('!'):
continue continue
(gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns) (gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns)
print(gene, symb, qual, term, evid, onto, taxa)
try: try:
(taxa_q, terms_map) = self.annots[gene] (taxa_q, terms_map) = self.annots[gene]
terms_map[term] = (evid, qual != 'NOT') terms_map[term] = (evid, qual != 'NOT')
......
...@@ -48,7 +48,7 @@ class PhyloTree: ...@@ -48,7 +48,7 @@ class PhyloTree:
If node does not exist, None is returned. If node does not exist, None is returned.
If node has no descendants, an empty list will be returned.""" If node has no descendants, an empty list will be returned."""
if not isinstance(node, PhyloNode): if not isinstance(node, PhyloNode):
node = self.root.findLabel(node) node = self.findLabel(node)
if node: if node:
return node.getDescendants(transitive) return node.getDescendants(transitive)
return None return None
...@@ -60,22 +60,24 @@ class PhyloTree: ...@@ -60,22 +60,24 @@ class PhyloTree:
If node does not exist, None is returned. If node does not exist, None is returned.
If node is the root of the tree, None is returned.""" If node is the root of the tree, None is returned."""
if not isinstance(node, PhyloNode): if not isinstance(node, PhyloNode):
node = self.root.findLabel(node) node = self.findLabel(node)
if node: if node:
myroot = self.root myroot = self.root
found = False found = False
branching = [] branching = []
while not found and myroot != None: while not found and myroot != None:
branching.append(myroot) branching.append(myroot)
# check if "myroot" is a leaf node, i.e. does not have children
if myroot.left == node or myroot.right == node: if myroot.left == node or myroot.right == node:
found = True found = True
break break
if myroot.left: if myroot.left != None: # myroot has a "left" child
if myroot.left.isAncestorOf(node, transitive = True): # check if the "left" child of "myroot" is the ancestor of "node"
myroot = myroot.left if myroot.left.isAncestorOf(node, transitive = True): # if yes,
else: # must be right branch then... myroot = myroot.left # move to the "left" child
myroot = myroot.right else: # if not,
else: # must be right branch then... myroot = myroot.right # move to the "right" child
else: # myroot does NOT have a "left" child, so let's move "right"
myroot = myroot.right myroot = myroot.right
if found and transitive: if found and transitive:
return branching return branching
...@@ -91,6 +93,8 @@ class PhyloTree: ...@@ -91,6 +93,8 @@ class PhyloTree:
self.root._backwardParsimony(self.aln) # use scores to determine sequences self.root._backwardParsimony(self.aln) # use scores to determine sequences
return self.root.getSequence() # return the sequence found at the root return self.root.getSequence() # return the sequence found at the root
def canonise(self):
self.root._canonise()
class PhyloNode: class PhyloNode:
""" A class for a node in a rooted, binary (bifurcating) tree. """ A class for a node in a rooted, binary (bifurcating) tree.
...@@ -212,6 +216,18 @@ class PhyloNode: ...@@ -212,6 +216,18 @@ class PhyloNode:
self.sequence = seq self.sequence = seq
break break
def _canonise(self):
if self.left == None and self.right == None: # at leaf
return self.label
myleft = self.left._canonise()
myright = self.right._canonise();
if myleft > myright:
tmpnode = self.left
self.left = self.right
self.right = tmpnode
return myright
return myleft
def _forwardParsimony(self, aln): def _forwardParsimony(self, aln):
""" Internal function that operates recursively to first initialise each node (forward), """ Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node, stopping only once a sequence has been assigned to the node,
...@@ -459,3 +475,6 @@ def readNewick(filename): ...@@ -459,3 +475,6 @@ def readNewick(filename):
string = ''.join(f) string = ''.join(f)
return parseNewick(string) return parseNewick(string)
def writeNewickFile(filename, my_tree):
with open(filename, 'w') as fh:
print(my_tree, end="", file=fh)
This diff is collapsed.
...@@ -121,22 +121,27 @@ this module is imported """ ...@@ -121,22 +121,27 @@ this module is imported """
Bool_Alphabet = Alphabet('TF') Bool_Alphabet = Alphabet('TF')
DNA_Alphabet = Alphabet('ACGT') DNA_Alphabet = Alphabet('ACGT')
DNA_Alphabet_wN = Alphabet('ACGTN') DNA_Alphabet_wN = Alphabet('ACGTN')
RNA_Alphabet_wN = Alphabet('ACGUN')
RNA_Alphabet = Alphabet('ACGU') RNA_Alphabet = Alphabet('ACGU')
Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY') Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX') Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX')
Protein_Alphabet_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*') Protein_Alphabet_wSTOP = Protein_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
DSSP_Alphabet = Alphabet('GHITEBSC') DSSP_Alphabet = Alphabet('GHITEBSC')
DSSP3_Alphabet = Alphabet('HEC') DSSP3_Alphabet = Alphabet('HEC')
predefAlphabets = {'DNA': DNA_Alphabet, predefAlphabets = {'Bool_Alphabet': Bool_Alphabet,
'DNA': DNA_Alphabet,
'RNA': RNA_Alphabet, 'RNA': RNA_Alphabet,
'DNAwN': Alphabet('ACGTN'), 'DNAwN': RNA_Alphabet_wN,
'RNAwN': Alphabet('ACGUN'), 'RNAwN': DNA_Alphabet_wN,
'Protein': Protein_Alphabet, 'Protein': Protein_Alphabet,
'ProteinwX': Protein_wX} 'ProteinwX': Protein_wX,
'ProteinwSTOP' : Protein_wSTOP,
'DSSP_Alphabet' : DSSP_Alphabet,
'DSSP3_Alphabet' : DSSP3_Alphabet}
# The preferred order in which a predefined alphabet is assigned to a sequence # The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid) # (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder = ['DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX'] preferredOrder = ['Bool_Alphabet', 'DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX', 'ProteinwSTOP', 'DSSP_Alphabet', 'DSSP3_Alphabet']
# Useful annotations # Useful annotations
DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'}) DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'})
RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'}) RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'})
......
...@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'): ...@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId
# Get the entry # Get the entry
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
if data.startswith(b'ERROR'): print (type(data))
if data.startswith("ERROR"):
raise RuntimeError(data) raise RuntimeError(data)
return data return data
except(urllib.error.HTTPError, ex):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
def search(query, dbName='uniprot', format='list', limit=100): def search(query, dbName='uniprot', format='list', limit=100):
...@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query
# Get the entries # Get the entries
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
if format == 'list': if format == 'list':
return data.splitlines() return data.splitlines()
else: else:
return data return data
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
elif dbName.startswith('refseq'): elif dbName.startswith('refseq'):
dbs = dbName.split(":") dbs = dbName.split(":")
...@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit) url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit)
# Get the entries # Get the entries
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
words = data.split("</Id>") words = data.split("</Id>")
words = [w[w.find("<Id>")+4:] for w in words[:-1]] words = [w[w.find("<Id>")+4:] for w in words[:-1]]
if format == 'list': if format == 'list':
...@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100): ...@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id=" url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id="
for w in words: for w in words:
url += w + "," url += w + ","
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
return data return data
else: else:
return '' return ''
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
return return
...@@ -199,7 +201,7 @@ def getGODef(goterm): ...@@ -199,7 +201,7 @@ def getGODef(goterm):
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
try: try:
entry={'id': None, 'name': None, 'def': None} entry={'id': None, 'name': None, 'def': None}
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines(): for row in data.splitlines():
index = row.find(':') index = row.find(':')
if index > 0 and len(row[index:]) > 1: if index > 0 and len(row[index:]) > 1:
...@@ -209,7 +211,7 @@ def getGODef(goterm): ...@@ -209,7 +211,7 @@ def getGODef(goterm):
if entry[field] == None: # check if not yet assigned if entry[field] == None: # check if not yet assigned
entry[field] = value entry[field] = value
return entry return entry
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
def getGOTerms(genes, database='UniProtKB', completeAnnot = False): def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
...@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False): ...@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
if response.info().get('Content-Encoding') == 'gzip': if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO(response.read()) buf = StringIO(response.read())
f = gzip.GzipFile(fileobj=buf) f = gzip.GzipFile(fileobj=buf)
data = f.read() data = f.read().decode("utf-8")
else: else:
data = response.read() data = response.read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t') values = row.split('\t')
if len(values) >= 7: if len(values) >= 7:
...@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False): ...@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
else: else:
termsmap[key] = set([values[6]]) termsmap[key] = set([values[6]])
taxonmap[key] = int(values[4]) taxonmap[key] = int(values[4])
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
if completeAnnot: if completeAnnot:
if len(genes) == 1: if len(genes) == 1:
...@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None): ...@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
url = __ebiGOUrl__ + uri_string + goterm.strip() url = __ebiGOUrl__ + uri_string + goterm.strip()
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
try: try:
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read().decode("utf-8")
for row in data.splitlines()[1:]: # we ignore first (header) row for row in data.splitlines()[1:]: # we ignore first (header) row
values = row.split('\t') values = row.split('\t')
if len(values) >= 7: if len(values) >= 7:
genes.add(values[1]) genes.add(values[1])
map[goterm] = list(genes) map[goterm] = list(genes)
except(urllib.error.HTTPError, ex): except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
if len(goterms) == 1: if len(goterms) == 1:
return map[goterms[0]] return map[goterms[0]]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment