webservice_in_Python_3

881688af · Mikael Boden · de69764a · 881688af · 881688af · 881688af
Commit 881688af authored Jul 21, 2017 by Mikael Boden
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 122 additions and 83 deletions

godata.py godata.py +1 -0

phylo.py phylo.py +27 -8

sequence.py sequence.py +66 -54

sym.py sym.py +11 -6

webservice.py webservice.py +17 -15

No files found.
--- a/godata.py
+++ b/godata.py
@@ -118,6 +118,7 @@ class GO():
            if line.startswith('!'):
                continue
            (gene, symb, qual, term, evid, onto, taxa) = _extractAnnotFields(line, annotfile_columns)
+            print(gene, symb, qual, term, evid, onto, taxa)
            try:
                (taxa_q, terms_map) = self.annots[gene]
                terms_map[term] = (evid, qual != 'NOT')

--- a/phylo.py
+++ b/phylo.py
@@ -48,7 +48,7 @@ class PhyloTree:
            If node does not exist, None is returned.
            If node has no descendants, an empty list will be returned."""
        if not isinstance(node, PhyloNode):
-            node = self.root.findLabel(node)
+            node = self.findLabel(node)
        if node:
            return node.getDescendants(transitive)
        return None
@@ -60,22 +60,24 @@ class PhyloTree:
            If node does not exist, None is returned.
            If node is the root of the tree, None is returned."""
        if not isinstance(node, PhyloNode):
-            node = self.root.findLabel(node)
+            node = self.findLabel(node)
        if node:
            myroot = self.root
            found = False
            branching = []
            while not found and myroot != None:
                branching.append(myroot)
+                # check if "node" is a direct child of "myroot"; if so, the search is complete
                if myroot.left == node or myroot.right == node:
                    found = True
                    break
-                if myroot.left:
-                    if myroot.left.isAncestorOf(node, transitive = True):
-                        myroot = myroot.left
-                    else: # must be right branch then...
-                        myroot = myroot.right
-                else: # must be right branch then...
+                if myroot.left != None: # myroot has a "left" child
+                    # check if the "left" child of "myroot" is the ancestor of "node"
+                    if myroot.left.isAncestorOf(node, transitive = True): # if yes,
+                        myroot = myroot.left    # move to the "left" child
+                    else:                       # if not,
+                        myroot = myroot.right   # move to the "right" child
+                else: # myroot does NOT have a "left" child, so let's move "right"
                    myroot = myroot.right
            if found and transitive:
                return branching
@@ -91,6 +93,8 @@ class PhyloTree:
        self.root._backwardParsimony(self.aln) # use scores to determine sequences
        return self.root.getSequence() # return the sequence found at the root

+    def canonise(self):
+        """ Put the tree into canonical form by invoking the recursive
+            _canonise on the root node. Nothing is returned. """
+        top = self.root
+        top._canonise()

 class PhyloNode:
    """ A class for a node in a rooted, binary (bifurcating) tree.
@@ -212,6 +216,18 @@ class PhyloNode:
                self.sequence = seq
                break

+    def _canonise(self):
+        """ Internal function that operates recursively to re-order the children of each node,
+            so that the sub-tree holding the smallest leaf label (as ordered by '>') is on the left.
+            Returns the smallest leaf label found at or below this node.
+            A node with only one child is left unchanged, since there is nothing to swap. """
+        if self.left is None and self.right is None: # at leaf
+            return self.label
+        if self.left is None:   # only a "right" child: nothing to compare against
+            return self.right._canonise()
+        if self.right is None:  # only a "left" child: nothing to compare against
+            return self.left._canonise()
+        myleft = self.left._canonise()
+        myright = self.right._canonise()
+        if myleft > myright: # the "right" sub-tree holds the smaller label, so swap children
+            self.left, self.right = self.right, self.left
+            return myright
+        return myleft
+
    def _forwardParsimony(self, aln):
        """ Internal function that operates recursively to first initialise each node (forward),
            stopping only once a sequence has been assigned to the node,
@@ -459,3 +475,6 @@ def readNewick(filename):
    string = ''.join(f)
    return parseNewick(string)

+def writeNewickFile(filename, my_tree):
+    """ Write the string form of my_tree to the file with the given name.
+        An existing file is overwritten; no newline is added after the tree. """
+    with open(filename, 'w') as out:
+        out.write(str(my_tree))
--- a/sequence.py
+++ b/sequence.py
--- a/sym.py
+++ b/sym.py
@@ -121,22 +121,27 @@ this module is imported """
 Bool_Alphabet = Alphabet('TF')
 DNA_Alphabet = Alphabet('ACGT')
 DNA_Alphabet_wN = Alphabet('ACGTN')
+RNA_Alphabet_wN = Alphabet('ACGUN')
 RNA_Alphabet = Alphabet('ACGU')
 Protein_Alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
 Protein_Alphabet_wX = Protein_wX = Alphabet('ACDEFGHIKLMNPQRSTVWYX')
-Protein_Alphabet_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
+Protein_Alphabet_wSTOP = Protein_wSTOP = Alphabet('ACDEFGHIKLMNPQRSTVWY*')
 DSSP_Alphabet = Alphabet('GHITEBSC')
 DSSP3_Alphabet = Alphabet('HEC')

-predefAlphabets = {'DNA': DNA_Alphabet,
+# Lookup table from alphabet name to the corresponding predefined Alphabet instance
+predefAlphabets = {'Bool_Alphabet': Bool_Alphabet,
+                   'DNA': DNA_Alphabet,
                   'RNA': RNA_Alphabet,
-                   'DNAwN': Alphabet('ACGTN'),
-                   'RNAwN': Alphabet('ACGUN'),
+                   'DNAwN': DNA_Alphabet_wN,
+                   'RNAwN': RNA_Alphabet_wN,
                   'Protein': Protein_Alphabet,
-                   'ProteinwX': Protein_wX}
+                   'ProteinwX': Protein_wX,
+                   'ProteinwSTOP' : Protein_wSTOP,
+                   'DSSP_Alphabet' : DSSP_Alphabet,
+                   'DSSP3_Alphabet' : DSSP3_Alphabet}
 # The preferred order in which a predefined alphabet is assigned to a sequence
 # (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
-preferredOrder = ['DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX']
+preferredOrder = ['Bool_Alphabet', 'DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX', 'ProteinwSTOP', 'DSSP_Alphabet', 'DSSP3_Alphabet']
 # Useful annotations
 DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'})
 RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'})

--- a/webservice.py
+++ b/webservice.py
@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
    url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId
    # Get the entry
    try:
-        data = urllib.request.urlopen(url).read()
-        if data.startswith(b'ERROR'):
+        data = urllib.request.urlopen(url).read().decode("utf-8")
+        print (type(data))
+        if data.startswith("ERROR"):
            raise RuntimeError(data)
        return data
-    except(urllib.error.HTTPError, ex):
+
+    except urllib.error.HTTPError as ex:
        raise RuntimeError(ex.read())

 def search(query, dbName='uniprot', format='list', limit=100):
@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
            url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query
        # Get the entries
        try:
-            data = urllib.request.urlopen(url).read()
+            data = urllib.request.urlopen(url).read().decode("utf-8")
            if format == 'list':
                return data.splitlines()
            else:
                return data
-        except(urllib.error.HTTPError, ex):
+        except urllib.error.HTTPError as ex:
            raise RuntimeError(ex.read())
    elif dbName.startswith('refseq'):
        dbs = dbName.split(":")
@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
        url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit)
        # Get the entries
        try:
-            data = urllib.request.urlopen(url).read()
+            data = urllib.request.urlopen(url).read().decode("utf-8")
            words = data.split("</Id>")
            words = [w[w.find("<Id>")+4:] for w in words[:-1]]
            if format == 'list':
@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
                url = base + "efetch.fcgi?db=" + dbName + "&rettype=fasta&id="
                for w in words:
                    url += w + ","
-                data = urllib.request.urlopen(url).read()
+                data = urllib.request.urlopen(url).read().decode("utf-8")
                return data
            else:
                return ''
-        except(urllib.error.HTTPError, ex):
+        except urllib.error.HTTPError as ex:
            raise RuntimeError(ex.read())
    return

@@ -199,7 +201,7 @@ def getGODef(goterm):
    # Get the entry: fill in the fields specified below
    try:
        entry={'id': None, 'name': None, 'def': None}
-        data = urllib.request.urlopen(url).read()
+        data = urllib.request.urlopen(url).read().decode("utf-8")
        for row in data.splitlines():
            index = row.find(':')
            if index > 0 and len(row[index:]) > 1:
@@ -209,7 +211,7 @@ def getGODef(goterm):
                    if entry[field] == None:      # check if not yet assigned
                        entry[field] = value
        return entry
-    except(urllib.error.HTTPError, ex):
+    except urllib.error.HTTPError as ex:
        raise RuntimeError(ex.read())

 def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read())
                f = gzip.GzipFile(fileobj=buf)
-                data = f.read()
+                data = f.read().decode("utf-8")
            else:
-                data = response.read()
+                data = response.read().decode("utf-8")
            for row in data.splitlines()[1:]:  # we ignore first (header) row
                values = row.split('\t')
                if len(values) >= 7:
@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
                    else:
                        termsmap[key] = set([values[6]])
                        taxonmap[key] = int(values[4])
-        except(urllib.error.HTTPError, ex):
+        except urllib.error.HTTPError as ex:
            raise RuntimeError(ex.read())
    if completeAnnot:
        if len(genes) == 1:
@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
        url = __ebiGOUrl__ + uri_string + goterm.strip()
        # Get the entry: fill in the fields specified below
        try:
-            data = urllib.request.urlopen(url).read()
+            data = urllib.request.urlopen(url).read().decode("utf-8")
            for row in data.splitlines()[1:]:  # we ignore first (header) row
                values = row.split('\t')
                if len(values) >= 7:
                    genes.add(values[1])
            map[goterm] = list(genes)
-        except(urllib.error.HTTPError, ex):
+        except urllib.error.HTTPError as ex:
            raise RuntimeError(ex.read())
    if len(goterms) == 1:
        return map[goterms[0]]