EDITOR=emacsclient

6ca6ac3d · Mikael Boden · bd07c60d · 6ca6ac3d · 6ca6ac3d · 6ca6ac3d
Commit 6ca6ac3d authored Jul 13, 2021 by Mikael Boden
Hide whitespace changes
Inline Side-by-side

Showing with 63 additions and 13 deletions

bed.py bed.py +2 -1

gibbs.py gibbs.py +17 -1

phylo.py phylo.py +2 -2

prob.py prob.py +4 -0

sequence.py sequence.py +17 -0

webservice.py webservice.py +21 -9

No files found.
--- a/bed.py
+++ b/bed.py
@@ -258,7 +258,8 @@ class BedFile:
        self.chromqueue = ival.Stack()
        for c in sorted(self.chroms.keys())[::-1]:
            self.chromqueue.push(self.generate(c))
-        self.current = self.chromqueue.pop()
+        if not self.chromqueue.isEmpty():
+            self.current = self.chromqueue.pop()
        return self

    def __next__(self):

--- a/gibbs.py
+++ b/gibbs.py
@@ -55,7 +55,23 @@ class GibbsMotif():
        """ background that will be used as pseudo-counts """
        pseudocount = pseudocount or prob.Distrib(self.alphabet, 1.0)
        """ q: the foreground distribution (specifying the W distributions in aligned columns)
-            p: the background distribution (for non-aligned positions in all sequences) """
+        
+            columns   0 1 2 3 4 ... W-1
+            Rows    
+                    A .1 
+                    C .5
+                    G .2
+                    T .2
+        
+            p: the background distribution (for non-aligned positions in all sequences) 
+            
+            columns   0 1 2 3 4 ... W
+            Rows    
+                    A .25 
+                    C .25
+                    G .25
+                    T .25
+            """
        q = [ prob.Distrib(self.alphabet, pseudocount) for _ in range(W) ]
        p = prob.Distrib(self.alphabet, pseudocount)
        a = self.alignment

--- a/phylo.py
+++ b/phylo.py
@@ -603,9 +603,9 @@ if __name__ == '__main__1':
    print(tree)

 if __name__ == '__main__':
-    aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.aln', sequence.Protein_Alphabet)
+    aln = sequence.readFastaFile('/Users/mikael/Documents/Teaching/SCIE2100/2020/dnaexamq.aln', sequence.Protein_Alphabet)
    tree = runUPGMA(sequence.Alignment(aln), "fractional")
-    writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/Exams/pdistupgma.nwk', tree)
+    writeNewickFile('/Users/mikael/Documents/Teaching/SCIE2100/2021/examq_pdist.nwk', tree)

 if __name__ == '__main__3':
    aln = sequence.readClustalFile('/Users/mikael/simhome/ASR/dp16_example.aln', sequence.Protein_Alphabet)

--- a/prob.py
+++ b/prob.py
@@ -501,6 +501,10 @@ class Joint(object):
            return 0.0
        return float(score) / float(self.totalCnt)

+    # def __setitem__(self, key, value):
+    #     key = _getMeTuple(self.alphas, key)
+    #     self.store[key] = value
+
    def __str__(self):
        """ Return a textual representation of the JP. """
        str = '< '

--- a/sequence.py
+++ b/sequence.py
@@ -191,6 +191,23 @@ class Sequence(object):
            idx = ''.join(degapped).find(findme)
            return idxs[idx] if idx >= 0 else -1

+    def getKmers(self, k):
+        """ Count every k-mer (length-k window) of the sequence; gappy sequences are de-gapped first.
+            Returns a list of len(alphabet)**k counts; a k-mer's count is stored at index
+            sum(alphabet.index(symbol_j) * len(alphabet)**j), i.e. the first symbol varies fastest. """
+        if self.gappy == False:
+            myseq = self.sequence
+        else: # sequence is flagged gappy: count k-mers on the de-gapped sequence instead
+            myseq, _ = self.getDegapped() # second returned value is not needed here
+        nsym = len(self.alphabet)
+        counts = [0] * (nsym ** k)
+        for i in range(len(myseq) - k + 1): # +1 so the final window (the last k-mer) is counted too
+            idx = 0
+            for j, s in enumerate(myseq[i:i + k]):
+                idx += self.alphabet.index(s) * nsym ** j
+            counts[idx] += 1
+        return counts
+
 """
 Below are some useful methods for loading data from strings and files.
 Recognize the FASTA format (nothing fancy).

--- a/webservice.py
+++ b/webservice.py
@@ -175,9 +175,13 @@ def getGODef(goterm):
    Retrieve information about a GO term
    goterm: the identifier, e.g. 'GO:0002080'
    """
-    # first turn off server certificate verification
-    if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
-        ssl._create_default_https_context = ssl._create_unverified_context
+    # to turn off server certificate verification
+    #if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
+    #    ssl._create_default_https_context = ssl._create_unverified_context
+    # The better solution is to install network security certificates from the command line (MacOS below),
+    # which should render the above unnecessary:
+    # bash /Applications/Python*/Install\ Certificates.command
+
    # Construct URL with query term
    url = __ebiGOUrl__ + 'ontology/go/search?query=' + goterm
    # Get the entry: fill in the fields specified below
@@ -224,9 +228,13 @@ def getGOTerms(genes):
            uri_string += gene + "," if i < len(genebatch) - 1 else gene
        # Construct URL
        # Get the entry: fill in the fields specified below
+        # installing Python doesn't always install security certificates, and the below code is a workaround...
        # first turn off server certificate verification
-        if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
-            ssl._create_default_https_context = ssl._create_unverified_context
+        # if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
+        #    ssl._create_default_https_context = ssl._create_unverified_context
+        # The better solution is to install network security certificates from the command line (MacOS below),
+        # which should render the above unnecessary:
+        # bash /Applications/Python*/Install\ Certificates.command
        page = 1
        try:
            while (True):
@@ -280,13 +288,17 @@ def getGenes(goterms, taxo=None):
                break
            termcnt += 1
        uri_string = 'annotation/search?limit=' + str(
-            limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
+            limitpage) + '&taxonId=' + str(taxo) + "&goId=" if taxo else 'annotation/search?goId='
        for i in range(len(termbatch)):
            term = termbatch[i]
            uri_string += term + "," if i < len(termbatch) - 1 else term
+        # installing Python doesn't always install security certificates, and the below code is a workaround...
        # first turn off server certificate verification
-        if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
-            ssl._create_default_https_context = ssl._create_unverified_context
+        # if (not os.environ.__getitem__('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
+        #    ssl._create_default_https_context = ssl._create_unverified_context
+        # The better solution is to install network security certificates from the command line (MacOS below),
+        # which should render the above unnecessary:
+        # bash /Applications/Python*/Install\ Certificates.command
        page = 1
        try:
            while (True):
@@ -534,7 +546,7 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
    request = urllib.request.Request(url, data)
    opener = urllib.request.build_opener()
    response = opener.open(request)
-    page = response.read(20000000).decode('utf-8')
+    page = response.read(2000000000).decode('utf-8')
    up_dict = {}

    # For each record we retrieve, split the line by tabs and build up the UniProt dict