Commit 8fa94535 authored by Mikael Boden's avatar Mikael Boden

Added regex search in gappy sequences

parent 9889428a
Pipeline #46 failed with stages
import annotations
import phylo
tree = phylo.parseNewick("(Paenibacillus_thiaminolyticus:4.0,(((bacterium_endosymbiont_of_Mortierella_elongata_FMR23_6:4.0,(Pandoraea_faecigallinarum:4.0,Pandoraea_vervacti:4.0,Pandoraea_oxalativorans:4.0):4.0,(Burkholderia_sp_b14:4.0,Burkholderia_sp_b13:4.0,(Burkholderia_pseudomallei_406e:4.0,Burkholderia_pseudomallei_1710a:4.0):4.0):4.0):4.0,(Chromobacterium_amazonense:4.0,(Microvirgula_sp_AG722:4.0,Microvirgula_aerodenitrificans:4.0):4.0):4.0):4.0,(Candidatus_Endobugula:4.0,Moritella_sp_PE36:4.0,(Enterovibrio_nigricans:4.0,Photobacterium_iliopiscarium:4.0,Vibrio_campbellii:4.0):4.0,(((Pantoea_sp_AMG_501:4.0,Pantoea_wallisii:4.0,Pantoea_rodasii:4.0):4.0,(Erwinia_sp_ErVv1:4.0,Erwinia_toletana:4.0,Erwinia_mallotivora:4.0):4.0):4.0,(Candidatus_Fukatsuia:4.0,Rahnella_aquatilis:4.0,(Yersinia_pekkanenii:4.0,Yersinia_entomophaga:4.0,Yersinia_mollaretii:4.0,(Yersinia_wautersii:4.0,Yersinia_similis:4.0,Yersinia_pseudotuberculosis:4.0,Yersinia_pestis:4.0):4.0,Yersinia_enterocolitica:4.0):4.0):4.0,(Cosenzaea_myxofaciens:4.0,(Photorhabdus_laumondii:4.0,Photorhabdus_bodei:4.0,Photorhabdus_sp_HUG-39:4.0,Photorhabdus_sp_CRCIA-P01:4.0,Photorhabdus_namnaonensis:4.0,Photorhabdus_khanii:4.0,Photorhabdus_heterorhabditis:4.0,Photorhabdus_temperata:4.0,Photorhabdus_asymbiotica:4.0,Photorhabdus_australis:4.0,Photorhabdus_thracensis:4.0,Photorhabdus_luminescens:4.0):4.0,(Xenorhabdus_ishibashii:4.0,Xenorhabdus_khoisanae:4.0,Xenorhabdus_mauleonii:4.0,Xenorhabdus_miraniensis:4.0,Xenorhabdus_vietnamensis:4.0,Xenorhabdus_stockiae:4.0,Xenorhabdus_szentirmaii:4.0,Xenorhabdus_budapestensis:4.0,Xenorhabdus_bovienii:4.0,Xenorhabdus_nematophila:4.0):4.0,(Proteus_sp_TJ1640:4.0,Proteus_sp_TJ1636:4.0,Proteus_sp_FJ2001126-3:4.0,Proteus_columbae:4.0,Proteus_alimentorum:4.0,Proteus_genomosp_6_str._ATCC_51471:4.0,Proteus_genomosp_4_str._ATCC_51469:4.0,Proteus_cibarius:4.0,Proteus_hauseri:4.0,Proteus_penneri:4.0,Proteus_vulgaris:4.0):4.0,(Morganella_sp_HMSC11D09:4.0,Morganella_sp_EGD-HP17:4.0,Morganella_mo
rganii:4.0):4.0):4.0,(Escherichia_sp_ESNIH1:4.0,Mangrovibacter_phragmitis:4.0,(Enterobacter_sp_DC4:4.0,Enterobacter_sp_BIDMC_26:4.0):4.0,Kosakonia_sacchari:4.0,Pseudescherichia_vulneris:4.0):4.0):4.0,(Pseudomonas_kribbensis:4.0,Pseudomonas_lactis:4.0,Pseudomonas_paralactis:4.0,Pseudomonas_helleri:4.0,Pseudomonas_weihenstephanensis:4.0,Pseudomonas_coleopterorum:4.0,Pseudomonas_endophytica:4.0,Pseudomonas_granadensis:4.0,Pseudomonas_prosekii:4.0,Pseudomonas_brassicacearum:4.0,Pseudomonas_deceptionensis:4.0,Pseudomonas_baetica:4.0,Pseudomonas_simiae:4.0,Pseudomonas_moraviensis:4.0,Pseudomonas_batumici:4.0,Pseudomonas_antarctica:4.0,Pseudomonas_rhizosphaerae:4.0,Pseudomonas_lini:4.0,Pseudomonas_kilonensis:4.0,Pseudomonas_psychrophila:4.0,Pseudomonas_abietaniphila:4.0,Pseudomonas_thivervalensis:4.0,Pseudomonas_jessenii:4.0,Pseudomonas_plecoglossicida:4.0,Pseudomonas_agarici:4.0,(Pseudomonas_cichorii:4.0,Pseudomonas_syringae:4.0):4.0,Pseudomonas_sp:4.0,(Pseudomonas_lundensis:4.0,Pseudomonas_fragi:4.0):4.0,(Pseudomonas_poae:4.0,Pseudomonas_mediterranea:4.0,Pseudomonas_extremorientalis:4.0,Pseudomonas_orientalis:4.0,Pseudomonas_libanensis:4.0,Pseudomonas_synxantha:4.0,Pseudomonas_corrugata:4.0,Pseudomonas_fluorescens:4.0):4.0):4.0):4.0):4.0);")
# tree = phylo.readNewick("/Users/gabefoley/Dropbox/PhD/Projects/Phylo Island/Species trees/species_tree.nwk")
# tree = phylo.readNewick("/Users/gabefoley/Dropbox/PhD/Smaller Projects/GRASP tree/non_unique.nwk")
......@@ -10,17 +12,17 @@ import phylo
working_dir = "/Users/gabefoley/Dropbox/PhD/Smaller Projects/Nexus_colouring/Read_annotations/"
tree = phylo.read_nexus(working_dir + "annotation_simple.nexus")
print (tree)
print (tree.nexus_annotations.annotations)
tree.swap_annotations("PDB")
print (tree)
print (tree.nexus_annotations.annotations)
# working_dir = "/Users/gabefoley/Dropbox/PhD/Smaller Projects/Nexus_colouring/Read_annotations/"
#
# tree = phylo.read_nexus(working_dir + "annotation_simple.nexus")
#
# print (tree)
# print (tree.nexus_annotations.annotations)
#
# tree.swap_annotations("PDB")
#
# print (tree)
# print (tree.nexus_annotations.annotations)
#
# tree.write_to_nexus(working_dir + "output.nexus")
......
from collections import defaultdict
from phylo import *
import phylo
import matplotlib
import random
......@@ -146,3 +147,5 @@ class NexusAnnotations():
def generate_colour_list(self, num):
    # NOTE(review): stub -- despite its name, this returns *num* unchanged
    # rather than generating a list of colours; confirm intended behaviour.
    return num
# tree = phylo.readNewick("/Users/gabefoley/Dropbox/PhD/Projects/Phylo Island/Species trees/species_tree_115.nwk")
......@@ -242,7 +242,7 @@ def writeGtfFile(entries, filename, header = None):
f.close()
if __name__ == '__main__':
    # Ad-hoc manual test for GtfFile on a developer-local file.
    # NOTE(review): the two consecutive GtfFile assignments look like diff
    # residue (old vs new path); only the second takes effect -- confirm
    # which file is intended and delete the other.
    bf = GtfFile('/Users/mikael/simhome/NFIX/WT1677.gtf')
    bf = GtfFile('/Users/mikael/simhome/NFIX/WT1689.gtf')
    print(bf.chroms.keys())
    g = bf.generate('chr12')
    print(next(g))
......
'''
Module with methods and classes for phylogeny.
Extended to handle n-ary trees (Jan 2019).
@author: mikael
'''
import sequence
from collections import defaultdict
import annotations
class PhyloTree:
""" Rooted, binary (bifurcating) tree for representing phylogenetic relationships.
""" Rooted, n-ary tree for representing phylogenetic relationships.
Functionality includes labelling and traversing nodes; reading and writing to Newick format;
association with sequence alignment; maximum parsimony inference of ancestral sequence;
generation of single, bifurcating rooted tree by UPGMA.
Known issues: Binary only; Parsimony does not handle gaps in alignment.
generation of rooted tree by UPGMA.
Known issues: Parsimony does not handle gaps in alignment.
Programmers should note that almost all functionality is implemented through recursion. """
def __init__(self, root):
......@@ -27,7 +27,6 @@ class PhyloTree:
def putAnnotations(self, nexus_annotations):
self.nexus_annotations = nexus_annotations
# Update the annotations dictionary so that it contains PhyloNode objects as keys, not text labels
for node in self.getNodes():
if node.label in self.nexus_annotations.leaf_annotations:
......@@ -60,10 +59,18 @@ class PhyloTree:
node = queue.pop()
nodes.append(node)
# if strategy.upper().startswith('DEPTH'):
if node.left: queue.append(node.left)
if node.right: queue.append(node.right)
if not node.isLeaf():
queue.extend(node.children)
return nodes
def getLeaves(self):
    """ Return the list of leaf nodes (nodes without children) in this tree. """
    return [node for node in self.getNodes() if node.isLeaf()]
def getDescendantsOf(self, node, transitive=False):
""" Retrieve and return the (list of) descendants (children) of a specified node.
Node can be the label or the instance.
......@@ -86,28 +93,7 @@ class PhyloTree:
if not isinstance(node, PhyloNode):
node = self.findLabel(node)
if node:
myroot = self.root
found = False
branching = []
while not found and myroot != None:
branching.append(myroot)
# check if "myroot" is a leaf node, i.e. does not have children
if myroot.left == node or myroot.right == node:
found = True
break
if myroot.left != None: # myroot has a "left" child
# check if the "left" child of "myroot" is the ancestor of "node"
if myroot.left.isAncestorOf(node, transitive=True): # if yes,
myroot = myroot.left # move to the "left" child
else: # if not,
myroot = myroot.right # move to the "right" child
else: # myroot does NOT have a "left" child, so let's move "right"
myroot = myroot.right
if found and transitive:
return branching
elif found and len(branching) > 0:
return branching[len(branching) - 1]
return None
return node.getAncestors(transitive)
def parsimony(self):
""" Solve the "small parsimony problem",
......@@ -117,12 +103,8 @@ class PhyloTree:
self.root._backwardParsimony(self.aln) # use scores to determine sequences
return self.root.getSequence() # return the sequence found at the root
def canonise(self):
    """ Put the tree into a canonical node ordering, starting from the root.
    NOTE(review): the _canonise helper appears commented out elsewhere in
    this file -- confirm it is still defined before calling this. """
    self.root._canonise()
def swap_annotations(self, annotation_key):
try:
for node in self.getNodes():
if node.isLeaf():
node.label = self.nexus_annotations.leaf_annotations[node][annotation_key]
......@@ -135,103 +117,91 @@ class PhyloTree:
:param out_path: The path to write the NEXUS file to
:param nexus_annotations: The NexusAnnotations containing the annotations
"""
if write_annotations and not nexus_annotations:
if not self.nexus_annotations:
raise RuntimeError("This tree file has no associated annotation file. Either associate or supply one as a parameter.")
nexus_annotations = self.nexus_annotations
if nexus_annotations:
for node in self.getNodes():
if node in self.nexus_annotations.node_annotations:
node.annotate_node(self.nexus_annotations.node_annotations, self.nexus_annotations.annotation_symbols, exclude_annotations, use_symbols)
tree_annotation = str(self) + ";"
self.swap_annotations("Original")
for node in self.getNodes():
if node in self.nexus_annotations.leaf_annotations:
node.annotate_node(self.nexus_annotations.leaf_annotations, exclude_annotations)
leaves = []
for node in self.getNodes():
if node.isLeaf():
leaves.append(node.label)
leaf_annotation = ""
for leaf in leaves:
leaf_annotation += "\n\t%s" % (leaf)
with open(out_path, "w+") as file:
file.write(
"#NEXUS\nbegin taxa;\n\tdimensions ntax=%d;\n\ttaxlabels%s\n;\nend;\n\nbegin trees;\n\ttree tree_1 = "
"[&R] %s\nend;" % (len(leaves), leaf_annotation, tree_annotation))
class PhyloNode:
""" A class for a node in a rooted, binary (bifurcating) tree.
Contains pointers to descendants/daughters (left and right),
""" A class for a node in a rooted, n-ary tree.
Contains pointers to multiple descendants/daughters,
optional fields include data, label, sequence and dist.
If parsimony is used scores and traceback pointers are available.
A number of methods are named with a _ prefix. These can be, but
are not intended to be used from outside the class. """
def __init__(self, label=''):
""" Initialise an initially unlinked node.
Populate fields left and right to link it with other nodes.
def __init__(self, parent = None, label=''):
""" Initialise a node.
Set its parent (another PhyloNode), parent can be None.
Set label to name it.
Use field data for any type of information associated with node.
Use dist to indicate the distance to its parent (if any).
Other fields are used internally, including sequence for associated alignment,
seqscores, backleft and backright for maximum parsimony. """
self.left = None
self.right = None
seqscores, back for maximum parsimony. """
self.parent = parent
self.children = None
self.data = None
self.label = label
self.dist = None
self.sequence = None # The sequence after an alignment have been mapped (leaf) or the most parsimonous sequence (ancestral)
self.seqscores = None # The scores propagated from leaves via children
self.backleft = None # Pointers back to left child: what symbol rendered current/parent symbols
self.backright = None # Pointers back to right child: what symbol rendered current/parent symbols
self.seqscores = None # The scores propagated from leaves via children
self.backptr = None # Pointers back to children: what symbol rendered current/parent symbols
def isLeaf(self):
return self.left == self.right == None
return self.nChildren() == 0
def nChildren(self):
if self.children == None:
return 0
else:
return len(self.children)
def __str__(self):
""" Returns string with node (incl descendants) in a Newick style. """
left = right = label = dist = ''
if self.left:
left = str(self.left)
if self.right:
right = str(self.right)
stubs = ['' for _ in range(self.nChildren())]
label = dist = ''
for i in range(self.nChildren()):
stubs[i] = str(self.children[i])
if self.dist or self.dist == 0.0:
dist = ':' + str(self.dist)
if self.label != None:
label = str(self.label)
if not self.left and not self.right:
return label + dist
else:
return '(' + left + ',' + right + ')' + label + dist
else: # there is no label
if self.nChildren() == 0:
return label + dist
else:
stubstr = '('
for i in range(len(stubs) - 1):
stubstr += stubs[i] + ','
return stubstr + stubs[-1] + ')' + label + dist
# there is no label
'''
if not self.left and self.right:
return ',' + right
elif self.left and not self.right:
return left + ','
elif self.left and self.right:
return '(' + left + ',' + right + ')' + dist
'''
# def __le__(self, other):
# """ Returns indication of less than other node. """
......@@ -247,38 +217,31 @@ class PhyloNode:
def _printSequences(self, start, end):
""" Returns string with node (incl descendants) in a Newick style. """
left = right = label = dist = ''
if self.left:
left = self.left._printSequences(start, end)
if self.right:
right = self.right._printSequences(start, end)
if self.dist:
stubs = ['' for _ in range(self.nChildren())]
label = dist = ''
for i in range(self.nChildren()):
stubs[i] = self._printSequences(self.children[i], start, end)
if self.dist or self.dist == 0.0:
dist = ':' + str(self.dist)
if self.sequence != None:
label = "".join(self.sequence[start:end]) + ""
if not self.left and not self.right:
return label + dist
else:
return '(' + left + ',' + right + ')' + label + dist
else: # there is no label
if not self.left and self.right:
return ',' + right
elif self.left and not self.right:
return left + ','
elif self.left and self.right:
return '(' + left + ',' + right + ')' + dist
if self.label != None:
label = str(self.label)
if self.nChildren() == 0:
return label + dist
else:
stubstr = '('
for i in range(len(stubs) - 1):
stubstr += stubs[i] + ','
return stubstr + stubs[-1] + ')' + label + dist
def _findLabel(self, label):
""" Find a node by label at this node or in any descendants (recursively). """
if self.label == label:
return self
else:
if self.left:
foundLeft = self.left._findLabel(label)
if foundLeft:
return foundLeft
if self.right:
return self.right._findLabel(label)
for i in range(self.nChildren()):
found = self.children[i]._findLabel(label)
if found:
return found
return None
def _propagateDistance(self, parent_dist):
......@@ -286,24 +249,21 @@ class PhyloNode:
The only parameter is the absolute distance to the parent of this node. """
travelled = self.dist # absolute distance to this node
self.dist = parent_dist - self.dist # relative distance to this node
if self.left != None: # if there is a child node...
self.left._propagateDistance(travelled) # pass absolute distance to this node
if self.right != None:
self.right._propagateDistance(travelled)
for i in range(self.nChildren()):
self.children[i]._propagateDistance(travelled) # pass absolute distance to this node
def _assignAlignment(self, aln):
""" Assign an alignment to the node, which implies assigning a sequence to it if one is
available in the alignment. """
self.sequence = None
if self.left != None:
self.left._assignAlignment(aln)
if self.right != None:
self.right._assignAlignment(aln)
for i in range(self.nChildren()):
self.children[i]._assignAlignment(aln)
for seq in aln.seqs:
if seq.name == self.label:
self.sequence = seq
break
""" # Not sure if this is required (putting nodes into a canonical ordering)
def _canonise(self):
if self.left == None and self.right == None: # at leaf
return self.label
......@@ -315,52 +275,38 @@ class PhyloNode:
self.right = tmpnode
return myright
return myleft
"""
def _forwardParsimony(self, aln):
""" Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node,
then to propagate scores from sequence assigned nodes to root (backward). """
if self.sequence == None: # no sequence has been assigned
if self.left == None and self.right == None: # no children, so terminal, cannot propagate scores
if self.nChildren() == 0: # no children, so terminal, cannot propagate scores
raise RuntimeError("No sequence assigned to leaf node:", self.label)
scoresleft = scoresright = None
if self.left != None:
scoresleft = self.left._forwardParsimony(aln)
if self.right != None:
scoresright = self.right._forwardParsimony(aln)
scores = [None for _ in range(self.nChildren())]
for i in range(self.nChildren()):
scores[i] = self.children[i]._forwardParsimony(aln)
# for each position in the alignment,
# introduce (initially zero) score for each symbol in alphabet
self.seqscores = [[0 for _ in aln.alphabet] for col in range(aln.alignlen)]
# for each position in the alignment,
# allocate a position to put the left child symbol from which each current node symbol score was determined
self.backleft = [[None for _ in aln.alphabet] for _ in range(aln.alignlen)]
# allocate a position to put the right child symbol from which each current node symbol score was determined
self.backright = [[None for _ in aln.alphabet] for _ in range(aln.alignlen)]
# allocate a position to put the each child symbol from which each current node symbol score was determined
self.backptr = [[[None for _ in aln.alphabet] for _ in range(aln.alignlen)] for _ in range(self.nChildren())]
for col in range(aln.alignlen):
# left child will contribute first
for a_parent in range(len(aln.alphabet)):
best_score_left = +9999999
best_symb_left = 0
for a in range(len(aln.alphabet)):
score = (scoresleft[col][a] + (
1 if a != a_parent else 0)) # if we want to weight scores, this would need to change
if score < best_score_left:
best_symb_left = a
best_score_left = score
self.seqscores[col][a_parent] = best_score_left
self.backleft[col][a_parent] = best_symb_left
# right child will contribute next
for a_parent in range(len(aln.alphabet)):
best_score_right = +9999999
best_symb_right = 0
for a in range(len(aln.alphabet)):
score = (scoresright[col][a] + (
1 if a != a_parent else 0)) # if we want to weight scores, this would need to change
if score < best_score_right:
best_symb_right = a
best_score_right = score
self.seqscores[col][a_parent] += best_score_right
self.backright[col][a_parent] = best_symb_right
for i in range(self.nChildren()):
# left child will contribute first
for a_parent in range(len(aln.alphabet)):
best_score = +9999999
best_symb = 0
for a in range(len(aln.alphabet)):
score = (scores[i][col][a] + (
1 if a != a_parent else 0)) # if we want to weight scores, this would need to change
if score < best_score:
best_symb = a
best_score = score
self.seqscores[col][a_parent] += best_score
self.backptr[i][col][a_parent] = best_symb
else:
self.seqscores = [[0 if a == sym else 999999 for a in aln.alphabet] for sym in
self.sequence] # if we want to weight scores, this would need to change
......@@ -370,39 +316,37 @@ class PhyloNode:
""" Internal function that operates recursively to inspect scores to determine
most parsimonious sequence, from root to leaves. """
if self.sequence == None: # no sequence has been assigned
leftbuf = []
rightbuf = []
if self.left == None and self.right == None: # no children, so terminal, cannot propagate scores
childbuf = [[] for _ in range(self.nChildren())]
if self.nChildren() == 0: # no children, so terminal, cannot propagate scores
raise RuntimeError("No sequence assigned to leaf node:", self.label)
if seq == None: # Only root can do this, no parents to consider, so we pick the lowest scoring symbol
currbuf = []
for col in range(aln.alignlen):
min_score = 999999
min_symb = None
left_symb = None
right_symb = None
child_symb = [None for _ in range(self.nChildren())]
for a_parent in range(len(aln.alphabet)):
if self.seqscores[col][a_parent] < min_score:
min_score = self.seqscores[col][a_parent]
min_symb = a_parent
left_symb = self.backleft[col][a_parent]
right_symb = self.backright[col][a_parent]
for i in range(self.nChildren()):
child_symb[i] = self.backptr[i][col][a_parent]
currbuf.append(aln.alphabet[min_symb])
leftbuf.append(aln.alphabet[left_symb])
rightbuf.append(aln.alphabet[right_symb])
for i in range(self.nChildren()):
childbuf[i].append(aln.alphabet[child_symb[i]])
self.sequence = sequence.Sequence(currbuf, aln.alphabet, self.label, gappy=True)
else: # Non-root, but not leaf
self.sequence = seq
col = 0
for sym_parent in self.sequence:
a_parent = aln.alphabet.index(sym_parent)
left_symb = self.backleft[col][a_parent]
right_symb = self.backright[col][a_parent]
leftbuf.append(aln.alphabet[left_symb])
rightbuf.append(aln.alphabet[right_symb])
child_symb = [None for _ in range(self.nChildren())]
for i in range(self.nChildren()):
child_symb[i] = self.backptr[i][col][a_parent]
childbuf.append(aln.alphabet[child_symb[i]])
col += 1
self.left._backwardParsimony(aln, sequence.Sequence(leftbuf, aln.alphabet, self.label, gappy=True))
self.right._backwardParsimony(aln, sequence.Sequence(rightbuf, aln.alphabet, self.label, gappy=True))
for i in range(self.nChildren()):
self.children[i]._backwardParsimony(aln, sequence.Sequence(childbuf[i], aln.alphabet, self.label, gappy=True))
return self.sequence
def getSequence(self):
......@@ -418,26 +362,35 @@ class PhyloNode:
""" Decide if this node is the ancestor of specified node.
If transitive is True (default), all descendants are included.
If transitive is False, only direct descendants are included. """
if node == self.left or node == self.right:
return True
elif transitive:
if self.left:
statusLeft = self.left.isAncestorOf(node, transitive)
if statusLeft: return True
if self.right:
return self.right.isAncestorOf(node, transitive)
for i in range(self.nChildren()):
if node == self.children[i]:
return True
elif transitive:
status = self.children[i].isAncestorOf(node, transitive)
if status: return True
else:
return False
def getAncestors(self, transitive=False):
""" Retrieve and return (list of) parent nodes.
If transitive is False (default), only the direct parent is included.
If transitive is True, all parents (parents of parents etc) are included. """
if self.parent == None:
return []
if not transitive:
return [self.parent]
else:
parents = self.parent.getAncestors(transitive)
parents.append(self.parent)
return parents
def getDescendants(self, transitive=False):
""" Retrieve and return (list of) nodes descendant of this.
If transitive is False (default), only direct descendants are included.
If transitive is True, all descendants are (recursively) included. """
children = []
if self.left:
children.append(self.left)
if self.right:
children.append(self.right)
for i in range(self.nChildren()):
children.append(self.children[i])
if not transitive:
return children
else:
......@@ -450,13 +403,11 @@ class PhyloNode:
return children
def annotate_node(self, annotations, annotation_symbols= None, exclude_annotations = [], use_symbols=False ):
annotation_string = "[&"
for key, val_list in annotations[self].items():
if type(val_list) != list:
val_list = [val_list]
if key not in exclude_annotations:
# If we are using annotation symbols and the annotation has an associated symbol
for val in val_list:
if use_symbols and val in annotation_symbols:
......@@ -464,11 +415,8 @@ class PhyloNode:
annotation_string += '%s="%s",' % (key, ' '.join(['%s' % (val,) for val in sorted_symbols]))
else:
annotation_string += '%s="%s",' % (key, ' '.join(['%s' % (val,) for val in val_list]))
# Remove the final comma and add in a closing bracket
annotation_string = annotation_string[0: len(annotation_string) - 1] + "]"
if len(annotation_string) > 2:
if ":" in self.label:
self.label = self.label.split(":")[0] + annotation_string + self.label.split(":")[1]
......@@ -488,7 +436,7 @@ def runUPGMA(aln, measure, absoluteDistances=False):
D = {}
N = {} # The number of sequences in each node
M = aln.calcDistances(measure) # determine all pairwise distances
nodes = [PhyloNode(seq.name) for seq in aln.seqs] # construct all leaf nodes
nodes = [PhyloNode(label=seq.name) for seq in aln.seqs] # construct all leaf nodes
""" For each node-pair, assign the distance between them. """
for i in range(len(nodes)):
nodes[i].sequence = aln.seqs[i]
......@@ -525,8 +473,9 @@ def runUPGMA(aln, measure, absoluteDistances=False):
N[z] = Nx + Ny # total number of sequences in new cluster, insert new cluster in list N
for w in dz: # we have to run through the nodes again, now not including the removed x and y
D[frozenset([z, w])] = dz[w] # for each "other" cluster, update distance per EQ8.16 (Z&B p. 278)
z.left = x # link the phylogenetic tree
z.right = y
x.parent = z
y.parent = z
z.children = [x, y]
nodes.append(z)
if not absoluteDistances:
x._propagateDistance(z.dist) # convert absolute distances to relative by recursing down left path
......@@ -534,24 +483,22 @@ def runUPGMA(aln, measure, absoluteDistances=False):
z.dist = 0.0 # root z is at distance 0 from merged x and y
return PhyloTree(z) # make it to tree, return
""" ----------------------------------------------------------------------------------------
Methods for processing files of trees on the Newick format
----------------------------------------------------------------------------------------"""
def _findComma(string, level=0):
""" Find first comma at specified level of embedding """
""" Find all commas at specified level of embedding """
mylevel = 0
commas = []
for i in range(len(string)):
if string[i] == '(':
mylevel += 1
elif string[i] == ')':
mylevel -= 1
elif string[i] == ',' and mylevel == level:
return i
return -1
commas.append(i)
return commas
def parseNewickNode(string):
""" Utility function that recursively parses embedded string using Newick format. """
......@@ -559,7 +506,7 @@ def parseNewickNode(string):
last = string[::-1].find(')') # look from the back
if first == -1 and last == -1: # we are at leaf
y = string.split(':')
node = PhyloNode(y[0])
node = PhyloNode(label=y[0])
if len(y) >= 2:
node.dist = float(y[1])
return node
......@@ -569,17 +516,24 @@ def parseNewickNode(string):
embed = string[first + 1:last]
tail = string[last + 1:]
# find where corresp comma is
comma = _findComma(embed)
if comma == -1:
commas = _findComma(embed)
if len(commas) < 1:
raise RuntimeError('Invalid format: invalid placement of "," in sub-string "' + embed + '"')
left = embed[0:comma].strip()
right = embed[comma + 1:].strip()
prev_comma = 0
child_tokens = []
for comma in commas:
child_tokens.append(embed[prev_comma:comma].strip())
prev_comma = comma + 1
child_tokens.append(embed[prev_comma:].strip())
y = tail.split(':')
node = PhyloNode(y[0]) # node is an instance of the PhyloNode() class
node = PhyloNode(label=y[0]) # node is an instance of the PhyloNode() class
if len(y) >= 2:
node.dist = float(y[1])
node.left = parseNewickNode(left)
node.right = parseNewickNode(right)
node.children = []
for tok in child_tokens:
child = parseNewickNode(tok)
child.parent = node
node.children.append(child)
return node
else:
raise RuntimeError('Invalid format: unbalanced parentheses in sub-string "' + string + '"')
......@@ -628,8 +582,6 @@ def parse_nexus(string):
taxon_num = num + 1
while not lines[taxon_num].strip().startswith(";"):
taxon_name = lines[taxon_num].split("[")[0].strip()
for annot_line in lines[taxon_num].split("[&")[1].split(","):
#TODO: Make these regex calls
# print ("Annotation Key is ", annot_line.split("=")[0])
......@@ -641,34 +593,18 @@ def parse_nexus(string):
annot_val = annot_line.split("=")[1].split("]")[0]
annotation_dict[taxon_name][annot_key.strip()] = annot_val
taxon_num +=1
if line.strip().startswith("begin trees"):
tree_num = num + 1
tree = (lines[tree_num].split("[&R]")[1])
phylo_tree = parseNewick(tree)
nexus_annotations = annotations.NexusAnnotations(tree=phylo_tree)
nexus_annotations.add_annotations(annotation_dict)
# print (nexus_annotations.annotations)
phylo_tree.putAnnotations(nexus_annotations)
## Extract all of the annotations from the tree and add them to the NexusAnnotations object
print ("Number of taxons is %s " % (taxon_number))
return phylo_tree
""" ----------------------------------------------------------------------------------------
Method for generating a PhyloTree with unique tip names
----------------------------------------------------------------------------------------"""
......@@ -676,7 +612,6 @@ def parse_nexus(string):
def get_unique_tree(tree):
unique_tree = tree
unique_labels = {}
for node in unique_tree.getNodes():
if node.isLeaf() and node.label in unique_labels:
unique_labels[node.label] = unique_labels[node.label] + 1
......@@ -688,3 +623,6 @@ def get_unique_tree(tree):
def unpack_list(list):  # NOTE(review): parameter name kept for compatibility, but it shadows the builtin 'list'
    """ Return the items of the given list space-separated, terminated by '!'.
    e.g. unpack_list(['a', 'b']) -> 'a b!'; an empty list yields '!'.
    Fix: the previous version %%-formatted a generator expression, which is a
    single object -- it raised TypeError for lists with more than one element
    and printed the generator's repr for single-element lists. """
    return " ".join(str(x) for x in list) + "!"
if __name__ == '__main__':
    # Ad-hoc manual test: parse a developer-local Newick file and print it.
    # NOTE(review): readNewick is presumably defined elsewhere in this module
    # (not visible in this view) -- confirm, and note the hard-coded path.
    tree = readNewick('/Users/mikael/simhome/ASR/edge1.nwk')
    print(tree)
\ No newline at end of file
......@@ -226,9 +226,16 @@ def readFasta(string, alphabet = None, ignore = False, gappy = False, parse_defl
if parse_defline:
parsed = parseDefline(seqinfo[0])
seqname = parsed[0]
else:
seqinfo = line[1:]
else: # we are not parsing the sequence name so no need to duplicate it in the info
seqname = seqinfo[0]
seqinfo = line[1:]
if len(seqinfo) > 0: # more than a name
edited_info = ''
for infopart in seqinfo[1:]:
edited_info += infopart + ' '
seqinfo = edited_info
else:
seqinfo = ''
except IndexError as errmsg:
if not ignore:
raise RuntimeError(errmsg)
......@@ -717,60 +724,62 @@ class Alignment():
distmat[i, j] = distmat[j, i] = dist
return distmat
def writeHTML(self, filename = None):
def writeHTML(self, filename = None, col_start = None, col_end = None):
""" Generate HTML that displays the alignment in color.
Requires that the alphabet is annotated with the label 'html-color' (see Sequence.annotateSym)
and that each symbol maps to a text string naming the color, e.g. 'blue'
"""
col_start = col_start or 0
col_end = col_end or self.alignlen
html = '''<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">\n<title>Sequence Alignment</title>\n</head><body><pre>\n'''
html += '''<p style="font-size:12px">\n'''
maxNameLength = self.getnamelen()
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0:
if (i+1) % 10 == 0 and (i >= col_start and i < col_end):
html += str(i/10+1)[0]
else:
elif (i >= col_start and i < col_end):
html += ' '
html += '%s\n' % (self.alignlen)
# html += '%s\n' % (col_end)
html += '\n'
if self.alignlen > 10:
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0:
if (i+1) % 10 == 0 and (i >= col_start and i < col_end):
index = len(str(i/10 + 1).split('.')[0])
html += str(i / 10 + 1).split('.')[0][(index * -1) + 1 ] if (len(str(i / 10 + 1).split('.')[0]) > 1) else '0'
else:
elif (i >= col_start and i < col_end):
html += ' '
html += '\n'
if self.alignlen > 100:
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0 and i >= 99:
if (i+1) % 10 == 0 and i >= 99 and (i >= col_start and i < col_end):
index = len(str(i/10 + 1).split('.')[0])
html += str(i / 10 + 1).split('.')[0][-1] if (len(str(i / 10 + 1).split('.')[0]) >2) else '0'
else:
elif (i >= col_start and i < col_end):
html += ' '
html += '\n'
if self.alignlen > 1000:
html += ''.ljust(maxNameLength) + ' '
for i in range(self.alignlen - 1):
if (i+1) % 10 == 0:
if (i+1) % 10 == 0 and (i >= col_start and i < col_end):
html += '0' if (len(str(i / 10 + 1).split('.')[0]) > 2) else ' '
else:
elif (i >= col_start and i < col_end):
html += ' '
html += '\n'
for seq in self.seqs:
html += seq.name.ljust(maxNameLength) + ' '
for sym in seq:
for sym in seq[col_start:col_end]:
color = self.alphabet.getAnnotation('html-color', sym)
if not color:
color = 'white'
html += '<font style="BACKGROUND-COLOR: %s">%s</font>' % (color, sym)
html += '\n'
html += '</pre></body></html>'
html += '</p></pre></body></html>'
if filename:
fh = open(filename, 'w')
fh.write(html)
......@@ -1187,19 +1196,25 @@ class Regexp(object):
def __str__(self):
    """ Return the regular-expression pattern string for this motif. """
    return self.pattern
def search(self, sequence, gappy = False):
    """ Find matches to the motif in the specified sequence.

    Returns a list of triples (position, matched string, score); the score
    is always 1.0 because a regexp either matches or it doesn't.
    If gappy is True and the sequence is gappy, the regexp is run on the
    de-gapped sequence and each match position is mapped back to its index
    in the original (gapped) sequence.
    (Diff residue fix: the old one-parameter signature and the old
    unconditional body were interleaved with this gappy-aware version;
    only the new implementation is kept.) """
    if not type(sequence) is Sequence:
        sequence = Sequence(sequence)
    if gappy == False or sequence.gappy == False:
        sequenceString = sequence[:]
        results = []
        for match in self.regex.finditer(sequenceString):
            results.append((match.start(), match.group(), 1.0))
        return results
    else:
        # run the regexp over the de-gapped symbols; idxs maps each
        # de-gapped position back to its position in the gapped sequence
        degapped, idxs = sequence.getDegapped()
        results = []
        for match in self.regex.finditer(''.join(degapped)):
            results.append((idxs[match.start()], match.group(), 1.0))
        return results
class PWM(object):
......
......@@ -138,15 +138,46 @@ predefAlphabets = {'Bool_Alphabet': Bool_Alphabet,
'Protein': Protein_Alphabet,
'ProteinwX': Protein_wX,
'ProteinwSTOP' : Protein_wSTOP,
'ProteinwGAP': Protein_wGAP,
'DSSP_Alphabet' : DSSP_Alphabet,
'DSSP3_Alphabet' : DSSP3_Alphabet}
# The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder = ['Bool_Alphabet', 'DNA', 'RNA', 'DNAwN', 'RNAwN', 'Protein', 'ProteinwX', 'ProteinwSTOP',
                  'ProteinwGAP', 'DSSP_Alphabet', 'DSSP3_Alphabet']
# Useful annotations: per-symbol colours used when rendering sequences as HTML
DNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','T':'#66bbff'})
RNA_Alphabet.annotateAll('html-color', {'A':'green','C':'orange','G':'red','U':'#66bbff'})
# Protein residue colours, grouped by colour class
Protein_Alphabet.annotateAll('html-color', {
    # orange
    'G': "#F5A259",
    # green
    'N': "#00f900",
    'Q': "#00f900",
    'S': "#00f900",
    'T': "#00f900",
    # red
    'K': "#f62f00",
    'R': "#f62f00",
    # blue/purple
    'A': "#92b2f3",
    'I': "#92b2f3",
    'L': "#92b2f3",
    'M': "#92b2f3",
    'V': "#92b2f3",
    'W': "#92b2f3",
    'F': "#92b2f3",
    # yellow
    'P': "#FFFB00",
    # pink
    'C': "#F59692",
    # aqua
    'H': "#04B2B3",
    'Y': "#04B2B3",
    # purple
    'D': "#CE64CB",
    'E': "#CE64CB"})
# ------------------ Substitution Matrix ------------------
......
import urllib.request
import urllib.parse
import os
from time import sleep
import stats
......@@ -16,10 +17,11 @@ import json
http://www.ebi.ac.uk/Tools/webservices/tutorials
"""
# Service endpoint base URLs used by the fetch/search helpers below
__ebiUrl__ = 'http://www.ebi.ac.uk/Tools/'                # EBI dbfetch / tools
__ebiGOUrl__ = 'https://www.ebi.ac.uk/QuickGO/services/'  # QuickGO REST services
__uniprotUrl__ = 'http://www.uniprot.org/'                # UniProtKB queries
__ebiSearchUrl__ = 'http://www.ebi.ac.uk/ebisearch/'      # EBI Search
def fetch(entryId, dbName='uniprotkb', format='fasta'):
"""
......@@ -31,7 +33,7 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=uniprotkb&id=P63166&format=fasta&style=raw&Retrieve=Retrieve
"""
# Construct URL
# Construct URL
url = __ebiUrl__ + 'dbfetch/dbfetch?style=raw&Retrieve=Retrieve&db=' + dbName + '&format=' + format + '&id=' + entryId
# Get the entry
try:
......@@ -42,6 +44,7 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
def search(query, dbName='uniprot', format='list', limit=100, columns=""):
"""
Retrieve multiple entries matching query from a database currently only via UniProtKB
......@@ -54,10 +57,14 @@ def search(query, dbName='uniprot', format='list', limit=100, columns=""):
"""
if dbName.startswith('uniprot'):
# Construct URL
if limit == None: # no limit to number of results returned
url = __uniprotUrl__ + dbName + '/?format=' + format + '&query=' + query + '&columns='+ columns
if limit == None: # no limit to number of results returned
url = "{}{}/?format={}&query={}&columns={}".format(__uniprotUrl__, dbName, format,
urllib.parse.quote(query),
columns)
else:
url = __uniprotUrl__ + dbName + '/?format=' + format + '&limit=' + str(limit) + '&query=' + query + '&columns='+ columns
url = "{}{}/?format={}&limit={}&query={}&columns={}".format(__uniprotUrl__, dbName, format, str(limit),
urllib.parse.quote(query), columns)
# Get the entries
try:
......@@ -72,13 +79,20 @@ def search(query, dbName='uniprot', format='list', limit=100, columns=""):
dbs = dbName.split(":")
if len(dbs) > 1:
dbName = dbs[1]
base = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
url = base + "esearch.fcgi?db=" + dbName + "&term=" + query + "&retmax=" + str(limit)
url = base + "esearch.fcgi?db={}&term={}+AND+srcdb_refseq[" \
"prop]&retmax={}".format(dbName, urllib.parse.quote(query), str(limit))
print (url)
# Get the entries
try:
data = urllib.request.urlopen(url).read().decode("utf-8")
words = data.split("</Id>")
words = [w[w.find("<Id>")+4:] for w in words[:-1]]
words = [w[w.find("<Id>") + 4:] for w in words[:-1]]
if format == 'list':
return words
elif format == 'fasta' and len(words) > 0:
......@@ -93,9 +107,10 @@ def search(query, dbName='uniprot', format='list', limit=100, columns=""):
raise RuntimeError(ex.read())
return
# Species supported for identifier mapping, keyed by NCBI taxon ID; each value
# lists the species name followed by the identifier tags accepted for it
# (NOTE(review): exact consumer of these tags is outside this view — confirm).
authorised_database_tag = {9606: ['Homo sapiens', 'ACC', 'ID'],
                           3702: ['Arabidopsis thaliana', 'TAIR_ID'],
                           4932: ['Saccharomyces cerevisiae', 'SGD_ID', 'CYGD_ID'],
                           10090: ['Mus musculus', 'MGI_ID']}
"""
......@@ -104,7 +119,8 @@ http://www.ebi.ac.uk/QuickGO/WebServices.html
Note that this service can be slow for queries involving a large number of entries.
"""
def getGOReport(positives, background = None):
def getGOReport(positives, background=None):
""" Generate a complete GO term report for a set of genes (positives).
Each GO term is also assigned an enrichment p-value (on basis of background, if provided).
Returns a list of tuples (GO_Term_ID[str], Foreground_no[int], Term_description[str]) with no background, OR
......@@ -135,7 +151,7 @@ def getGOReport(positives, background = None):
for t in term_set:
term_cnt[t] = fg_list.count(t)
sorted_cnt = sorted(list(term_cnt.items()), key=lambda v: v[1], reverse=True)
else: # a background is provided
else: # a background is provided
for t in term_set:
fg_hit = fg_list.count(t)
bg_hit = bg_list.count(t)
......@@ -148,11 +164,12 @@ def getGOReport(positives, background = None):
for t in sorted_cnt:
defin = getGODef(t[0])
if background != None:
ret.append((t[0], t[1][2] * len(term_set), t[1][0], t[1][0]+t[1][1], defin['name']))
ret.append((t[0], t[1][2] * len(term_set), t[1][0], t[1][0] + t[1][1], defin['name']))
else:
ret.append((t[0], t[1], defin['name']))
return ret
def getGODef(goterm):
"""
Retrieve information about a GO term
......@@ -165,7 +182,7 @@ def getGODef(goterm):
url = __ebiGOUrl__ + 'ontology/go/search?query=' + goterm
# Get the entry: fill in the fields specified below
try:
entry={'id': None, 'name': None, 'aspect': None}
entry = {'id': None, 'name': None, 'aspect': None}
data = urllib.request.urlopen(url).read().decode("utf-8")
ret = json.loads(data)
for row in ret['results']:
......@@ -179,6 +196,7 @@ def getGODef(goterm):
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
def getGOTerms(genes):
"""
Retrieve all GO terms for a given set of genes (or single gene).
......@@ -187,9 +205,9 @@ def getGOTerms(genes):
if type(genes) != list and type(genes) != set and type(genes) != tuple:
genes = [genes]
map = dict()
batchsize = 100 # size of query batch
batchsize = 100 # size of query batch
genecnt = 0
limitpage = 100 # number of record on each returned page
limitpage = 100 # number of record on each returned page
while genecnt < len(genes):
genebatch = []
for index in range(batchsize):
......@@ -237,6 +255,7 @@ def getGOTerms(genes):
raise RuntimeError(ex.read())
return map
def getGenes(goterms, taxo=None):
"""
Retrieve all genes/proteins for a given set of GO terms (or single GO term).
......@@ -247,9 +266,9 @@ def getGenes(goterms, taxo=None):
if type(goterms) != list and type(goterms) != set and type(goterms) != tuple:
goterms = [goterms]
map = dict()
batchsize = 10 # size of query batch
batchsize = 10 # size of query batch
termcnt = 0
limitpage = 100 # number of record on each returned page
limitpage = 100 # number of record on each returned page
while termcnt < len(goterms):
termbatch = []
for index in range(batchsize):
......@@ -258,7 +277,8 @@ def getGenes(goterms, taxo=None):
else:
break
termcnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
uri_string = 'annotation/search?limit=' + str(
limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)):
term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term
......@@ -295,11 +315,11 @@ def getGenes(goterms, taxo=None):
raise RuntimeError(ex.read())
return map
class EBI(object):
__email__ = 'anon@uq.edu.au' # to whom emails about jobs should go
__ebiServiceUrl__ = 'http://www.ebi.ac.uk/Tools/services/rest/' # Use UQ mirror when available
__checkInterval__ = 2 # how long to wait between checking job status
class EBI(object):
__email__ = 'anon@uq.edu.au' # to whom emails about jobs should go
__ebiServiceUrl__ = 'http://www.ebi.ac.uk/Tools/services/rest/' # Use UQ mirror when available
__checkInterval__ = 2 # how long to wait between checking job status
def __init__(self, service=None):
""" Initialise service session.
......@@ -349,7 +369,8 @@ class EBI(object):
if self.isLocked():
raise RuntimeError("""You currently have a %s job running. You must
wait until it is complete before submitting another job. Go to
%sstatus/%s to check the status of the job.""" % (self.service, self.__ebiServiceUrl__, self.jobId))
%sstatus/%s to check the status of the job.""" % (
self.service, self.__ebiServiceUrl__, self.jobId))
url = self.__ebiServiceUrl__ + self.service + '/run/'
# ncbiblast database parameter needs special handling
if self.service == 'ncbiblast':
......@@ -423,8 +444,8 @@ class EBI(object):
else:
return results
def getUniProtDict(ids, cols="", db='uniprot', identities=None):
def getUniProtDict(ids, cols="", db='uniprot', identities=None):
"""
:param ids: The list of UniProt IDs
......@@ -439,11 +460,11 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
*** EXAMPLE USAGE ***
Get a list of UniProt IDs and a list of UniProt columns you're interested in.
Full list of UniProt column names - https://www.uniprot.org/help/uniprotkb_column_names
uniprot_names = ['Q9LIR4', 'Q1JUQ1', 'P05791', 'P0ADF6']
cols = ["lineage(SUPERKINGDOM)", "genes", "lineage(KINGDOM)"]
up_dict = getUniProtDict(uniprot_names, cols)
for record in up_dict:
print (record, up_dict[record].get("lineage(SUPERKINGDOM)"))
......@@ -452,22 +473,21 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
for record in up_dict:
print (record, up_dict[record].get("genes"))
If a record doesn't have an entry in UniProt for that column it'll just return None
print (up_dict['Q1JUQ1'])
print (up_dict['Q1JUQ1']['lineage(KINGDOM)'])
*** EXAMPLE USAGE FOR UNIREF SEARCHING ***
up_dict = getUniProtDict(["Q9LIR4", "P99999"], cols=["members"], db="uniref", identities = 1.0)
You can either pass a list of identities for each UniProt identifier (in which case the list of identities must be
the same size as the list of identifiers. Or you can just pass a single identity to search Uniref at.
"""
# Format the lists of IDs and columns correctly
cols = ",".join(cols)
......@@ -481,12 +501,14 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
if type(identities) != list:
identities = [identities] * len(ids)
elif len(identities) != len(ids):
raise RuntimeError('Either supply a single identity threshold or supply one for each identifier in the list')
raise RuntimeError(
'Either supply a single identity threshold or supply one for each identifier in the list')
# Check that the identity thresholds are valid values
for x in identities:
if x not in [1.0, 0.9, 0.5]:
raise RuntimeError("UniRef threshold values must be either 1.0, 0.9, or 0.5. Supplied value was - " + str(x))
raise RuntimeError(
"UniRef threshold values must be either 1.0, 0.9, or 0.5. Supplied value was - " + str(x))
# Add the query syntax around the identifiers
updated_ids = ""
......@@ -500,8 +522,6 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
url = 'https://www.uniprot.org/' + db + '/'
params = {
'format': 'tab',
'query': updated_ids,
......@@ -518,12 +538,12 @@ def getUniProtDict(ids, cols="", db='uniprot', identities=None):
# For each record we retrieve, split the line by tabs and build up the UniProt dict
for line in page.split("\n")[1:]:
if line:
splitlines= line.split("\t")
splitlines = line.split("\t")
id_dict = {}
pos = 1
for col in cols.split(","):
id_dict[col] = None if splitlines[pos] == "" else splitlines[pos]
pos +=1
pos += 1
up_dict[splitlines[0]] = id_dict
return up_dict
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment