Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
binfpy
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
opensource
binfpy
Commits
881688af
Commit
881688af
authored
Jul 21, 2017
by
Mikael Boden
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
webservice_in_Python_3
parent
de69764a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
122 additions
and
83 deletions
+122
-83
godata.py
godata.py
+1
-0
phylo.py
phylo.py
+27
-8
sequence.py
sequence.py
+66
-54
sym.py
sym.py
+11
-6
webservice.py
webservice.py
+17
-15
No files found.
godata.py
View file @
881688af
...
@@ -118,6 +118,7 @@ class GO():
...
@@ -118,6 +118,7 @@ class GO():
if
line
.
startswith
(
'!'
):
if
line
.
startswith
(
'!'
):
continue
continue
(
gene
,
symb
,
qual
,
term
,
evid
,
onto
,
taxa
)
=
_extractAnnotFields
(
line
,
annotfile_columns
)
(
gene
,
symb
,
qual
,
term
,
evid
,
onto
,
taxa
)
=
_extractAnnotFields
(
line
,
annotfile_columns
)
print
(
gene
,
symb
,
qual
,
term
,
evid
,
onto
,
taxa
)
try
:
try
:
(
taxa_q
,
terms_map
)
=
self
.
annots
[
gene
]
(
taxa_q
,
terms_map
)
=
self
.
annots
[
gene
]
terms_map
[
term
]
=
(
evid
,
qual
!=
'NOT'
)
terms_map
[
term
]
=
(
evid
,
qual
!=
'NOT'
)
...
...
phylo.py
View file @
881688af
...
@@ -48,7 +48,7 @@ class PhyloTree:
...
@@ -48,7 +48,7 @@ class PhyloTree:
If node does not exist, None is returned.
If node does not exist, None is returned.
If node has no descendants, an empty list will be returned."""
If node has no descendants, an empty list will be returned."""
if
not
isinstance
(
node
,
PhyloNode
):
if
not
isinstance
(
node
,
PhyloNode
):
node
=
self
.
root
.
findLabel
(
node
)
node
=
self
.
findLabel
(
node
)
if
node
:
if
node
:
return
node
.
getDescendants
(
transitive
)
return
node
.
getDescendants
(
transitive
)
return
None
return
None
...
@@ -60,22 +60,24 @@ class PhyloTree:
...
@@ -60,22 +60,24 @@ class PhyloTree:
If node does not exist, None is returned.
If node does not exist, None is returned.
If node is the root of the tree, None is returned."""
If node is the root of the tree, None is returned."""
if
not
isinstance
(
node
,
PhyloNode
):
if
not
isinstance
(
node
,
PhyloNode
):
node
=
self
.
root
.
findLabel
(
node
)
node
=
self
.
findLabel
(
node
)
if
node
:
if
node
:
myroot
=
self
.
root
myroot
=
self
.
root
found
=
False
found
=
False
branching
=
[]
branching
=
[]
while
not
found
and
myroot
!=
None
:
while
not
found
and
myroot
!=
None
:
branching
.
append
(
myroot
)
branching
.
append
(
myroot
)
# check if "myroot" is a leaf node, i.e. does not have children
if
myroot
.
left
==
node
or
myroot
.
right
==
node
:
if
myroot
.
left
==
node
or
myroot
.
right
==
node
:
found
=
True
found
=
True
break
break
if
myroot
.
left
:
if
myroot
.
left
!=
None
:
# myroot has a "left" child
if
myroot
.
left
.
isAncestorOf
(
node
,
transitive
=
True
):
# check if the "left" child of "myroot" is the ancestor of "node"
myroot
=
myroot
.
left
if
myroot
.
left
.
isAncestorOf
(
node
,
transitive
=
True
):
# if yes,
else
:
# must be right branch then...
myroot
=
myroot
.
left
# move to the "left" child
myroot
=
myroot
.
right
else
:
# if not,
else
:
# must be right branch then...
myroot
=
myroot
.
right
# move to the "right" child
else
:
# myroot does NOT have a "left" child, so let's move "right"
myroot
=
myroot
.
right
myroot
=
myroot
.
right
if
found
and
transitive
:
if
found
and
transitive
:
return
branching
return
branching
...
@@ -91,6 +93,8 @@ class PhyloTree:
...
@@ -91,6 +93,8 @@ class PhyloTree:
self
.
root
.
_backwardParsimony
(
self
.
aln
)
# use scores to determine sequences
self
.
root
.
_backwardParsimony
(
self
.
aln
)
# use scores to determine sequences
return
self
.
root
.
getSequence
()
# return the sequence found at the root
return
self
.
root
.
getSequence
()
# return the sequence found at the root
def
canonise
(
self
):
self
.
root
.
_canonise
()
class
PhyloNode
:
class
PhyloNode
:
""" A class for a node in a rooted, binary (bifurcating) tree.
""" A class for a node in a rooted, binary (bifurcating) tree.
...
@@ -212,6 +216,18 @@ class PhyloNode:
...
@@ -212,6 +216,18 @@ class PhyloNode:
self
.
sequence
=
seq
self
.
sequence
=
seq
break
break
def
_canonise
(
self
):
if
self
.
left
==
None
and
self
.
right
==
None
:
# at leaf
return
self
.
label
myleft
=
self
.
left
.
_canonise
()
myright
=
self
.
right
.
_canonise
();
if
myleft
>
myright
:
tmpnode
=
self
.
left
self
.
left
=
self
.
right
self
.
right
=
tmpnode
return
myright
return
myleft
def
_forwardParsimony
(
self
,
aln
):
def
_forwardParsimony
(
self
,
aln
):
""" Internal function that operates recursively to first initialise each node (forward),
""" Internal function that operates recursively to first initialise each node (forward),
stopping only once a sequence has been assigned to the node,
stopping only once a sequence has been assigned to the node,
...
@@ -459,3 +475,6 @@ def readNewick(filename):
...
@@ -459,3 +475,6 @@ def readNewick(filename):
string
=
''
.
join
(
f
)
string
=
''
.
join
(
f
)
return
parseNewick
(
string
)
return
parseNewick
(
string
)
def
writeNewickFile
(
filename
,
my_tree
):
with
open
(
filename
,
'w'
)
as
fh
:
print
(
my_tree
,
end
=
""
,
file
=
fh
)
sequence.py
View file @
881688af
...
@@ -57,13 +57,10 @@ class Sequence(object):
...
@@ -57,13 +57,10 @@ class Sequence(object):
['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q',
['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q',
'R', 'S', 'T', 'V', 'W', 'Y'] """
'R', 'S', 'T', 'V', 'W', 'Y'] """
#try: # convert sequence data into a compact array representation
# self.sequence = sequence.encode("utf-8") #array.array('b', ''.join([s.upper() for s in sequence]))
#except TypeError:
# raise RuntimeError('S"""""""""""""""""""""""""""""""equence data is not specified correctly: must be iterable')
self
.
sequence
=
sequence
self
.
sequence
=
sequence
# Assign an alphabet
# Assign an alphabet
# If no alphabet is provided, attempts to identify the alphabet from sequence
self
.
alphabet
=
None
self
.
alphabet
=
None
if
not
alphabet
is
None
:
if
not
alphabet
is
None
:
for
sym
in
self
.
sequence
:
for
sym
in
self
.
sequence
:
...
@@ -93,7 +90,7 @@ class Sequence(object):
...
@@ -93,7 +90,7 @@ class Sequence(object):
def
__len__
(
self
):
def
__len__
(
self
):
""" Defines what the "len" operator returns for an instance of Sequence, e.g.
""" Defines what the "len" operator returns for an instance of Sequence, e.g.
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> print(len(seq))
>>> print
(len(seq))
9
9
"""
"""
return
len
(
self
.
sequence
)
return
len
(
self
.
sequence
)
...
@@ -109,7 +106,7 @@ class Sequence(object):
...
@@ -109,7 +106,7 @@ class Sequence(object):
""" Defines how a Sequence should be "iterated", i.e. what its elements are, e.g.
""" Defines how a Sequence should be "iterated", i.e. what its elements are, e.g.
>>> seq = Sequence('AGGAT', DNA_Alphabet)
>>> seq = Sequence('AGGAT', DNA_Alphabet)
>>> for sym in seq:
>>> for sym in seq:
print(sym)
print
(sym)
will print A, G, G, A, T (each on a separate row)
will print A, G, G, A, T (each on a separate row)
"""
"""
tsyms
=
tuple
(
self
.
sequence
)
tsyms
=
tuple
(
self
.
sequence
)
...
@@ -118,12 +115,12 @@ class Sequence(object):
...
@@ -118,12 +115,12 @@ class Sequence(object):
def
__contains__
(
self
,
item
):
def
__contains__
(
self
,
item
):
""" Defines what is returned when the "in" operator is used on a Sequence, e.g.
""" Defines what is returned when the "in" operator is used on a Sequence, e.g.
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> seq = Sequence('ACGGTAGGA', DNA_Alphabet)
>>> print('T' in seq)
>>> print
('T' in seq)
True
True
which is equivalent to
which is equivalent to
>>> print(seq.__contains__('T'))
>>> print
(seq.__contains__('T'))
True
True
>>> print('X' in seq)
>>> print
('X' in seq)
False
False
"""
"""
for
sym
in
self
.
sequence
:
for
sym
in
self
.
sequence
:
...
@@ -320,11 +317,10 @@ class Alignment():
...
@@ -320,11 +317,10 @@ class Alignment():
""" A sequence alignment class. Stores two or more sequences of equal length where
""" A sequence alignment class. Stores two or more sequences of equal length where
one symbol is gap '-'
one symbol is gap '-'
Example usage:
Example usage:
>>> seqs = [Sequence('THIS-LI-NE', Protein_Alphabet, gappy = True), Sequence('--ISALIGNED', Protein_Alphabet, gappy = True)]
>>> seqs = [Sequence('THIS-LI-NE
-
', Protein_Alphabet, gappy = True), Sequence('--ISALIGNED', Protein_Alphabet, gappy = True)]
>>> print(Alignment(seqs))
>>> print
(Alignment(seqs))
THIS-LI-NE-
THIS-LI-NE-
--ISALIGNED
--ISALIGNED """
"""
alignlen
=
None
alignlen
=
None
seqs
=
None
seqs
=
None
...
@@ -381,7 +377,7 @@ class Alignment():
...
@@ -381,7 +377,7 @@ class Alignment():
maxNameLength
=
self
.
getnamelen
()
+
1
maxNameLength
=
self
.
getnamelen
()
+
1
string
=
''
string
=
''
wholeRows
=
self
.
alignlen
/
symbolsPerLine
wholeRows
=
self
.
alignlen
/
symbolsPerLine
for
i
in
range
(
wholeRows
):
for
i
in
range
(
int
(
wholeRows
)
):
for
j
in
range
(
len
(
self
.
seqs
)):
for
j
in
range
(
len
(
self
.
seqs
)):
string
+=
self
.
seqs
[
j
]
.
name
.
ljust
(
maxNameLength
)
+
' '
string
+=
self
.
seqs
[
j
]
.
name
.
ljust
(
maxNameLength
)
+
' '
string
+=
self
.
seqs
[
j
][
i
*
symbolsPerLine
:(
i
+
1
)
*
symbolsPerLine
]
+
'
\n
'
string
+=
self
.
seqs
[
j
][
i
*
symbolsPerLine
:(
i
+
1
)
*
symbolsPerLine
]
+
'
\n
'
...
@@ -646,49 +642,65 @@ class Alignment():
...
@@ -646,49 +642,65 @@ class Alignment():
distmat
[
i
,
j
]
=
distmat
[
j
,
i
]
=
dist
distmat
[
i
,
j
]
=
distmat
[
j
,
i
]
=
dist
return
distmat
return
distmat
def
writeHTML
(
self
,
filename
=
None
):
def
writeHTML
(
self
,
filename
=
None
):
""" Generate HTML that displays the alignment in color.
""" Generate HTML that displays the alignment in color.
Requires that the alphabet is annotated with the label 'html-color' (see Sequence.annotateSym)
Requires that the alphabet is annotated with the label 'html-color' (see Sequence.annotateSym)
and that each symbol maps to a text string naming the color, e.g. 'blue'
and that each symbol maps to a text string naming the color, e.g. 'blue'
"""
"""
if
filename
==
None
:
html
=
'''<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
\n
<title>Sequence Alignment</title>
\n
</head><body><pre>
\n
'''
htmlstr
=
'<pre>
\n
'
else
:
htmlstr
=
'<html><head><meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
\n
<title>Sequence Alignment</title>
\n
</head><body><pre>
\n
'
maxNameLength
=
self
.
getnamelen
()
maxNameLength
=
self
.
getnamelen
()
html
=
''
.
ljust
(
maxNameLength
)
+
' '
html
+
=
''
.
ljust
(
maxNameLength
)
+
' '
for
i
in
range
(
self
.
alignlen
-
1
):
for
i
in
range
(
self
.
alignlen
-
1
):
if
(
i
+
1
)
%
10
==
0
:
if
(
i
+
1
)
%
10
==
0
:
html
+=
str
(
i
/
10
+
1
)[
-
1
]
html
+=
str
(
i
/
10
+
1
)[
0
]
else
:
else
:
html
+=
' '
html
+=
' '
html
+=
'
%
s
\n
'
%
(
self
.
alignlen
)
html
+=
'
%
s
\n
'
%
(
self
.
alignlen
)
htmlstr
+=
html
if
self
.
alignlen
>
10
:
if
self
.
alignlen
>
10
:
html
=
''
.
ljust
(
maxNameLength
)
+
' '
html
+
=
''
.
ljust
(
maxNameLength
)
+
' '
for
i
in
range
(
self
.
alignlen
-
1
):
for
i
in
range
(
self
.
alignlen
-
1
):
if
(
i
+
1
)
%
10
==
0
:
if
(
i
+
1
)
%
10
==
0
:
html
+=
'0'
index
=
len
(
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
])
html
+=
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
][(
index
*
-
1
)
+
1
]
if
(
len
(
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
])
>
1
)
else
'0'
else
:
html
+=
' '
html
+=
'
\n
'
if
self
.
alignlen
>
100
:
html
+=
''
.
ljust
(
maxNameLength
)
+
' '
for
i
in
range
(
self
.
alignlen
-
1
):
if
(
i
+
1
)
%
10
==
0
and
i
>=
99
:
index
=
len
(
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
])
html
+=
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
][
-
1
]
if
(
len
(
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
])
>
2
)
else
'0'
else
:
html
+=
' '
html
+=
'
\n
'
if
self
.
alignlen
>
1000
:
html
+=
''
.
ljust
(
maxNameLength
)
+
' '
for
i
in
range
(
self
.
alignlen
-
1
):
if
(
i
+
1
)
%
10
==
0
:
html
+=
'0'
if
(
len
(
str
(
i
/
10
+
1
)
.
split
(
'.'
)[
0
])
>
2
)
else
' '
else
:
else
:
html
+=
' '
html
+=
' '
html
+=
'
\n
'
html
+=
'
\n
'
htmlstr
+=
html
for
seq
in
self
.
seqs
:
for
seq
in
self
.
seqs
:
html
=
seq
.
name
.
ljust
(
maxNameLength
)
+
' '
html
+
=
seq
.
name
.
ljust
(
maxNameLength
)
+
' '
for
sym
in
seq
:
for
sym
in
seq
:
color
=
self
.
alphabet
.
getAnnotation
(
'html-color'
,
sym
)
color
=
self
.
alphabet
.
getAnnotation
(
'html-color'
,
sym
)
if
not
color
:
if
not
color
:
color
=
'white'
color
=
'white'
html
+=
'<font style="BACKGROUND-COLOR:
%
s">
%
s</font>'
%
(
color
,
sym
)
html
+=
'<font style="BACKGROUND-COLOR:
%
s">
%
s</font>'
%
(
color
,
sym
)
html
+=
'
\n
'
html
+=
'
\n
'
htmlstr
+=
html
html
+=
'</pre></body></html>'
htmlstr
+=
'<pre>'
if
filename
:
if
filename
:
with
open
(
filename
,
'w+'
)
as
fh
:
fh
=
open
(
filename
,
'w'
)
fh
.
write
(
htmlstr
)
fh
.
write
(
html
)
fh
.
write
(
'</body></html>
\n
'
)
fh
.
close
()
else
:
return
html
return
htmlstr
def
saveConsensus
(
aln
,
theta1
=
0.99
,
theta2
=
0.01
,
countgaps
=
False
,
consensus
=
True
,
filename
=
None
):
def
saveConsensus
(
aln
,
theta1
=
0.99
,
theta2
=
0.01
,
countgaps
=
False
,
consensus
=
True
,
filename
=
None
):
""" Display a table with rows for each alignment column, showing
""" Display a table with rows for each alignment column, showing
...
@@ -1239,7 +1251,7 @@ def getSequence(id, database = 'uniprotkb', start=None, end=None):
...
@@ -1239,7 +1251,7 @@ def getSequence(id, database = 'uniprotkb', start=None, end=None):
for
i
in
range
(
MAX_TRY
):
for
i
in
range
(
MAX_TRY
):
try
:
try
:
fastaData
=
fetch
(
id
,
database
)
.
decode
(
"utf-8"
)
fastaData
=
fetch
(
id
,
database
)
seq
=
readFasta
(
fastaData
)[
0
]
seq
=
readFasta
(
fastaData
)[
0
]
break
break
except
:
except
:
...
...
sym.py
View file @
881688af
...
@@ -121,22 +121,27 @@ this module is imported """
...
@@ -121,22 +121,27 @@ this module is imported """
Bool_Alphabet
=
Alphabet
(
'TF'
)
Bool_Alphabet
=
Alphabet
(
'TF'
)
DNA_Alphabet
=
Alphabet
(
'ACGT'
)
DNA_Alphabet
=
Alphabet
(
'ACGT'
)
DNA_Alphabet_wN
=
Alphabet
(
'ACGTN'
)
DNA_Alphabet_wN
=
Alphabet
(
'ACGTN'
)
RNA_Alphabet_wN
=
Alphabet
(
'ACGUN'
)
RNA_Alphabet
=
Alphabet
(
'ACGU'
)
RNA_Alphabet
=
Alphabet
(
'ACGU'
)
Protein_Alphabet
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY'
)
Protein_Alphabet
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY'
)
Protein_Alphabet_wX
=
Protein_wX
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWYX'
)
Protein_Alphabet_wX
=
Protein_wX
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWYX'
)
Protein_Alphabet_wSTOP
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY*'
)
Protein_Alphabet_wSTOP
=
Protein_wSTOP
=
Alphabet
(
'ACDEFGHIKLMNPQRSTVWY*'
)
DSSP_Alphabet
=
Alphabet
(
'GHITEBSC'
)
DSSP_Alphabet
=
Alphabet
(
'GHITEBSC'
)
DSSP3_Alphabet
=
Alphabet
(
'HEC'
)
DSSP3_Alphabet
=
Alphabet
(
'HEC'
)
predefAlphabets
=
{
'DNA'
:
DNA_Alphabet
,
predefAlphabets
=
{
'Bool_Alphabet'
:
Bool_Alphabet
,
'DNA'
:
DNA_Alphabet
,
'RNA'
:
RNA_Alphabet
,
'RNA'
:
RNA_Alphabet
,
'DNAwN'
:
Alphabet
(
'ACGTN'
)
,
'DNAwN'
:
RNA_Alphabet_wN
,
'RNAwN'
:
Alphabet
(
'ACGUN'
)
,
'RNAwN'
:
DNA_Alphabet_wN
,
'Protein'
:
Protein_Alphabet
,
'Protein'
:
Protein_Alphabet
,
'ProteinwX'
:
Protein_wX
}
'ProteinwX'
:
Protein_wX
,
'ProteinwSTOP'
:
Protein_wSTOP
,
'DSSP_Alphabet'
:
DSSP_Alphabet
,
'DSSP3_Alphabet'
:
DSSP3_Alphabet
}
# The preferred order in which a predefined alphabet is assigned to a sequence
# The preferred order in which a predefined alphabet is assigned to a sequence
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
# (e.g., we'd want to assign DNA to 'AGCT', even though Protein is also valid)
preferredOrder
=
[
'
DNA'
,
'RNA'
,
'DNAwN'
,
'RNAwN'
,
'Protein'
,
'ProteinwX
'
]
preferredOrder
=
[
'
Bool_Alphabet'
,
'DNA'
,
'RNA'
,
'DNAwN'
,
'RNAwN'
,
'Protein'
,
'ProteinwX'
,
'ProteinwSTOP'
,
'DSSP_Alphabet'
,
'DSSP3_Alphabet
'
]
# Useful annotations
# Useful annotations
DNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'T'
:
'#66bbff'
})
DNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'T'
:
'#66bbff'
})
RNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'U'
:
'#66bbff'
})
RNA_Alphabet
.
annotateAll
(
'html-color'
,
{
'A'
:
'green'
,
'C'
:
'orange'
,
'G'
:
'red'
,
'U'
:
'#66bbff'
})
...
...
webservice.py
View file @
881688af
...
@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
...
@@ -32,11 +32,13 @@ def fetch(entryId, dbName='uniprotkb', format='fasta'):
url
=
__ebiUrl__
+
'dbfetch/dbfetch?style=raw&db='
+
dbName
+
'&format='
+
format
+
'&id='
+
entryId
url
=
__ebiUrl__
+
'dbfetch/dbfetch?style=raw&db='
+
dbName
+
'&format='
+
format
+
'&id='
+
entryId
# Get the entry
# Get the entry
try
:
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
if
data
.
startswith
(
b
'ERROR'
):
print
(
type
(
data
))
if
data
.
startswith
(
"ERROR"
):
raise
RuntimeError
(
data
)
raise
RuntimeError
(
data
)
return
data
return
data
except
(
urllib
.
error
.
HTTPError
,
ex
):
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
def
search
(
query
,
dbName
=
'uniprot'
,
format
=
'list'
,
limit
=
100
):
def
search
(
query
,
dbName
=
'uniprot'
,
format
=
'list'
,
limit
=
100
):
...
@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
...
@@ -57,12 +59,12 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
__uniprotUrl__
+
dbName
+
'/?format='
+
format
+
'&limit='
+
str
(
limit
)
+
'&query='
+
query
url
=
__uniprotUrl__
+
dbName
+
'/?format='
+
format
+
'&limit='
+
str
(
limit
)
+
'&query='
+
query
# Get the entries
# Get the entries
try
:
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
if
format
==
'list'
:
if
format
==
'list'
:
return
data
.
splitlines
()
return
data
.
splitlines
()
else
:
else
:
return
data
return
data
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
elif
dbName
.
startswith
(
'refseq'
):
elif
dbName
.
startswith
(
'refseq'
):
dbs
=
dbName
.
split
(
":"
)
dbs
=
dbName
.
split
(
":"
)
...
@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
...
@@ -72,7 +74,7 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
base
+
"esearch.fcgi?db="
+
dbName
+
"&term="
+
query
+
"&retmax="
+
str
(
limit
)
url
=
base
+
"esearch.fcgi?db="
+
dbName
+
"&term="
+
query
+
"&retmax="
+
str
(
limit
)
# Get the entries
# Get the entries
try
:
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
words
=
data
.
split
(
"</Id>"
)
words
=
data
.
split
(
"</Id>"
)
words
=
[
w
[
w
.
find
(
"<Id>"
)
+
4
:]
for
w
in
words
[:
-
1
]]
words
=
[
w
[
w
.
find
(
"<Id>"
)
+
4
:]
for
w
in
words
[:
-
1
]]
if
format
==
'list'
:
if
format
==
'list'
:
...
@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
...
@@ -81,11 +83,11 @@ def search(query, dbName='uniprot', format='list', limit=100):
url
=
base
+
"efetch.fcgi?db="
+
dbName
+
"&rettype=fasta&id="
url
=
base
+
"efetch.fcgi?db="
+
dbName
+
"&rettype=fasta&id="
for
w
in
words
:
for
w
in
words
:
url
+=
w
+
","
url
+=
w
+
","
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
return
data
return
data
else
:
else
:
return
''
return
''
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
return
return
...
@@ -199,7 +201,7 @@ def getGODef(goterm):
...
@@ -199,7 +201,7 @@ def getGODef(goterm):
# Get the entry: fill in the fields specified below
# Get the entry: fill in the fields specified below
try
:
try
:
entry
=
{
'id'
:
None
,
'name'
:
None
,
'def'
:
None
}
entry
=
{
'id'
:
None
,
'name'
:
None
,
'def'
:
None
}
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
():
for
row
in
data
.
splitlines
():
index
=
row
.
find
(
':'
)
index
=
row
.
find
(
':'
)
if
index
>
0
and
len
(
row
[
index
:])
>
1
:
if
index
>
0
and
len
(
row
[
index
:])
>
1
:
...
@@ -209,7 +211,7 @@ def getGODef(goterm):
...
@@ -209,7 +211,7 @@ def getGODef(goterm):
if
entry
[
field
]
==
None
:
# check if not yet assigned
if
entry
[
field
]
==
None
:
# check if not yet assigned
entry
[
field
]
=
value
entry
[
field
]
=
value
return
entry
return
entry
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
def
getGOTerms
(
genes
,
database
=
'UniProtKB'
,
completeAnnot
=
False
):
def
getGOTerms
(
genes
,
database
=
'UniProtKB'
,
completeAnnot
=
False
):
...
@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
...
@@ -252,9 +254,9 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
if
response
.
info
()
.
get
(
'Content-Encoding'
)
==
'gzip'
:
if
response
.
info
()
.
get
(
'Content-Encoding'
)
==
'gzip'
:
buf
=
StringIO
(
response
.
read
())
buf
=
StringIO
(
response
.
read
())
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
f
=
gzip
.
GzipFile
(
fileobj
=
buf
)
data
=
f
.
read
()
data
=
f
.
read
()
.
decode
(
"utf-8"
)
else
:
else
:
data
=
response
.
read
()
data
=
response
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
values
=
row
.
split
(
'
\t
'
)
values
=
row
.
split
(
'
\t
'
)
if
len
(
values
)
>=
7
:
if
len
(
values
)
>=
7
:
...
@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
...
@@ -264,7 +266,7 @@ def getGOTerms(genes, database='UniProtKB', completeAnnot = False):
else
:
else
:
termsmap
[
key
]
=
set
([
values
[
6
]])
termsmap
[
key
]
=
set
([
values
[
6
]])
taxonmap
[
key
]
=
int
(
values
[
4
])
taxonmap
[
key
]
=
int
(
values
[
4
])
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
if
completeAnnot
:
if
completeAnnot
:
if
len
(
genes
)
==
1
:
if
len
(
genes
)
==
1
:
...
@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
...
@@ -304,13 +306,13 @@ def getGenes(goterms, database='UniProtKB', taxo=None):
url
=
__ebiGOUrl__
+
uri_string
+
goterm
.
strip
()
url
=
__ebiGOUrl__
+
uri_string
+
goterm
.
strip
()
# Get the entry: fill in the fields specified below
# Get the entry: fill in the fields specified below
try
:
try
:
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
data
=
urllib
.
request
.
urlopen
(
url
)
.
read
()
.
decode
(
"utf-8"
)
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
for
row
in
data
.
splitlines
()[
1
:]:
# we ignore first (header) row
values
=
row
.
split
(
'
\t
'
)
values
=
row
.
split
(
'
\t
'
)
if
len
(
values
)
>=
7
:
if
len
(
values
)
>=
7
:
genes
.
add
(
values
[
1
])
genes
.
add
(
values
[
1
])
map
[
goterm
]
=
list
(
genes
)
map
[
goterm
]
=
list
(
genes
)
except
(
urllib
.
error
.
HTTPError
,
ex
)
:
except
urllib
.
error
.
HTTPError
as
ex
:
raise
RuntimeError
(
ex
.
read
())
raise
RuntimeError
(
ex
.
read
())
if
len
(
goterms
)
==
1
:
if
len
(
goterms
)
==
1
:
return
map
[
goterms
[
0
]]
return
map
[
goterms
[
0
]]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment