Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
B
binfpy
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
opensource
binfpy
Commits
39b93af9
Commit
39b93af9
authored
Aug 17, 2022
by
Mikael Boden
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ASR_added
parent
ea1b47e1
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
418 additions
and
2 deletions
+418
-2
asr.py
asr.py
+329
-0
prob.py
prob.py
+26
-1
sequence.py
sequence.py
+63
-1
No files found.
asr.py
0 → 100644
View file @
39b93af9
This diff is collapsed.
Click to expand it.
prob.py
View file @
39b93af9
...
@@ -203,9 +203,19 @@ class Distrib():
...
@@ -203,9 +203,19 @@ class Distrib():
maxprob
=
self
[
sym
]
maxprob
=
self
[
sym
]
return
maxsym
return
maxsym
def getProb(self):
    """ Return a list of (sym, prob) tuples, in order of their alphabet.

    Each symbol of self.alpha is paired with the value of self.prob(sym);
    the list follows the iteration order of self.alpha.
    """
    return [(sym, self.prob(sym)) for sym in self.alpha]
def getBits(self):
    """ Return a list of (sym, bits) tuples, in order of their alphabet.

    The entropy of the values returned by self.prob() is subtracted from
    log2(len(self.alpha)); each symbol is then paired with
    self.prob(sym) multiplied by that difference.

    NOTE(review): assumes self.prob() called without a symbol yields one
    probability per symbol -- confirm against Distrib.prob.
    Raises ValueError (from math.log2) if self.alpha is empty.
    """
    # Symbols with zero probability contribute nothing to the entropy, so
    # they are skipped instead of being multiplied by a placeholder value.
    entropy = -sum(f * math.log2(f) for f in self.prob() if f > 0)
    # Difference between the largest possible entropy and the observed one.
    information = math.log2(len(self.alpha)) - entropy
    return [(s, self.prob(s) * information) for s in self.alpha]
def getsort(self):
    """ Return the list of symbols, in order of their probability.

    The order is whatever self.getProbsort() produces; only the first
    element of each (symbol, value) pair it yields is kept.
    """
    return [s for (s, _) in self.getProbsort()]
def
getProbsort
(
self
):
def
getProbsort
(
self
):
...
@@ -1089,3 +1099,18 @@ def lgamma(x):
...
@@ -1089,3 +1099,18 @@ def lgamma(x):
y
+=
1
y
+=
1
ser
+=
(
cof
[
j
]
/
y
)
ser
+=
(
cof
[
j
]
/
y
)
return
(
-
tmp
+
math
.
log
(
2.5066282746310005
*
ser
/
x
))
return
(
-
tmp
+
math
.
log
(
2.5066282746310005
*
ser
/
x
))
# NOTE(review): sequence.py uses Distrib, which is defined in this module, so
# this import looks circular; it appears to be needed only by the demo below --
# confirm before moving it under the __main__ guard.
import sequence as seq

if __name__ == '__main__':
    # Demo: feed four short DNA sequences to a Markov chain and print
    # every entry of its transition table.
    myseqs = [seq.Sequence('TCCTAGCCCC'),
              seq.Sequence('GCCGCCCCCA'),
              seq.Sequence('ATCCGCCCGG'),
              seq.Sequence('CCCCCGCCTT')]
    mymc = MarkovChain(seq.DNA_Alphabet)
    for myseq in myseqs:
        print(myseq, len(myseq))
        mymc.observe(myseq)
    for t in mymc.transit:
        print(t, mymc.transit[t])
sequence.py
View file @
39b93af9
...
@@ -1344,6 +1344,61 @@ class PWM(object):
...
@@ -1344,6 +1344,61 @@ class PWM(object):
maxindex
=
i
maxindex
=
i
return
(
maxscore
,
maxindex
)
return
(
maxscore
,
maxindex
)
def readPWMs(filename, format='MEME'):
    """ Read motifs from a MEME-style text file.

    filename: path of the file to read (opened in text mode).
    format: kept for interface compatibility; it is not consulted, the file
        is always parsed as MEME text.

    Returns a dictionary that maps each motif name (the word following
    'MOTIF') to a tuple (FOREGROUND, BACKGROUND):
      - FOREGROUND is a list holding one Distrib per matrix row; each row
        value is observed with weight value * nsites,
      - BACKGROUND is the Distrib filled from the section introduced by a
        line starting with 'Background', or None if no such section exists.
    A file without any 'MOTIF' line gives an empty dictionary.
    Returns None, after printing the offending line, if a header line cannot
    be processed.
    """
    # Read everything up front so the file handle is closed immediately.
    with open(filename, 'rt') as fh:
        lines = fh.read().splitlines()

    ALPHABET = None
    BACKGROUND = None
    MOTIF = None        # name of the motif currently being collected
    FOREGROUND = []     # rows collected so far for MOTIF
    NSITES = 1
    EXPECT = None       # header line whose data rows are being consumed, if any
    COLLECTION = {}

    for line in lines:
        myline = line.strip()
        words = myline.split()
        if EXPECT is None:
            # Not inside a data section: the line is a header, a comment or blank.
            try:
                if myline.startswith('MEME') and len(words) > 1:
                    pass    # version line: recognised, value not needed
                elif myline.startswith('ALPHABET') and len(words) > 1:
                    ALPHABET = Alphabet(words[-1])
                elif myline.startswith('MOTIF') and len(words) > 1:
                    if MOTIF is not None:
                        # we have one motif that needs to be stored first
                        COLLECTION[MOTIF] = (FOREGROUND, BACKGROUND)
                        FOREGROUND = []
                    MOTIF = words[1]
                elif myline.startswith('URL') and len(words) > 1:
                    pass    # URL line: recognised, value not needed
                elif len(words) == 0 or myline.startswith('#'):
                    pass    # blank lines and comments carry no information
                else:
                    # Any other line opens a section that lasts until the
                    # next blank line.
                    EXPECT = myline
                    if EXPECT.startswith('letter-probability matrix'):
                        # The count may be the last item on the (stripped)
                        # line, so no trailing whitespace is required; a
                        # header without nsites falls back to 1.
                        found = re.search(r'nsites=\s*([0-9]+)', EXPECT)
                        NSITES = int(found.group(1)) if found else 1
            except Exception:
                print('Error in format: ' + line)
                return None
        elif len(words) == 0:
            # A blank line closes the current section.
            EXPECT = None
        elif EXPECT.startswith('Background'):
            # Data row made of alternating symbol / frequency words.
            BACKGROUND = Distrib(ALPHABET)
            for sym, freq in zip(words[::2], words[1::2]):
                BACKGROUND.observe(sym, float(freq))
        elif EXPECT.startswith('letter-probability matrix'):
            # Data row with one value per alphabet symbol.
            d = Distrib(ALPHABET)
            for sym, value in zip(ALPHABET.symbols, words):
                d.observe(sym, float(value) * NSITES)
            FOREGROUND.append(d)

    # save last motif here (nothing to save if the file named no motif)
    if MOTIF is not None:
        COLLECTION[MOTIF] = (FOREGROUND, BACKGROUND)
    return COLLECTION
# Web Service Functions -------------------
# Web Service Functions -------------------
def
getSequence
(
id
,
database
=
'uniprotkb'
,
start
=
None
,
end
=
None
):
def
getSequence
(
id
,
database
=
'uniprotkb'
,
start
=
None
,
end
=
None
):
...
@@ -1438,7 +1493,7 @@ def runBLAST(sequence, program='blastp', database='uniprotkb', exp='1e-1'):
...
@@ -1438,7 +1493,7 @@ def runBLAST(sequence, program='blastp', database='uniprotkb', exp='1e-1'):
ids
.
append
(
id
.
split
(
':'
)[
1
])
ids
.
append
(
id
.
split
(
':'
)[
1
])
return
ids
return
ids
if
__name__
==
'__main__'
:
if
__name__
==
'__main__
1
'
:
aln
=
readClustalFile
(
'/Users/mikael/simhome/ASR/gappy.aln'
,
Protein_Alphabet
)
aln
=
readClustalFile
(
'/Users/mikael/simhome/ASR/gappy.aln'
,
Protein_Alphabet
)
x
,
g
,
i
=
aln
.
outliers
()
x
,
g
,
i
=
aln
.
outliers
()
for
s
in
range
(
len
(
aln
)):
for
s
in
range
(
len
(
aln
)):
...
@@ -1449,3 +1504,10 @@ if __name__ == '__main__':
...
@@ -1449,3 +1504,10 @@ if __name__ == '__main__':
if
idx
>=
0
:
if
idx
>=
0
:
print
(
'
\t
'
,
aln
[
s
]
.
sequence
[
idx
:])
print
(
'
\t
'
,
aln
[
s
]
.
sequence
[
idx
:])
print
((
'Read'
,
len
(
aln
),
'sequences'
))
print
((
'Read'
,
len
(
aln
),
'sequences'
))
if __name__ == '__main__':
    import sys  # needed by this demo only

    # Demo: list every motif of a MEME file together with its foreground
    # rows. The file can be given as the first command-line argument; the
    # author's local path is used otherwise.
    motif_file = '/Users/mikael/meme-5.4.1/motif_databases/PROKARYOTE/collectf.meme'
    if len(sys.argv) > 1:
        motif_file = sys.argv[1]
    motifs = readPWMs(motif_file)
    if motifs is None:
        # readPWMs has already printed the line it could not process.
        sys.exit(1)
    for name in motifs:
        print(name)
        for fg in motifs[name][0]:
            print('\t', fg)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment