Commit 8625943a authored by Mikael Boden

guide.py

parent b493fe5d
...@@ -485,9 +485,9 @@ def readFastaFile(filename, alphabet): ...@@ -485,9 +485,9 @@ def readFastaFile(filename, alphabet):
def writeFastaFile(filename, seqs):
    """ Write the specified sequences to a FASTA file.
    filename: path of the file to create (an existing file is overwritten)
    seqs: iterable of sequence objects; each must provide a writeFasta()
          method returning that entry's FASTA text as a string
    """
    # The context manager closes the file even if writeFasta() or a write
    # raises part-way through the iteration.
    with open(filename, 'wt') as fh:
        fh.writelines(seq.writeFasta() for seq in seqs)
def readClustalString(string, alphabet): def readClustalString(string, alphabet):
...@@ -524,7 +524,7 @@ def readClustalFile(filename, alphabet): ...@@ -524,7 +524,7 @@ def readClustalFile(filename, alphabet):
def writeClustalFile(filename, aln):
    """ Write the specified alignment to a Clustal file.
    filename: path of the file to create (an existing file is overwritten)
    aln: alignment object; must provide a writeClustal() method returning
         the alignment body as a string
    """
    # The context manager closes the file even if writeClustal() raises.
    with open(filename, 'wt') as fh:
        fh.write('CLUSTAL W (1.83) multiple sequence alignment\n\n\n') # fake header so that clustal believes it
        fh.write(aln.writeClustal())
...@@ -670,21 +670,26 @@ def readGeoFile(filename, id_column = 0): ...@@ -670,21 +670,26 @@ def readGeoFile(filename, id_column = 0):
# Our implementations are mainly serviced by EBI. # Our implementations are mainly serviced by EBI.
############################################################################### ###############################################################################
def getSequence(entryId, dbName = 'uniprotkb', alphabet = Protein_Alphabet, format = 'fasta'):
    """ Retrieve a single entry from a database
    entryId: ID for entry e.g. 'P63166' or 'SUMO1_MOUSE'; bytes are decoded as UTF-8
    dbName: name of database e.g. 'uniprotkb' or 'pdb' or 'refseqn'; see http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/dbfetch.databases for available databases
    alphabet: alphabet passed on to readFastaString when format is 'fasta'
    format: file format specific to database e.g. 'fasta' or 'uniprot' for uniprotkb (see http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/dbfetch.databases)
    Returns the first entry parsed by readFastaString when format is 'fasta';
    for any other format the response text is returned as a string.
    Raises RuntimeError (carrying the server's response body) on an HTTP error.
    See http://www.ebi.ac.uk/Tools/dbfetch/syntax.jsp for more info re URL syntax
    """
    if not isinstance(entryId, str):
        entryId = entryId.decode("utf-8")
    url ='http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?style=raw&db=' + dbName + '&format=' + format + '&id=' + entryId
    # Only the network call sits inside the try; the response is closed by
    # the context manager whether or not the read succeeds.
    try:
        with urllib.request.urlopen(url) as response:
            data = response.read()
    except urllib.error.HTTPError as ex:
        # Chain the original error so its status code stays visible in tracebacks.
        raise RuntimeError(ex.read()) from ex
    text = data.decode("utf-8")
    if format == 'fasta':
        return readFastaString(text, alphabet)[0]
    return text
def searchSequences(query, dbName): def searchSequences(query, dbName = 'uniprot'):
""" """
Retrieve multiple entries matching query from a database currently only via UniProtKB Retrieve multiple entries matching query from a database currently only via UniProtKB
query: search term(s) e.g. 'organism:9606+AND+antigen' query: search term(s) e.g. 'organism:9606+AND+antigen'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment