Commit f396eeed authored by Mikael Boden

bugs_fixed_webservice_GO

parent 6b05a37e
...@@ -186,9 +186,9 @@ def getGOTerms(genes): ...@@ -186,9 +186,9 @@ def getGOTerms(genes):
if type(genes) != list and type(genes) != set and type(genes) != tuple: if type(genes) != list and type(genes) != set and type(genes) != tuple:
genes = [genes] genes = [genes]
map = dict() map = dict()
uri_string = 'annotation/search?geneProductId='
batchsize = 100 # size of query batch batchsize = 100 # size of query batch
genecnt = 0 genecnt = 0
limitpage = 100 # number of record on each returned page
while genecnt < len(genes): while genecnt < len(genes):
genebatch = [] genebatch = []
for index in range(batchsize): for index in range(batchsize):
...@@ -197,16 +197,19 @@ def getGOTerms(genes): ...@@ -197,16 +197,19 @@ def getGOTerms(genes):
else: else:
break break
genecnt += 1 genecnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&geneProductId='
for i in range(len(genebatch)): for i in range(len(genebatch)):
gene = genebatch[i] gene = genebatch[i]
uri_string += gene + "," if i < len(genes) - 1 else gene uri_string += gene + "," if i < len(genebatch) - 1 else gene
# Construct URL # Construct URL
url = __ebiGOUrl__ + uri_string # Get the entry: fill in the fields specified below
# Get the entry: fill in the fields specified below # first turn off server certificate verification
try: if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
# first turn off server certificate verification ssl._create_default_https_context = ssl._create_unverified_context
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): page = 1
ssl._create_default_https_context = ssl._create_unverified_context try:
while (True):
url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
urlreq = urllib.request.Request(url) urlreq = urllib.request.Request(url)
urlreq.add_header('Accept-encoding', 'gzip') urlreq.add_header('Accept-encoding', 'gzip')
response = urllib.request.urlopen(urlreq) response = urllib.request.urlopen(urlreq)
...@@ -217,6 +220,8 @@ def getGOTerms(genes): ...@@ -217,6 +220,8 @@ def getGOTerms(genes):
else: else:
data = response.read().decode("utf-8") data = response.read().decode("utf-8")
ret = json.loads(data) ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']: for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9" genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080" gotermid = row['goId'] # would look like "GO:0002080"
...@@ -224,8 +229,11 @@ def getGOTerms(genes): ...@@ -224,8 +229,11 @@ def getGOTerms(genes):
map[genename] = set([gotermid]) map[genename] = set([gotermid])
else: else:
map[genename].add(gotermid) map[genename].add(gotermid)
except urllib.error.HTTPError as ex: if len(ret['results']) < limitpage:
raise RuntimeError(ex.read()) break
page += 1
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
return map return map
def getGenes(goterms, taxo=None): def getGenes(goterms, taxo=None):
...@@ -238,26 +246,52 @@ def getGenes(goterms, taxo=None): ...@@ -238,26 +246,52 @@ def getGenes(goterms, taxo=None):
if type(goterms) != list and type(goterms) != set and type(goterms) != tuple: if type(goterms) != list and type(goterms) != set and type(goterms) != tuple:
goterms = [goterms] goterms = [goterms]
map = dict() map = dict()
uri_string = 'annotation/search?taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId=' batchsize = 10 # size of query batch
for i in range(len(goterms)): termcnt = 0
goterm = goterms[i] limitpage = 100 # number of record on each returned page
uri_string += goterm + "," if i < len(goterms) - 1 else goterm while termcnt < len(goterms):
# Get the entry: fill in the fields specified below termbatch = []
try: for index in range(batchsize):
if termcnt < len(goterms):
termbatch.append(goterms[termcnt])
else:
break
termcnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)):
term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term
# first turn off server certificate verification # first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context ssl._create_default_https_context = ssl._create_unverified_context
data = urllib.request.urlopen(__ebiGOUrl__ + uri_string).read().decode("utf-8") page = 1
ret = json.loads(data) try:
for row in ret['results']: while (True):
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9" url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
gotermid = row['goId'] # would look like "GO:0002080" urlreq = urllib.request.Request(url)
if not gotermid in map: urlreq.add_header('Accept-encoding', 'gzip')
map[gotermid] = set([genename]) response = urllib.request.urlopen(urlreq)
else: if response.info().get('Content-Encoding') == 'gzip':
map[gotermid].add(genename) buf = StringIO(response.read())
except urllib.error.HTTPError as ex: f = gzip.GzipFile(fileobj=buf)
raise RuntimeError(ex.read()) data = f.read().decode("utf-8")
else:
data = response.read().decode("utf-8")
ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080"
if not gotermid in map:
map[gotermid] = set([genename])
else:
map[gotermid].add(genename)
if len(ret['results']) < limitpage:
break
page += 1
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
return map return map
class EBI(object): class EBI(object):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment