Commit f396eeed authored by Mikael Boden

bugs_fixed_webservice_GO

parent 6b05a37e
...@@ -186,9 +186,9 @@ def getGOTerms(genes): ...@@ -186,9 +186,9 @@ def getGOTerms(genes):
if type(genes) != list and type(genes) != set and type(genes) != tuple: if type(genes) != list and type(genes) != set and type(genes) != tuple:
genes = [genes] genes = [genes]
map = dict() map = dict()
uri_string = 'annotation/search?geneProductId='
batchsize = 100 # size of query batch batchsize = 100 # size of query batch
genecnt = 0 genecnt = 0
limitpage = 100 # number of record on each returned page
while genecnt < len(genes): while genecnt < len(genes):
genebatch = [] genebatch = []
for index in range(batchsize): for index in range(batchsize):
...@@ -197,16 +197,19 @@ def getGOTerms(genes): ...@@ -197,16 +197,19 @@ def getGOTerms(genes):
else: else:
break break
genecnt += 1 genecnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&geneProductId='
for i in range(len(genebatch)): for i in range(len(genebatch)):
gene = genebatch[i] gene = genebatch[i]
uri_string += gene + "," if i < len(genes) - 1 else gene uri_string += gene + "," if i < len(genebatch) - 1 else gene
# Construct URL # Construct URL
url = __ebiGOUrl__ + uri_string
# Get the entry: fill in the fields specified below # Get the entry: fill in the fields specified below
try:
# first turn off server certificate verification # first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context ssl._create_default_https_context = ssl._create_unverified_context
page = 1
try:
while (True):
url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
urlreq = urllib.request.Request(url) urlreq = urllib.request.Request(url)
urlreq.add_header('Accept-encoding', 'gzip') urlreq.add_header('Accept-encoding', 'gzip')
response = urllib.request.urlopen(urlreq) response = urllib.request.urlopen(urlreq)
...@@ -217,6 +220,8 @@ def getGOTerms(genes): ...@@ -217,6 +220,8 @@ def getGOTerms(genes):
else: else:
data = response.read().decode("utf-8") data = response.read().decode("utf-8")
ret = json.loads(data) ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']: for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9" genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080" gotermid = row['goId'] # would look like "GO:0002080"
...@@ -224,6 +229,9 @@ def getGOTerms(genes): ...@@ -224,6 +229,9 @@ def getGOTerms(genes):
map[genename] = set([gotermid]) map[genename] = set([gotermid])
else: else:
map[genename].add(gotermid) map[genename].add(gotermid)
if len(ret['results']) < limitpage:
break
page += 1
except urllib.error.HTTPError as ex: except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
return map return map
...@@ -238,17 +246,40 @@ def getGenes(goterms, taxo=None): ...@@ -238,17 +246,40 @@ def getGenes(goterms, taxo=None):
if type(goterms) != list and type(goterms) != set and type(goterms) != tuple: if type(goterms) != list and type(goterms) != set and type(goterms) != tuple:
goterms = [goterms] goterms = [goterms]
map = dict() map = dict()
uri_string = 'annotation/search?taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId=' batchsize = 10 # size of query batch
for i in range(len(goterms)): termcnt = 0
goterm = goterms[i] limitpage = 100 # number of record on each returned page
uri_string += goterm + "," if i < len(goterms) - 1 else goterm while termcnt < len(goterms):
# Get the entry: fill in the fields specified below termbatch = []
try: for index in range(batchsize):
if termcnt < len(goterms):
termbatch.append(goterms[termcnt])
else:
break
termcnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)):
term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term
# first turn off server certificate verification # first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)): if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context ssl._create_default_https_context = ssl._create_unverified_context
data = urllib.request.urlopen(__ebiGOUrl__ + uri_string).read().decode("utf-8") page = 1
try:
while (True):
url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
urlreq = urllib.request.Request(url)
urlreq.add_header('Accept-encoding', 'gzip')
response = urllib.request.urlopen(urlreq)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO(response.read())
f = gzip.GzipFile(fileobj=buf)
data = f.read().decode("utf-8")
else:
data = response.read().decode("utf-8")
ret = json.loads(data) ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']: for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9" genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080" gotermid = row['goId'] # would look like "GO:0002080"
...@@ -256,6 +287,9 @@ def getGenes(goterms, taxo=None): ...@@ -256,6 +287,9 @@ def getGenes(goterms, taxo=None):
map[gotermid] = set([genename]) map[gotermid] = set([genename])
else: else:
map[gotermid].add(genename) map[gotermid].add(genename)
if len(ret['results']) < limitpage:
break
page += 1
except urllib.error.HTTPError as ex: except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read()) raise RuntimeError(ex.read())
return map return map
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment