Commit f396eeed authored by Mikael Boden

bugs_fixed_webservice_GO

parent 6b05a37e
......@@ -186,9 +186,9 @@ def getGOTerms(genes):
if type(genes) != list and type(genes) != set and type(genes) != tuple:
genes = [genes]
map = dict()
uri_string = 'annotation/search?geneProductId='
batchsize = 100 # size of query batch
genecnt = 0
limitpage = 100 # number of record on each returned page
while genecnt < len(genes):
genebatch = []
for index in range(batchsize):
......@@ -197,16 +197,19 @@ def getGOTerms(genes):
else:
break
genecnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&geneProductId='
for i in range(len(genebatch)):
gene = genebatch[i]
uri_string += gene + "," if i < len(genes) - 1 else gene
# Construct URL
url = __ebiGOUrl__ + uri_string
# Get the entry: fill in the fields specified below
try:
# first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
uri_string += gene + "," if i < len(genebatch) - 1 else gene
# Construct URL
# Get the entry: fill in the fields specified below
# first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
page = 1
try:
while (True):
url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
urlreq = urllib.request.Request(url)
urlreq.add_header('Accept-encoding', 'gzip')
response = urllib.request.urlopen(urlreq)
......@@ -217,6 +220,8 @@ def getGOTerms(genes):
else:
data = response.read().decode("utf-8")
ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080"
......@@ -224,8 +229,11 @@ def getGOTerms(genes):
map[genename] = set([gotermid])
else:
map[genename].add(gotermid)
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
if len(ret['results']) < limitpage:
break
page += 1
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
return map
def getGenes(goterms, taxo=None):
......@@ -238,26 +246,52 @@ def getGenes(goterms, taxo=None):
if type(goterms) != list and type(goterms) != set and type(goterms) != tuple:
goterms = [goterms]
map = dict()
uri_string = 'annotation/search?taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(goterms)):
goterm = goterms[i]
uri_string += goterm + "," if i < len(goterms) - 1 else goterm
# Get the entry: fill in the fields specified below
try:
batchsize = 10 # size of query batch
termcnt = 0
limitpage = 100 # number of record on each returned page
while termcnt < len(goterms):
termbatch = []
for index in range(batchsize):
if termcnt < len(goterms):
termbatch.append(goterms[termcnt])
else:
break
termcnt += 1
uri_string = 'annotation/search?limit=' + str(limitpage) + '&taxonId=' + taxo + "&goId=" if taxo else 'annotation/search?goId='
for i in range(len(termbatch)):
term = termbatch[i]
uri_string += term + "," if i < len(termbatch) - 1 else term
# first turn off server certificate verification
if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
ssl._create_default_https_context = ssl._create_unverified_context
data = urllib.request.urlopen(__ebiGOUrl__ + uri_string).read().decode("utf-8")
ret = json.loads(data)
for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080"
if not gotermid in map:
map[gotermid] = set([genename])
else:
map[gotermid].add(genename)
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
page = 1
try:
while (True):
url = __ebiGOUrl__ + uri_string + '&page=' + str(page)
urlreq = urllib.request.Request(url)
urlreq.add_header('Accept-encoding', 'gzip')
response = urllib.request.urlopen(urlreq)
if response.info().get('Content-Encoding') == 'gzip':
buf = StringIO(response.read())
f = gzip.GzipFile(fileobj=buf)
data = f.read().decode("utf-8")
else:
data = response.read().decode("utf-8")
ret = json.loads(data)
if page == 1 and int(ret['numberOfHits']) > limitpage * 100:
print('Warning:', ret['numberOfHits'], 'matches in a query. Be patient.')
for row in ret['results']:
genename = row['geneProductId'] # would look like "UniProtKB:A0A140VJQ9"
gotermid = row['goId'] # would look like "GO:0002080"
if not gotermid in map:
map[gotermid] = set([genename])
else:
map[gotermid].add(genename)
if len(ret['results']) < limitpage:
break
page += 1
except urllib.error.HTTPError as ex:
raise RuntimeError(ex.read())
return map
class EBI(object):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment