Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
import ast | |
import math | |
import re | |
import time | |
import httpx | |
from utils import rshift | |
class TokenAcquirer:
    """Generate the request token expected by translate.google.com.

    translate.google.com uses a token to authorize requests; third parties
    are not issued one. This class is the result of reverse engineering the
    obfuscated and minified code Google uses to generate that token.

    The token (e.g. ``744915.856682``) combines two inputs:

    * a seed ("TKK") scraped from the translate page. Its integer part is
      compared against the number of whole hours since the Unix epoch, so
      the seed is re-fetched at most once per hour;
    * the text that will be translated.

    Refreshing the seed costs one additional HTTP request to ``host``.

    Example usage (the printed value depends on the current seed):
        >>> import httpx
        >>> from googletrans.gtoken import TokenAcquirer
        >>> acquirer = TokenAcquirer(client=httpx.Client())
        >>> text = 'test'
        >>> tk = acquirer.do(text)
        >>> tk
        950629.577246
    """

    # Both patterns are currently identical. RE_RAWTKK is tried first and
    # yields the seed verbatim; RE_TKK feeds the legacy expression parser.
    RE_TKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)
    RE_RAWTKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)

    # Binary operators the legacy seed expression may apply to ``a`` and
    # ``b``. A fixed dispatch table replaces the former ``eval`` call.
    _BINARY_OPS = {
        '+': lambda x, y: x + y,
        '-': lambda x, y: x - y,
        '*': lambda x, y: x * y,
        '**': lambda x, y: x ** y,
        '^': lambda x, y: x ^ y,
    }

    # AST operator nodes that override the default '+' operator.
    _AST_OPERATORS = {
        ast.Sub: '-',
        ast.Mult: '*',
        ast.Pow: '**',
        ast.BitXor: '^',
    }

    def __init__(self, client: 'httpx.Client', tkk='0', host='translate.google.com'):
        """Store the HTTP client, the initial seed and the normalized host.

        :param client: object whose ``get(url)`` returns a response exposing
            a ``text`` attribute (an ``httpx.Client`` in normal use).
        :param tkk: initial seed; the default ``'0'`` forces a refresh on the
            first call to :meth:`do`.
        :param host: host name or full URL of the translate page. A bare
            host name is prefixed with ``https://``.
        """
        self.client = client
        self.tkk = tkk
        # Test the scheme prefix rather than a substring, so a bare host
        # that merely contains "http" (e.g. "httpbin.org") gets a scheme.
        if host.startswith(('http://', 'https://')):
            self.host = host
        else:
            self.host = 'https://' + host

    def _update(self):
        """Refresh ``self.tkk`` from the translate page when it is stale.

        :raises Exception: when no seed can be located in the page.
        """
        # The seed's integer part is the hour count since the epoch; there
        # is nothing to do while it still matches the current hour.
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        page = self.client.get(self.host).text

        raw_tkk = self.RE_RAWTKK.search(page)
        if raw_tkk:
            self.tkk = raw_tkk.group(1)
            return

        # Legacy fallback: only reachable when RE_TKK differs from
        # RE_RAWTKK (e.g. overridden in a subclass).
        legacy = self.RE_TKK.search(page)
        if legacy is None:
            raise Exception(
                'Could not find TKK token for this request.\n'
                'See https://github.com/ssut/py-googletrans/issues/234 '
                'for more details.'
            )

        # Stripping the reserved word 'var' leaves text that parses as
        # Python source.
        code = legacy.group(1).replace('var ', '')
        # Unescape special ASCII characters such as \x3d (=).
        code = code.encode().decode('unicode-escape')
        if code:
            self.tkk = self._parse_legacy_tkk(code)

    @staticmethod
    def _is_number(node):
        """Return True when *node* is a non-boolean numeric literal."""
        return (
            isinstance(node, ast.Constant)
            and isinstance(node.value, (int, float, complex))
            and not isinstance(node.value, bool)
        )

    @classmethod
    def _parse_legacy_tkk(cls, code):
        """Compute the seed from a legacy ``a=..;b=..;return n+'.'+(a+b)`` body.

        :param code: expression source with ``var`` removed and escapes
            decoded.
        :returns: the seed formatted as ``'<n>.<a op b>'``.
        :raises SyntaxError: when *code* cannot be parsed.
        """
        tree = ast.parse(code)
        seen_return = False
        op = '+'
        n = 0
        keys = {'a': 0, 'b': 0}

        # ast.walk is breadth-first; the order of the checks below matters
        # because the last numeric literal seen after the return wins.
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                target = node.targets[0]
                if not isinstance(target, ast.Name) or target.id not in keys:
                    continue
                value = node.value
                if cls._is_number(value):
                    keys[target.id] = value.value
                # the value can sometimes be negative
                elif (isinstance(value, ast.UnaryOp)
                      and isinstance(value.op, ast.USub)
                      and cls._is_number(value.operand)):
                    keys[target.id] = -value.operand.value
            elif isinstance(node, ast.Return):
                # parameters should be set after this point
                seen_return = True
            elif not seen_return:
                continue
            elif cls._is_number(node):
                n = node.value
            elif n > 0:
                # '+' is the default; an Add node leaves the current
                # operator untouched, any other known operator replaces it.
                op = cls._AST_OPERATORS.get(type(node), op)

        value = cls._BINARY_OPS[op](keys['a'], keys['b'])
        return '{}.{}'.format(n, value)

    def _lazy(self, value):
        """Return a zero-argument callable that yields *value*.

        Kept only for parity with the obfuscated original; nothing in this
        class calls it. The JavaScript counterpart is:

        ... code-block: javascript

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Apply the shift/combine program *b* to the integer *a*.

        *b* is read three characters at a time:

        1. combine step: ``'+'`` adds (truncated to 32 bits), anything else
           XORs;
        2. shift direction: ``'+'`` calls ``rshift``, anything else shifts
           left;
        3. shift amount: a decimal digit, or a letter from ``'a'`` upwards
           meaning ``ord(letter) - 87`` (so ``'a'`` is 10).

        A trailing group of fewer than three characters is ignored.

        :returns: the transformed integer.
        """
        for pos in range(0, len(b) - 2, 3):
            combine, direction, amount = b[pos], b[pos + 1], b[pos + 2]
            amount = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
            # rshift comes from the project's utils module; presumably an
            # unsigned right shift like JavaScript's >>> - TODO confirm.
            if direction == '+':
                shifted = rshift(a, amount)
            else:
                shifted = a << amount
            if combine == '+':
                a = (a + shifted) & 4294967295
            else:
                a ^= shifted
        return a

    def acquire(self, text):
        """Compute the token for *text* from the current ``self.tkk``.

        :param text: the string that will be translated.
        :returns: the token as ``'<x>.<x ^ seed_hours>'``, where
            ``seed_hours`` is the integer part of the seed (0 when the seed
            has no ``'.'``).
        """
        # Convert text to UTF-16 code units. Python doesn't natively use
        # Unicode surrogates, so split astral code points into pairs.
        units = []
        for char in text:
            code_point = ord(char)
            if code_point < 0x10000:
                units.append(code_point)
            else:
                offset = code_point - 0x10000
                units.append(0xD800 + offset // 0x400)
                units.append(0xDC00 + offset % 0x400)

        seed = self.tkk if self.tkk != '0' else ''
        seed_parts = seed.split('.')
        has_fraction = len(seed_parts) > 1
        seed_hours = int(seed_parts[0]) if has_fraction else 0

        # Re-encode the code units as UTF-8 byte values.
        encoded = []
        index = 0
        count = len(units)
        while index < count:
            unit = units[index]
            if unit < 128:
                # plain ASCII: one byte
                encoded.append(unit)
            elif unit < 2048:
                # two-byte sequence
                encoded.append(unit >> 6 | 192)
                encoded.append(unit & 63 | 128)
            else:
                is_pair = (
                    (unit & 64512) == 55296
                    and index + 1 < count
                    and (units[index + 1] & 64512) == 56320
                )
                if is_pair:
                    # surrogate pair: rebuild the code point, four bytes
                    index += 1
                    unit = 65536 + ((unit & 1023) << 10) + (units[index] & 1023)
                    encoded.append(unit >> 18 | 240)
                    encoded.append(unit >> 12 & 63 | 128)
                else:
                    # three-byte sequence
                    encoded.append(unit >> 12 | 224)
                encoded.append(unit >> 6 & 63 | 128)
                encoded.append(unit & 63 | 128)
            index += 1

        # Fold every byte into the running hash, then finalize it.
        acc = seed_hours
        for byte in encoded:
            acc += byte
            acc = self._xr(acc, '+-a^+6')
        acc = self._xr(acc, '+-3^+b+-f')
        acc ^= int(seed_parts[1]) if has_fraction else 0
        if acc < 0:  # pragma: nocover
            acc = (acc & 2147483647) + 2147483648
        acc %= 1000000  # int(1E6)

        return '{}.{}'.format(acc, acc ^ seed_hours)

    def do(self, text):
        """Refresh the seed if needed and return the token for *text*."""
        self._update()
        return self.acquire(text)