This repository has been archived on 2024-07-30. You can view files and clone it, but cannot push or open issues or pull requests.
python-aternos/python_aternos/atjsparse.py

228 lines
5.7 KiB
Python
Raw Normal View History

2022-07-01 14:28:39 +04:00
"""Parsing and executing JavaScript code"""
2022-12-25 12:49:27 +04:00
import abc
import json
2022-01-06 19:57:26 +04:00
import base64
2022-12-25 12:49:27 +04:00
import subprocess
2022-06-23 15:13:56 +04:00
2022-12-25 12:49:27 +04:00
from pathlib import Path
from typing import Optional, Union
from typing import Type, Any
2022-12-25 12:49:27 +04:00
import regex
import js2py
import requests
2022-06-23 15:13:56 +04:00
from .atlog import log
2022-12-25 12:49:27 +04:00
# Shared interpreter instance; stays None until
# get_interpreter() creates one and stores it here
js: Optional['Interpreter'] = None
class Interpreter(abc.ABC):
    """Abstract interface that every JS interpreter backend implements"""

    def __init__(self) -> None:
        """Abstract interface that every JS interpreter backend implements"""

    @abc.abstractmethod
    def exec_js(self, func: str) -> None:
        """Runs a piece of JavaScript code

        Args:
            func (str): JS function
        """

    @abc.abstractmethod
    def get_var(self, name: str) -> Any:
        """Reads the value of a JS variable
        held by the interpreter

        Args:
            name (str): Variable name

        Returns:
            Variable value
        """

    def __getitem__(self, name: str) -> Any:
        """Lets callers write `js[name]`
        as a shorthand for `js.get_var(name)`

        Args:
            name (str): Variable name

        Returns:
            Variable value
        """

        value = self.get_var(name)
        return value
class NodeInterpreter(Interpreter):
    """Interpreter backend that delegates to Node.JS
    through a small web server run as a child process"""

    def __init__(
            self,
            node: Union[str, Path] = 'node',
            host: str = 'localhost',
            port: int = 8001) -> None:
        """Launches `data/server.js` (located next to this module)
        under Node.JS and reads the first line it prints

        Args:
            node (Union[str, Path], optional): Path to `node` executable
            host (str, optional): Hostname for the web server
            port (int, optional): Port for the web server
        """

        super().__init__()

        # Timeout (in seconds) applied to every HTTP request
        self.timeout = 2
        self.url = f'http://{host}:{port}'

        script = Path(__file__).absolute().parent / 'data' / 'server.js'
        command = [
            node, script,
            f'{port}', host,
        ]

        # The process has to outlive this method,
        # so it cannot be wrapped in a `with` block
        # pylint: disable=consider-using-with
        self.proc = subprocess.Popen(
            args=command,
            stdout=subprocess.PIPE,
        )
        # pylint: enable=consider-using-with

        pipe = self.proc.stdout
        assert pipe is not None

        # Blocks until the child writes a line or closes its stdout;
        # presumably this is the server's "ready" message
        first_line = pipe.readline()
        log.debug('Received from server.js: %s', first_line)

    def _post(self, payload: str) -> 'requests.Response':
        """Sends `payload` to the Node.JS server as a POST body

        Args:
            payload (str): Text to send

        Returns:
            Server response (an HTTP error status raises an exception)
        """

        reply = requests.post(self.url, data=payload, timeout=self.timeout)
        reply.raise_for_status()
        return reply

    def exec_js(self, func: str) -> None:
        # The response body is not needed here
        self._post(func)

    def get_var(self, name: str) -> Any:
        reply = self._post(name)
        body = reply.content
        log.debug('NodeJS response: %s', body)
        return json.loads(body)

    def __del__(self) -> None:
        try:
            child = self.proc
            child.terminate()
            # Wait for the process to exit
            child.communicate()
        except AttributeError:
            # `self.proc` is missing if `__init__` failed before Popen
            log.warning(
                'NodeJS process was not initialized, but __del__ was called'
            )
2022-12-25 12:49:27 +04:00
class Js2PyInterpreter(Interpreter):
    """Js2Py interpreter,
    uses js2py library to execute code"""

    # Thanks to http://regex.inginf.units.it
    # Matches from the first word character
    # up to (not including) the first `}`
    arrowexp = regex.compile(r'\w[^\}]*+')

    # JS comment blocks. DOTALL lets a block span several lines,
    # and `.*?` (instead of `.+?`) also matches the empty `/**/`,
    # which otherwise would be paired with the `*/` of a later comment
    commentexp = regex.compile(r'/\*.*?\*/', regex.DOTALL)

    def __init__(self) -> None:
        """Js2Py interpreter,
        uses js2py library to execute code"""

        super().__init__()

        # Make the Python `atob` callable from JS code
        ctx = js2py.EvalJs({'atob': atob})

        # Define empty stubs for some browser globals,
        # presumably so that executed scripts
        # referencing them do not fail
        ctx.execute('''
        window.Map = function(_i){ };
        window.setTimeout = function(_f,_t){ };
        window.setInterval = function(_f,_t){ };
        window.encodeURIComponent = function(_s){ };
        window.document = { };
        document.doctype = { };
        document.getElementById = function(_s){ };
        ''')

        self.ctx = ctx

    def exec_js(self, func: str) -> None:
        # js2py cannot run the code as is,
        # so it is converted before execution
        self.ctx.execute(self.to_ecma5(func))

    def get_var(self, name: str) -> Any:
        return self.ctx[name]

    def to_ecma5(self, func: str) -> str:
        """Converts from ECMA6 format to ECMA5
        (replacing arrow expressions)
        and removes comment blocks

        Args:
            func (str): ECMA6 function

        Returns:
            ECMA5 function. If `arrowexp` finds nothing,
            the code is returned with only comments removed
        """

        # Delete anything between /* and */
        # (including multi-line and empty comments)
        func = self.commentexp.sub('', func)

        # Search for arrow expressions
        match = self.arrowexp.search(func)
        if match is None:
            return func

        # Convert the function: wrap the matched body
        # into an immediately invoked ECMA5 function
        conv = '(function(){' + match[0] + '})()'

        # Convert 1 more expression.
        # It doesn't change,
        # so it was hardcoded
        # as a regexp
        return regex.sub(
            r'(?:s|\(s\)) => s.split\([\'"]{2}\).reverse\(\).join\([\'"]{2}\)',
            'function(s){return s.split(\'\').reverse().join(\'\')}',
            conv
        )
2022-06-23 15:13:56 +04:00
def atob(s: str) -> str:
    """Wrapper for the built-in library function.
    Decodes a base64 string.

    Input whose `=` padding is missing or truncated
    (which JavaScript's `atob` accepts, e.g. `atob("YQ")`)
    is decoded too instead of raising an error

    Args:
        s (str): Encoded data

    Returns:
        Decoded string

    Raises:
        ValueError: If the data is not valid base64
            even after the padding was repaired
            (`binascii.Error` is a subclass of it),
            or if the decoded bytes are not valid UTF-8
    """

    # `s` may be a non-str object coming from the JS side
    encoded = str(s)

    try:
        # Well-formed input takes exactly the same path as before
        raw = base64.standard_b64decode(encoded)
    except ValueError:
        # Most likely incorrect padding: drop whitespace
        # and any existing `=`, then pad to a multiple of 4.
        # Data that is still invalid raises again here
        compact = ''.join(encoded.split()).rstrip('=')
        compact += '=' * (-len(compact) % 4)
        raw = base64.standard_b64decode(compact)

    return raw.decode('utf-8')
def get_interpreter(
        *args,
        create: Type[Interpreter] = Js2PyInterpreter,
        **kwargs) -> 'Interpreter':
    """Returns the module-wide JS interpreter,
    building it on the first call.
    `*args` and `**kwargs` are forwarded
    to the interpreter's `__init__`
    only when a new instance is created;
    if one already exists, they
    (and `create`) are ignored

    Args:
        create (Type[Interpreter], optional): Preferred interpreter

    Returns:
        JS interpreter instance
    """

    global js  # pylint: disable=global-statement

    # Reuse the existing instance if there is one
    if js is not None:
        return js

    # Otherwise build it and remember it for later calls
    js = create(*args, **kwargs)
    return js