Skip to content

Commit

Permalink
[jsinterp] Implement typeof operator
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkf committed Dec 7, 2024
1 parent c509896 commit e02b214
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 50 deletions.
15 changes: 14 additions & 1 deletion test/test_jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,20 @@ def test_comma(self):
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)

def test_void(self):
self._test('function f() { return void 42; }', None)
self._test('function f() { return void 42; }', JS_Undefined)

def test_typeof(self):
    """Check the string that `typeof` yields for each supported operand kind."""
    # (JS source, expected result of calling f), run in this order
    cases = (
        ('function f() { return typeof undefined; }', 'undefined'),
        ('function f() { return typeof NaN; }', 'number'),
        ('function f() { return typeof Infinity; }', 'number'),
        ('function f() { return typeof true; }', 'boolean'),
        ('function f() { return typeof null; }', 'object'),
        ('function f() { return typeof "a string"; }', 'string'),
        ('function f() { return typeof 42; }', 'number'),
        ('function f() { return typeof 42.42; }', 'number'),
        ('function f() { var g = function(){}; return typeof g; }', 'function'),
        ('function f() { return typeof {key: "value"}; }', 'object'),
    )
    for js_code, expected in cases:
        self._test(js_code, expected)
    # not yet implemented: Symbol, BigInt

def test_return_function(self):
jsi = JSInterpreter('''
Expand Down
156 changes: 107 additions & 49 deletions youtube_dl/jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import operator
import re

from functools import update_wrapper
from functools import update_wrapper, wraps

from .utils import (
error_to_compat_str,
Expand All @@ -23,6 +23,7 @@
compat_filter as filter,
compat_itertools_zip_longest as zip_longest,
compat_map as map,
compat_numeric_types,
compat_str,
)

Expand Down Expand Up @@ -138,6 +139,43 @@ def _js_ternary(cndn, if_true=True, if_false=False):
return if_true


# Adapt the one-argument callable `op` into a two-argument operator function:
# the returned function ignores its first argument and returns `op` applied
# to its second argument.
# NOTE(review): presumably the ignored first argument is the (empty) left
# operand, so unary operators fit the same calling convention as the binary
# operator definitions -- confirm against JSInterpreter._operator
def _js_unary_op(op):

# NOTE(review): wraps_op is not defined in the visible part of this file;
# its effect on the wrapper's metadata should be confirmed at its definition
@wraps_op(op)
def wrapped(_, a):
# `_` is deliberately unused: only the second argument reaches `op`
return op(a)

return wrapped


# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
def _js_typeof(expr):
    """Return the JS `typeof` result string for the Python value `expr`.

    Mapping implemented here:
      * JS_Undefined                        -> 'undefined'
      * True / False                        -> 'boolean'
      * None (as produced for JS null)      -> 'object'
      * _NaN, _Infinity and any instance of
        compat_numeric_types                -> 'number'
      * any instance of compat_basestring   -> 'string'
      * any other callable                  -> 'function'
      * everything else                     -> 'object'

    Symbol and BigInt are not implemented.
    """
    # bool has to be recognised explicitly and first: in Python
    # `1 == True`, `0 == False` and their hashes are equal, so a table keyed
    # by True/False would also match the numbers 1, 0, 1.0 and 0.0 and report
    # them as 'boolean'; bool is also a subclass of int, so it must not be
    # allowed to reach the numeric test below
    if isinstance(expr, bool):
        return 'boolean'
    try:
        # special singleton-like values, matched by hash/equality
        return {
            JS_Undefined: 'undefined',
            _NaN: 'number',
            _Infinity: 'number',
            None: 'object',
        }[expr]
    except (TypeError, KeyError):
        # TypeError: expr is unhashable; KeyError: expr is not a special value
        pass
    for t, n in (
        (compat_basestring, 'string'),
        (compat_numeric_types, 'number'),
    ):
        if isinstance(expr, t):
            return n
    # TODO: Symbol, BigInt
    return 'function' if callable(expr) else 'object'


# (op, definition) in order of binding priority, tightest first
# avoid dict to maintain order
# definition None => Defined in JSInterpreter._operator
Expand Down Expand Up @@ -176,6 +214,11 @@ def _js_ternary(cndn, if_true=True, if_false=False):
('&&', None),
)

# Word-named unary operators as (op, definition) pairs; each definition is
# produced by _js_unary_op():
#   'void'   discards its operand and yields JS_Undefined
#   'typeof' yields the result of _js_typeof() for its operand
_UNARY_OPERATORS_X = (
('void', _js_unary_op(lambda _: JS_Undefined)),
('typeof', _js_unary_op(_js_typeof)),
)

# Regex alternation of the escaped operator strings taken from _OPERATORS and
# _LOG_OPERATORS only (the entries of _UNARY_OPERATORS_X are not included)
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))

# Pattern for a name: an ASCII letter, '_' or '$', followed by any number of
# word characters or '$'
_NAME_RE = r'[a-zA-Z_$][\w$]*'
Expand Down Expand Up @@ -242,6 +285,7 @@ def truncate_string(s, left, right=0):

@classmethod
def wrap_interpreter(cls, f):
@wraps(f)
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
if cls.ENABLED and stmt.strip():
cls.write(stmt, level=allow_recursion)
Expand Down Expand Up @@ -347,6 +391,8 @@ def regex_flags(cls, expr):
def __op_chars(cls):
    """Collect the set of characters that can occur in an operator.

    The set starts with ';', ',' and '[' and gains every character of each
    operator string returned by cls._all_operators(), except for operators
    whose text is purely alphabetic, which are left out.
    """
    symbolic_ops = (
        entry[0] for entry in cls._all_operators()
        if not entry[0].isalpha()
    )
    # each operator string is itself an iterable of characters
    return set(';,[').union(*symbolic_ops)

Expand Down Expand Up @@ -425,7 +471,7 @@ def _all_operators(_cached=[]):
if not _cached:
_cached.extend(itertools.chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
return _cached

def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
Expand Down Expand Up @@ -479,6 +525,52 @@ def _dump(self, obj, namespace):
_FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(')

# Try to evaluate `expr` as an operator expression.
# The operators from self._all_operators() are tried in the order given; the
# first one that splits `expr` into at least two pieces is applied: the text
# to its left is evaluated with interpret_expression() and handed, together
# with the unevaluated right-hand text, to self._operator().
# Returns a 2-tuple (value, True) when an operator was applied. If no operator
# matches, the loop ends and the function returns None implicitly, so callers
# can test the result for truthiness.
# NOTE(review): indentation is lost in this rendering; the nesting described
# in the comments below is inferred from the statements themselves.
def handle_operators(self, expr, local_vars, allow_recursion):

for op, _ in self._all_operators():
# hackety: </> have higher priority than <</>>, but don't confuse them
# for the ops '<', '>', '*' and '?' the doubled form ('<<', '>>', '**', '??')
# is passed to _separate() as skip_delims
skip_delim = (op + op) if op in '<>*?' else None
if op == '?':
# for '?' the sequence '?.' is passed as a second skip delimiter
skip_delim = (skip_delim, '?.')
separated = list(self._separate(expr, op, skip_delims=skip_delim))
if len(separated) < 2:
# expr was not split by this op: try the next operator
continue

# the last piece is the right operand; the remaining pieces, re-joined
# with op, form the left operand
right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
separated = [s.strip() for s in separated]
# drop blank trailing pieces (but never the first piece), counting them
while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
# an odd count of dropped pieces for '-' leaves one negation, which is
# put back in front of the right operand
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
# move trailing pieces made up only of OP_CHARS characters onto the
# front of the right operand
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
# likewise if the remaining last piece ends with an OP_CHARS character;
# this may empty `separated`, hence the guard on the next statement
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else ''
# if the left text ends with a dangling '*', '%', '/' or '**' (the last piece
# after splitting on it is blank), this +/- is a sign on that operator's right
# operand, so the left and right text are glued back together
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
if len(separated) > 1:
# earlier pieces remain: the re-joined text becomes the right operand
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
# nothing else on the left: there is no binary use of op here
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
# op was only a sign: move on to the next operator
continue

left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
# second tuple element is always True: it marks that an operator was applied
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True

@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
Expand Down Expand Up @@ -533,9 +625,15 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
else:
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)

if expr.startswith('void '):
left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
return None, should_return
for op, _ in _UNARY_OPERATORS_X:
if not expr.startswith(op):
continue
operand = expr[len(op):]
if not operand or operand[0] != ' ':
continue
op_result = self.handle_operators(expr, local_vars, allow_recursion)
if op_result:
return op_result[0], should_return

if expr.startswith('{'):
inner, outer = self._separate_at_paren(expr)
Expand Down Expand Up @@ -582,7 +680,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if_expr, expr = self._separate_at_paren(expr)
else:
# may lose ... else ... because of ll.368-374
if_expr, expr = self._separate_at_paren(expr, delim=';')
if_expr, expr = self._separate_at_paren(' %s;' % (expr,), delim=';')
else_expr = None
m = re.match(r'else\s*(?P<block>\{)?', expr)
if m:
Expand Down Expand Up @@ -790,49 +888,9 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
return self._index(val, idx), should_return

for op, _ in self._all_operators():
# hackety: </> have higher priority than <</>>, but don't confuse them
skip_delim = (op + op) if op in '<>*?' else None
if op == '?':
skip_delim = (skip_delim, '?.')
separated = list(self._separate(expr, op, skip_delims=skip_delim))
if len(separated) < 2:
continue

right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
separated = [s.strip() for s in separated]
while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue

left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
op_result = self.handle_operators(expr, local_vars, allow_recursion)
if op_result:
return op_result[0], should_return

if md.get('attribute'):
variable, member, nullish = m.group('var', 'member', 'nullish')
Expand Down

0 comments on commit e02b214

Please sign in to comment.