From 7158c3745d0f2b3c54e726ff74d400d1a72f63ba Mon Sep 17 00:00:00 2001
From: Jeong YunWon
Date: Thu, 4 Apr 2024 18:15:52 +0900
Subject: [PATCH] is_unicodepoint

---
Review notes (text between '---' and the first 'diff --git' is not part of
the commit):
- write_utf8 now receives the popped value unconverted and asks
  bigint.is_unicodepoint whether it is a valid code point before turning it
  into an int with bigint.toint.
- bigint.is_unicodepoint uses int_lt, not int_le: 0x110000 itself must be
  rejected because unichr() only accepts values below 0x110000 (the check
  removed from write_utf8 was `0 <= value < 0x110000`).
- warn_utf8_range is added but nothing in this patch calls it, and
  write_utf8 no longer emits the 'write-utf8-range' warning. Confirm whether
  that is intended.
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch; check context-line indentation against the target tree
  before applying.

 aheui/aheui.py        | 26 +++++++++++++++++---------
 aheui/int/bigint.py   |  5 +++++
 aheui/int/smallint.py | 10 +++++++++-
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/aheui/aheui.py b/aheui/aheui.py
index 7bf46b3..ec5e2ad 100644
--- a/aheui/aheui.py
+++ b/aheui/aheui.py
@@ -283,18 +283,26 @@ def read_number(input_buffer=input_buffer):
     return num
 
 
-@jit.dont_look_inside
-def write_number(value):
-    os.write(outfp, value)
+def write_number(value_str):
+    os.write(outfp, value_str)
 
 
-@jit.dont_look_inside
 def write_utf8(warnings, value):
-    if not (0 <= value < 0x110000):
-        warnings.warn(b'write-utf8-range', value)
-        value = 0xfffd
-    os.write(outfp, unichr(value).encode('utf-8'))
+    REPLACE_CHAR = unichr(0xfffd).encode('utf-8')
+
+    if bigint.is_unicodepoint(value):
+        codepoint = bigint.toint(value)
+        unicode_char = unichr(codepoint)
+        bytes = unicode_char.encode('utf-8')
+    else:
+        bytes = REPLACE_CHAR
+    os.write(outfp, bytes)
+
+
+def warn_utf8_range(warnings, value):
+    warnings.warn(b'write-utf8-range', value)
+    os.write(outfp, unichr(0xfffd).encode('utf-8'))
 
 
 class Program(object):
     _immutable_fields_ = ['labels[**]', 'opcodes[*]', 'values[*]', 'size']
@@ -417,7 +425,7 @@ def mainloop(program, debug):
             r = selected.pop()
             write_number(bigint.str(r))
         elif op == c.OP_POPCHAR:
-            r = selected.pop_longlong()
+            r = selected.pop()
             write_utf8(warnings, r)
         elif op == c.OP_PUSHNUM:
             num = read_number()
diff --git a/aheui/int/bigint.py b/aheui/int/bigint.py
index 4744b8a..fb4ddc5 100644
--- a/aheui/int/bigint.py
+++ b/aheui/int/bigint.py
@@ -67,3 +67,8 @@ def ge(r1, r2):
 def is_zero(r):
     # return r.sign == 0
     return r._size == 0  # pypy 7.3.15
+
+
+@jit.elidable
+def is_unicodepoint(r):
+    return 0 <= r._size and r.int_lt(0x110000)
diff --git a/aheui/int/smallint.py b/aheui/int/smallint.py
index cbd09b6..c61086d 100644
--- a/aheui/int/smallint.py
+++ b/aheui/int/smallint.py
@@ -2,7 +2,7 @@
 
 try:
     import builtins
-except ImportError:  # python2
+except ImportError:
     builtins = __builtins__
 
 from aheui._compat import _bytestr
@@ -37,6 +37,10 @@ def str(r):
     return _bytestr(r)
 
 
+def hex(r):
+    return builtins.hex(r)  # bare `hex` here is this function: infinite recursion
+
+
 def add(r1, r2):
     return r1 + r2
 
@@ -63,3 +67,7 @@ def ge(r1, r2):
 
 def is_zero(r):
     return r == 0
+
+
+def is_unicodepoint(r):
+    return 0 <= r < 0x110000  # valid code points are U+0000..U+10FFFF