From b25a418f12900b59d100d0f39acba08441fd4ba3 Mon Sep 17 00:00:00 2001
From: Adam Saponara
Date: Wed, 24 Jul 2024 22:12:24 -0400
Subject: [PATCH] send invalid codepoints as U+FFFD

---
 termbox2.h                            | 16 +++++++++++++++-
 tests/test_invalid_utf8/expected.ansi |  2 +-
 tests/test_invalid_utf8/test.php      |  5 ++++-
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/termbox2.h b/termbox2.h
index 5fc791f..ce59acf 100644
--- a/termbox2.h
+++ b/termbox2.h
@@ -1535,6 +1535,7 @@ static int send_sgr(uint32_t fg, uint32_t bg, int fg_is_default,
 static int send_cursor_if(int x, int y);
 static int send_char(int x, int y, uint32_t ch);
 static int send_cluster(int x, int y, uint32_t *ch, size_t nch);
+static int is_valid_codepoint(uint32_t ch);
 static int convert_num(uint32_t num, char *buf);
 static int cell_cmp(struct tb_cell *a, struct tb_cell *b);
 static int cell_copy(struct tb_cell *dst, struct tb_cell *src);
@@ -3225,9 +3226,13 @@ static int send_cluster(int x, int y, uint32_t *ch, size_t nch) {
     for (i = 0; i < (int)nch; i++) {
         uint32_t ch32 = *(ch + i);
         int chu8_len;
-        if (ch32 == 0) { // replace null with space (from termbox 19dbee5)
+        if (ch32 == 0) {
+            // Replace NULL with space (from termbox 19dbee5)
             chu8_len = 1;
             chu8[0] = ' ';
+        } else if (!is_valid_codepoint(ch32)) {
+            // Replace invalid codepoint with U+FFFD
+            chu8_len = tb_utf8_unicode_to_char(chu8, 0xfffd);
         } else {
             chu8_len = tb_utf8_unicode_to_char(chu8, ch32);
         }
@@ -3237,6 +3242,15 @@ static int send_cluster(int x, int y, uint32_t *ch, size_t nch) {
     return TB_OK;
 }
 
+static int is_valid_codepoint(uint32_t ch) {
+    return (
+        ch > 0x10ffff // out of range
+        || ((ch & 0xfffe) == 0xfffe) // noncharacter
+        || (ch >= 0xfdd0 && ch <= 0xfdef) // noncharacter
+        || (ch >= 0xd800 && ch <= 0xdfff) // surrogate pair (UTF-16 only)
+    ) ? 0 : 1;
+}
+
 static int convert_num(uint32_t num, char *buf) {
     int i, l = 0;
     char ch;
diff --git a/tests/test_invalid_utf8/expected.ansi b/tests/test_invalid_utf8/expected.ansi
index b440b63..ec78ff0 100644
--- a/tests/test_invalid_utf8/expected.ansi
+++ b/tests/test_invalid_utf8/expected.ansi
@@ -1,5 +1,5 @@
 #5foo�
-
+#5�
 
 
 
diff --git a/tests/test_invalid_utf8/test.php b/tests/test_invalid_utf8/test.php
index 76480dd..46b0947 100755
--- a/tests/test_invalid_utf8/test.php
+++ b/tests/test_invalid_utf8/test.php
@@ -2,7 +2,10 @@
 declare(strict_types=1);
 
 $test->ffi->tb_init();
-$test->ffi->tb_print_ex(0, 0, 0, 0, NULL, "foo\xc2\x00password");
+
+$test->ffi->tb_print_ex(0, 0, 0, 0, NULL, "foo\xc2\x00password"); // stop at NULL
+$test->ffi->tb_set_cell(0, 1, 0xffff, 0, 0); // invalid codepoint
+
 $test->ffi->tb_present();
 
 $test->screencap();