Skip to content

Commit 3bcd48f

Browse files
authored
Improve parse_identifier (#4691)
Ascii string length is no longer computed during string allocation. JerryScript-DCO-1.0-Signed-off-by: Daniel Batiz [email protected]
1 parent e7ffb70 commit 3bcd48f

19 files changed

+172
-92
lines changed

jerry-core/ecma/base/ecma-helpers-string.c

+28-2
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,33 @@ ecma_find_special_string (const lit_utf8_byte_t *string_p, /**< utf8 string */
335335
return NULL;
336336
} /* ecma_find_special_string */
337337

338+
/**
339+
* Allocate a new ecma-string and fill it with the given ASCII characters.
*
* The special-string lookup is tried first and its descriptor is returned when it succeeds.
* Otherwise a new string buffer is requested, its hash is computed and the bytes are copied in.
* This function does not verify that the input really is ASCII.
* NOTE(review): string_size is passed as both the first and the second argument of
* ecma_new_ecma_string_from_utf8_buffer - presumably as size and length; confirm against its signature.
340+
*
341+
* @return pointer to ecma-string descriptor
342+
*/
343+
ecma_string_t *
344+
ecma_new_ecma_string_from_ascii (const lit_utf8_byte_t *string_p, /**< ascii string, may be NULL only if string_size is 0 */
345+
lit_utf8_size_t string_size) /**< string size in bytes */
346+
{
347+
JERRY_ASSERT (string_p != NULL || string_size == 0); /* a NULL pointer is accepted only for an empty string */
348+
349+
ecma_string_t *string_desc_p = ecma_find_special_string (string_p, string_size); /* try the special-string lookup first */
350+
351+
if (string_desc_p != NULL)
352+
{
353+
return string_desc_p; /* lookup succeeded: nothing is allocated here */
354+
}
355+
356+
lit_utf8_byte_t *data_p; /* set by the allocation call below, then used as the copy destination */
357+
string_desc_p = ecma_new_ecma_string_from_utf8_buffer (string_size, string_size, &data_p); /* string_size is passed twice */
358+
359+
string_desc_p->u.hash = lit_utf8_string_calc_hash (string_p, string_size); /* hash is computed from the input bytes */
360+
memcpy (data_p, string_p, string_size); /* copy exactly string_size bytes into the new buffer */
361+
362+
return string_desc_p;
363+
} /* ecma_new_ecma_string_from_ascii */
364+
338365
/**
339366
* Allocate new ecma-string and fill it with characters from the utf8 string
340367
*
@@ -2449,8 +2476,7 @@ ecma_string_substr (const ecma_string_t *string_p, /**< pointer to an ecma strin
24492476

24502477
if (string_length == buffer_size)
24512478
{
2452-
ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos,
2453-
(lit_utf8_size_t) end_pos);
2479+
ecma_string_p = ecma_new_ecma_string_from_utf8 (start_p + start_pos, (lit_utf8_size_t) end_pos);
24542480
}
24552481
else
24562482
{

jerry-core/ecma/base/ecma-helpers.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,10 @@ ecma_length_t ecma_op_advance_string_index (ecma_string_t *str_p, ecma_length_t
299299
ecma_string_t *ecma_new_map_key_string (ecma_value_t value);
300300
bool ecma_prop_name_is_map_key (ecma_string_t *string_p);
301301
#endif /* JERRY_BUILTIN_CONTAINER */
302-
ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size);
302+
ecma_string_t *ecma_new_ecma_string_from_ascii (const lit_utf8_byte_t *string_p,
303+
lit_utf8_size_t string_size);
304+
ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p,
305+
lit_utf8_size_t string_size);
303306
ecma_string_t *ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p,
304307
lit_utf8_size_t string_size);
305308
ecma_string_t *ecma_new_ecma_external_string_from_cesu8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size,

jerry-core/ecma/base/ecma-literal-storage.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,11 @@ ecma_finalize_lit_storage (void)
165165
*/
166166
ecma_value_t
167167
ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, /**< string to be searched */
168-
lit_utf8_size_t size) /**< size of the string */
168+
lit_utf8_size_t size, /**< size of the string */
169+
bool is_ascii) /**< encode of the string */
169170
{
170-
ecma_string_t *string_p = ecma_new_ecma_string_from_utf8 (chars_p, size);
171+
ecma_string_t *string_p = (is_ascii ? ecma_new_ecma_string_from_ascii (chars_p, size)
172+
: ecma_new_ecma_string_from_utf8 (chars_p, size));
171173

172174
if (ECMA_IS_DIRECT_STRING (string_p))
173175
{
@@ -702,7 +704,7 @@ ecma_snapshot_get_literal (const uint8_t *literal_base_p, /**< literal start */
702704

703705
uint16_t length = *(const uint16_t *) literal_p;
704706

705-
return ecma_find_or_create_literal_string (literal_p + sizeof (uint16_t), length);
707+
return ecma_find_or_create_literal_string (literal_p + sizeof (uint16_t), length, false);
706708
} /* ecma_snapshot_get_literal */
707709

708710
/**

jerry-core/ecma/base/ecma-literal-storage.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ typedef struct
4040

4141
void ecma_finalize_lit_storage (void);
4242

43-
ecma_value_t ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, lit_utf8_size_t size);
43+
ecma_value_t ecma_find_or_create_literal_string (const lit_utf8_byte_t *chars_p, lit_utf8_size_t size, bool is_ascii);
4444
ecma_value_t ecma_find_or_create_literal_number (ecma_number_t number_arg);
4545
#if JERRY_BUILTIN_BIGINT
4646
ecma_value_t ecma_find_or_create_literal_bigint (ecma_value_t bigint);

jerry-core/ecma/builtin-objects/ecma-builtin-helpers-date.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,7 @@ ecma_date_to_string_format (ecma_number_t datetime_number, /**< datetime */
680680

681681
JERRY_ASSERT (dest_p <= date_buffer + date_buffer_length);
682682

683-
return ecma_make_string_value (ecma_new_ecma_string_from_utf8 (date_buffer,
683+
return ecma_make_string_value (ecma_new_ecma_string_from_ascii (date_buffer,
684684
(lit_utf8_size_t) (dest_p - date_buffer)));
685685
} /* ecma_date_to_string_format */
686686

jerry-core/ecma/operations/ecma-bigint.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ ecma_bigint_to_string (ecma_value_t value, /**< BigInt value */
250250
}
251251

252252
ecma_string_t *string_p;
253-
string_p = ecma_new_ecma_string_from_utf8 (string_buffer_p + char_start_p, char_size_p - char_start_p);
253+
string_p = ecma_new_ecma_string_from_ascii (string_buffer_p + char_start_p, char_size_p - char_start_p);
254254

255255
jmem_heap_free_block (string_buffer_p, char_size_p);
256256
return string_p;

jerry-core/parser/js/common.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ typedef enum
7474
LEXER_FLAG_SOURCE_PTR = (1 << 2), /**< the literal is directly referenced in the source code
7575
* (no need to allocate memory) */
7676
LEXER_FLAG_LATE_INIT = (1 << 3), /**< initialize this variable after the byte code is freed */
77+
LEXER_FLAG_ASCII = (1 << 4), /**< the literal contains only ascii characters */
7778
#if JERRY_ESNEXT
78-
LEXER_FLAG_GLOBAL = (1 << 4), /**< this local identifier is not a let or const declaration */
79+
LEXER_FLAG_GLOBAL = (1 << 5), /**< this local identifier is not a let or const declaration */
7980
#endif /* JERRY_ESNEXT */
8081
} lexer_literal_status_flags_t;
8182

jerry-core/parser/js/js-lexer.c

+26-19
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
642642
parser_line_counter_t column = context_p->column;
643643
const uint8_t *source_end_p = context_p->source_end_p;
644644
size_t length = 0;
645-
uint8_t has_escape = false;
645+
lexer_lit_location_flags_t status_flags = LEXER_LIT_LOCATION_IS_ASCII;
646646

647647
do
648648
{
@@ -657,7 +657,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
657657
return true;
658658
}
659659

660-
has_escape = true;
660+
status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE;
661661

662662
#if JERRY_ESNEXT
663663
if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
@@ -711,6 +711,8 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
711711

712712
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
713713
{
714+
status_flags &= (uint32_t) ~LEXER_LIT_LOCATION_IS_ASCII;
715+
714716
#if JERRY_ESNEXT
715717
utf8_length = lit_read_code_point_from_utf8 (source_p,
716718
(lit_utf8_size_t) (source_end_p - source_p),
@@ -738,7 +740,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
738740
else if (source_p[0] >= LIT_UTF8_4_BYTE_MARKER)
739741
{
740742
decoded_length = 2 * 3;
741-
has_escape = true;
743+
status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE;
742744
}
743745
#else /* !JERRY_ESNEXT */
744746
if (code_point < LIT_UTF8_4_BYTE_MARKER)
@@ -789,7 +791,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
789791

790792
context_p->token.type = LEXER_LITERAL;
791793
context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
792-
context_p->token.lit_location.has_escape = has_escape;
794+
context_p->token.lit_location.status_flags = (uint8_t) status_flags;
793795

794796
context_p->token.column = context_p->column;
795797
context_p->token.lit_location.char_p = context_p->source_p;
@@ -807,7 +809,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
807809
const uint8_t *ident_start_p = context_p->source_p;
808810
uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH];
809811

810-
if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
812+
if (JERRY_UNLIKELY (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
811813
{
812814
lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length);
813815
ident_start_p = buffer_p;
@@ -953,7 +955,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
953955
parser_line_counter_t original_line = line;
954956
parser_line_counter_t original_column = column;
955957
size_t length = 0;
956-
uint8_t has_escape = false;
958+
lexer_lit_location_flags_t status_flags = LEXER_LIT_LOCATION_IS_ASCII;
957959

958960
#if JERRY_ESNEXT
959961
if (str_end_character == LIT_CHAR_RIGHT_BRACE)
@@ -986,7 +988,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
986988
continue;
987989
}
988990

989-
has_escape = true;
991+
status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE;
990992

991993
/* Newline is ignored. */
992994
if (*source_p == LIT_CHAR_CR)
@@ -1163,7 +1165,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
11631165
* after a backslash). Always converted to two 3 byte
11641166
* long sequence. */
11651167
length += 2 * 3;
1166-
has_escape = true;
1168+
status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE;
11671169
source_p += 4;
11681170
#if JERRY_ESNEXT
11691171
raw_length_adjust += 2;
@@ -1192,7 +1194,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
11921194
Note: ECMAScript v6, 11.8.6.1 <CR> or <CR><LF> are both normalized to <LF> */
11931195
if (*source_p == LIT_CHAR_CR)
11941196
{
1195-
has_escape = true;
1197+
status_flags = LEXER_LIT_LOCATION_HAS_ESCAPE;
11961198
source_p++;
11971199
length++;
11981200
if (source_p < source_end_p
@@ -1261,7 +1263,7 @@ lexer_parse_string (parser_context_t *context_p, /**< context */
12611263
context_p->token.lit_location.char_p = string_start_p;
12621264
context_p->token.lit_location.length = (prop_length_t) length;
12631265
context_p->token.lit_location.type = LEXER_STRING_LITERAL;
1264-
context_p->token.lit_location.has_escape = has_escape;
1266+
context_p->token.lit_location.status_flags = (uint8_t) status_flags;
12651267

12661268
context_p->source_p = source_p + 1;
12671269
context_p->line = line;
@@ -1328,7 +1330,7 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
13281330
context_p->token.extra_value = LEXER_NUMBER_DECIMAL;
13291331
context_p->token.lit_location.char_p = source_p;
13301332
context_p->token.lit_location.type = LEXER_NUMBER_LITERAL;
1331-
context_p->token.lit_location.has_escape = false;
1333+
context_p->token.lit_location.status_flags = LEXER_LIT_LOCATION_IS_ASCII;
13321334

13331335
if (source_p[0] == LIT_CHAR_0
13341336
&& source_p + 1 < source_end_p)
@@ -2240,7 +2242,7 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
22402242
{
22412243
JERRY_ASSERT (context_p->u.allocated_buffer_p == NULL);
22422244

2243-
if (!literal_p->has_escape)
2245+
if (!(literal_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
22442246
{
22452247
return literal_p->char_p;
22462248
}
@@ -2601,6 +2603,11 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
26012603
status_flags |= LEXER_FLAG_USED;
26022604
}
26032605

2606+
if (lit_location_p->status_flags & LEXER_LIT_LOCATION_IS_ASCII)
2607+
{
2608+
literal_p->status_flags |= LEXER_FLAG_ASCII;
2609+
}
2610+
26042611
literal_p->status_flags = status_flags;
26052612

26062613
context_p->lit_object.literal_p = literal_p;
@@ -3490,7 +3497,7 @@ lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< lef
34903497
return false;
34913498
}
34923499

3493-
if (!left_p->has_escape)
3500+
if (!(left_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
34943501
{
34953502
return memcmp (left_p->char_p, right_p, size) == 0;
34963503
}
@@ -3518,12 +3525,12 @@ lexer_compare_identifiers (parser_context_t *context_p, /**< context */
35183525
return false;
35193526
}
35203527

3521-
if (!left_p->has_escape)
3528+
if (!(left_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
35223529
{
35233530
return lexer_compare_identifier_to_chars (right_p->char_p, left_p->char_p, length);
35243531
}
35253532

3526-
if (!right_p->has_escape)
3533+
if (!(right_p->status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
35273534
{
35283535
return lexer_compare_identifier_to_chars (left_p->char_p, right_p->char_p, length);
35293536
}
@@ -3568,7 +3575,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
35683575
return false;
35693576
}
35703577

3571-
if (!left_ident_p->has_escape && !right_ident_p->has_escape)
3578+
if (!((left_ident_p->status_flags | right_ident_p->status_flags) & LEXER_LIT_LOCATION_HAS_ESCAPE))
35723579
{
35733580
return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0;
35743581
}
@@ -3591,7 +3598,7 @@ lexer_string_is_use_strict (parser_context_t *context_p) /**< context */
35913598
&& context_p->token.lit_location.type == LEXER_STRING_LITERAL);
35923599

35933600
return (context_p->token.lit_location.length == 10
3594-
&& !context_p->token.lit_location.has_escape
3601+
&& !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)
35953602
&& memcmp (context_p->token.lit_location.char_p, "use strict", 10) == 0);
35963603
} /* lexer_string_is_use_strict */
35973604

@@ -3649,7 +3656,7 @@ lexer_token_is_let (parser_context_t *context_p) /**< context */
36493656
JERRY_ASSERT (context_p->token.type == LEXER_LITERAL);
36503657

36513658
return (context_p->token.keyword_type == LEXER_KEYW_LET
3652-
&& !context_p->token.lit_location.has_escape);
3659+
&& !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE));
36533660
} /* lexer_token_is_let */
36543661

36553662
/**
@@ -3667,7 +3674,7 @@ lexer_token_is_async (parser_context_t *context_p) /**< context */
36673674
|| context_p->token.type == LEXER_TEMPLATE_LITERAL);
36683675

36693676
return (context_p->token.keyword_type == LEXER_KEYW_ASYNC
3670-
&& !context_p->token.lit_location.has_escape);
3677+
&& !(context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE));
36713678
} /* lexer_token_is_async */
36723679

36733680
#endif /* JERRY_ESNEXT */

jerry-core/parser/js/js-lexer.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,16 @@ typedef enum
305305
#endif /* JERRY_BUILTIN_BIGINT */
306306
} lexer_number_type_t;
307307

308+
/**
309+
* Lexer literal location flags (bit values that can be combined with bitwise OR).
310+
*/
311+
typedef enum
312+
{
313+
LEXER_LIT_LOCATION_NO_OPTS = 0, /**< no flags are set */
314+
LEXER_LIT_LOCATION_HAS_ESCAPE = (1 << 0), /**< the literal has escape sequences */
315+
LEXER_LIT_LOCATION_IS_ASCII = (1 << 1), /**< all characters of the literal are ascii characters */
316+
} lexer_lit_location_flags_t;
317+
308318
/**
309319
* Lexer character (string / identifier) literal data.
310320
*/
@@ -313,7 +323,7 @@ typedef struct
313323
const uint8_t *char_p; /**< start of identifier or string token */
314324
prop_length_t length; /**< length or index of a literal */
315325
uint8_t type; /**< type of the current literal */
316-
uint8_t has_escape; /**< has escape sequences */
326+
uint8_t status_flags; /**< any combination of lexer_lit_location_flags_t status bits */
317327
} lexer_lit_location_t;
318328

319329
/**

jerry-core/parser/js/js-parser-expr.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1922,7 +1922,7 @@ parser_parse_unary_expression (parser_context_t *context_p, /**< context */
19221922
}
19231923
#endif /* JERRY_MODULE_SYSTEM */
19241924

1925-
if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
1925+
if (JERRY_UNLIKELY (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE))
19261926
{
19271927
parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
19281928
}
@@ -2281,7 +2281,7 @@ parser_parse_unary_expression (parser_context_t *context_p, /**< context */
22812281
JERRY_ASSERT ((context_p->status_flags & PARSER_IS_GENERATOR_FUNCTION)
22822282
&& !(context_p->status_flags & PARSER_DISALLOW_AWAIT_YIELD));
22832283

2284-
if (context_p->token.lit_location.has_escape)
2284+
if (context_p->token.lit_location.status_flags & LEXER_LIT_LOCATION_HAS_ESCAPE)
22852285
{
22862286
parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD);
22872287
}

jerry-core/parser/js/js-parser-internal.h

+1
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,7 @@ void parser_emit_cbc_forward_branch (parser_context_t *context_p, uint16_t opcod
724724
parser_branch_node_t *parser_emit_cbc_forward_branch_item (parser_context_t *context_p, uint16_t opcode,
725725
parser_branch_node_t *next_p);
726726
void parser_emit_cbc_backward_branch (parser_context_t *context_p, uint16_t opcode, uint32_t offset);
727+
ecma_string_t *parser_new_ecma_string_from_literal (lexer_literal_t *literal_p);
727728
void parser_set_branch_to_current_position (parser_context_t *context_p, parser_branch_t *branch_p);
728729
void parser_set_breaks_to_current_position (parser_context_t *context_p, parser_branch_node_t *current_p);
729730
void parser_set_continues_to_current_position (parser_context_t *context_p, parser_branch_node_t *current_p);

0 commit comments

Comments
 (0)