From c7a3398466895c46c875966fc9da6ad11619bce6 Mon Sep 17 00:00:00 2001 From: Himanshu Kumar Date: Wed, 16 Oct 2024 16:56:43 -0700 Subject: [PATCH] Security fix in libyaml: Limit depth of nesting by default Fix in libYaml: 51843fe While here, bring in a couple more commits in libYaml: 588eabf & 840b65c --- Sources/CYaml/include/yaml.h | 22 +++++++++++++++---- Sources/CYaml/src/parser.c | 41 ++++++++++++++++++++++++++++++++++++ Sources/CYaml/src/scanner.c | 4 ++-- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/Sources/CYaml/include/yaml.h b/Sources/CYaml/include/yaml.h index 89050e4f..e34b793d 100644 --- a/Sources/CYaml/include/yaml.h +++ b/Sources/CYaml/include/yaml.h @@ -1095,7 +1095,7 @@ typedef struct yaml_parser_s { yaml_error_type_t error; /** Error description. */ const char *problem; - /** The byte about which the problem occured. */ + /** The byte about which the problem occurred. */ size_t problem_offset; /** The problematic value (@c -1 is none). */ int problem_value; @@ -1335,7 +1335,7 @@ yaml_parser_delete(yaml_parser_t *parser); * Set a string input. * * Note that the @a input pointer must be valid while the @a parser object - * exists. The application is responsible for destroing @a input after + * exists. The application is responsible for destroying @a input after * destroying the @a parser. * * @param[in,out] parser A parser object. @@ -1456,6 +1456,20 @@ yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); YAML_DECLARE(int) yaml_parser_load(yaml_parser_t *parser, yaml_document_t *document); +/** + * Set the maximum depth of nesting. + * + * Default: 1000 + * + * Each nesting level increases the stack and the number of previous + * starting events that the parser has to check. 
+ * + * @param[in] max The maximum number of allowed nested events + */ + +YAML_DECLARE(void) +yaml_set_max_nest_level(int max); + /** @} */ /** @@ -1734,7 +1748,7 @@ typedef struct yaml_emitter_s { size_t length; /** Does the scalar contain line breaks? */ int multiline; - /** Can the scalar be expessed in the flow plain style? */ + /** Can the scalar be expressed in the flow plain style? */ int flow_plain_allowed; /** Can the scalar be expressed in the block plain style? */ int block_plain_allowed; @@ -1950,7 +1964,7 @@ yaml_emitter_close(yaml_emitter_t *emitter); /** * Emit a YAML document. * - * The documen object may be generated using the yaml_parser_load() function + * The document object may be generated using the yaml_parser_load() function * or the yaml_document_initialize() function. The emitter takes the * responsibility for the document object and destroys its content after * it is emitted. The document object is destroyed even if the function fails. diff --git a/Sources/CYaml/src/parser.c b/Sources/CYaml/src/parser.c index 22dd511f..870cbcb6 100644 --- a/Sources/CYaml/src/parser.c +++ b/Sources/CYaml/src/parser.c @@ -64,6 +64,8 @@ * Public API declarations. */ +int MAX_NESTING_LEVEL = 1000; + YAML_DECLARE(int) yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); @@ -80,6 +82,10 @@ yaml_parser_set_parser_error_context(yaml_parser_t *parser, const char *context, yaml_mark_t context_mark, const char *problem, yaml_mark_t problem_mark); +static int +yaml_maximum_level_reached(yaml_parser_t *parser, + yaml_mark_t context_mark, yaml_mark_t problem_mark); + /* * State functions. */ @@ -162,6 +168,12 @@ static int yaml_parser_append_tag_directive(yaml_parser_t *parser, yaml_tag_directive_t value, int allow_duplicates, yaml_mark_t mark); +YAML_DECLARE(void) +yaml_set_max_nest_level(int max) +{ + MAX_NESTING_LEVEL = max; +} + /* * Get the next event. 
*/ @@ -217,6 +229,14 @@ yaml_parser_set_parser_error_context(yaml_parser_t *parser, return 0; } +static int +yaml_maximum_level_reached(yaml_parser_t *parser, + yaml_mark_t context_mark, yaml_mark_t problem_mark) +{ + yaml_parser_set_parser_error_context(parser, + "while parsing", context_mark, "Maximum nesting level reached, set with yaml_set_max_nest_level())", problem_mark); + return 0; +} /* * State dispatcher. @@ -657,6 +677,10 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, return 1; } else if (token->type == YAML_FLOW_SEQUENCE_START_TOKEN) { + if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) { + yaml_maximum_level_reached(parser, start_mark, token->start_mark); + goto error; + } end_mark = token->end_mark; parser->state = YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE; SEQUENCE_START_EVENT_INIT(*event, anchor, tag, implicit, @@ -664,6 +688,10 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, return 1; } else if (token->type == YAML_FLOW_MAPPING_START_TOKEN) { + if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) { + yaml_maximum_level_reached(parser, start_mark, token->start_mark); + goto error; + } end_mark = token->end_mark; parser->state = YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE; MAPPING_START_EVENT_INIT(*event, anchor, tag, implicit, @@ -671,6 +699,10 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, return 1; } else if (block && token->type == YAML_BLOCK_SEQUENCE_START_TOKEN) { + if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) { + yaml_maximum_level_reached(parser, start_mark, token->start_mark); + goto error; + } end_mark = token->end_mark; parser->state = YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE; SEQUENCE_START_EVENT_INIT(*event, anchor, tag, implicit, @@ -678,6 +710,10 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, return 1; } else if (block && token->type == 
YAML_BLOCK_MAPPING_START_TOKEN) { + if (!STACK_LIMIT(parser, parser->indents, MAX_NESTING_LEVEL - parser->flow_level)) { + yaml_maximum_level_reached(parser, start_mark, token->start_mark); + goto error; + } end_mark = token->end_mark; parser->state = YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE; MAPPING_START_EVENT_INIT(*event, anchor, tag, implicit, @@ -1022,6 +1058,11 @@ yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser, return 0; return yaml_parser_parse_node(parser, event, 0, 0); } + else if (token->type == YAML_FLOW_SEQUENCE_END_TOKEN) { + yaml_mark_t mark = token->start_mark; + parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE; + return yaml_parser_process_empty_scalar(parser, event, mark); + } else { yaml_mark_t mark = token->end_mark; SKIP_TOKEN(parser); diff --git a/Sources/CYaml/src/scanner.c b/Sources/CYaml/src/scanner.c index cf00beb3..c298271b 100644 --- a/Sources/CYaml/src/scanner.c +++ b/Sources/CYaml/src/scanner.c @@ -273,7 +273,7 @@ * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation * increase that precedes a block collection (cf. the INDENT token in Python). * The token BLOCK-END denote indentation decrease that ends a block collection - * (cf. the DEDENT token in Python). However YAML has some syntax pecularities + * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities * that makes detections of these tokens more complex. * * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators @@ -3290,7 +3290,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check if we are at the end of the scalar. */ - /* Fix for crash unitialized value crash + /* Fix for crash uninitialized value crash * Credit for the bug and input is to OSS Fuzz * Credit for the fix to Alex Gaynor */