diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index e3b09ac69..c6132acb1 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -348,6 +348,16 @@ typedef struct #define LEXER_KEYWORD_LIST_LENGTH(name) (const uint8_t) (sizeof ((name)) / sizeof ((name)[0])) /** @} */ +/** + * Length of the shortest keyword. + */ +#define LEXER_KEYWORD_MIN_LENGTH 2 + +/** + * Length of the longest keyword. + */ +#define LEXER_KEYWORD_MAX_LENGTH 10 + /** * Keywords with 2 characters. */ @@ -475,6 +485,10 @@ static const keyword_string_t * const keyword_strings_list[] = keywords_with_length_10 }; +JERRY_STATIC_ASSERT (sizeof (keyword_strings_list) / sizeof (const keyword_string_t *) + == (LEXER_KEYWORD_MAX_LENGTH - LEXER_KEYWORD_MIN_LENGTH) + 1, + keyword_strings_list_size_must_equal_to_keyword_max_length_difference); + /** * List of the keyword groups length. */ @@ -510,7 +524,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ size_t length = 0; context_p->token.type = LEXER_LITERAL; - context_p->token.literal_is_reserved = false; + context_p->token.ident_is_strict_keyword = false; context_p->token.lit_location.type = LEXER_IDENT_LITERAL; context_p->token.lit_location.has_escape = false; @@ -574,21 +588,30 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ context_p->source_p = ident_start_p; context_p->token.column = context_p->column; + context_p->token.lit_location.char_p = ident_start_p; + context_p->token.lit_location.length = (prop_length_t) length; if (length > PARSER_MAXIMUM_IDENT_LENGTH) { parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_TOO_LONG); } - /* Check keywords (Only if there is no \u escape sequence in the pattern). */ + /* Check keywords. 
*/ if (check_keywords - && !context_p->token.lit_location.has_escape - && (length >= 2 && length <= 10)) + && (length >= LEXER_KEYWORD_MIN_LENGTH && length <= LEXER_KEYWORD_MAX_LENGTH)) { - const keyword_string_t *keyword_list_p = keyword_strings_list[length - 2]; + uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH]; + + if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape)) + { + lexer_convert_ident_to_cesu8 (ident_start_p, buffer_p, (prop_length_t) length); + ident_start_p = buffer_p; + } + + const keyword_string_t *keyword_list_p = keyword_strings_list[length - LEXER_KEYWORD_MIN_LENGTH]; int start = 0; - int end = keyword_lengths_list[length - 2]; + int end = keyword_lengths_list[length - LEXER_KEYWORD_MIN_LENGTH]; int middle = end / 2; do @@ -607,6 +630,11 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ #if ENABLED (JERRY_ES2015) if (keyword_p->type == LEXER_KEYW_YIELD && (context_p->status_flags & PARSER_IS_GENERATOR_FUNCTION)) { + if (ident_start_p == buffer_p) + { + parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD); + } + if (context_p->status_flags & PARSER_DISALLOW_YIELD) { parser_raise_error (context_p, PARSER_ERR_YIELD_NOT_ALLOWED); @@ -622,10 +650,15 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ parser_raise_error (context_p, PARSER_ERR_STRICT_IDENT_NOT_ALLOWED); } - context_p->token.literal_is_reserved = true; + context_p->token.ident_is_strict_keyword = true; break; } + if (ident_start_p == buffer_p) + { + parser_raise_error (context_p, PARSER_ERR_INVALID_KEYWORD); + } + context_p->token.type = (uint8_t) keyword_p->type; break; } @@ -646,13 +679,6 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */ while (start < end); } - if (context_p->token.type == LEXER_LITERAL) - { - /* Fill literal data. 
*/ - context_p->token.lit_location.char_p = ident_start_p; - context_p->token.lit_location.length = (prop_length_t) length; - } - context_p->source_p = source_p; context_p->column = column; } /* lexer_parse_identifier */ @@ -964,7 +990,7 @@ lexer_parse_number (parser_context_t *context_p) /**< context */ size_t length; context_p->token.type = LEXER_LITERAL; - context_p->token.literal_is_reserved = false; + context_p->token.ident_is_strict_keyword = false; context_p->token.extra_value = LEXER_NUMBER_DECIMAL; context_p->token.lit_location.char_p = source_p; context_p->token.lit_location.type = LEXER_NUMBER_LITERAL; @@ -1688,9 +1714,9 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */ * Convert an ident with escapes to a utf8 string. */ void -lexer_convert_ident_to_utf8 (const uint8_t *source_p, /**< source string */ - uint8_t *destination_p, /**< destination string */ - prop_length_t length) /**< length of destination string */ +lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */ + uint8_t *destination_p, /**< destination string */ + prop_length_t length) /**< length of destination string */ { const uint8_t *destination_end_p = destination_p + length; @@ -1712,7 +1738,7 @@ lexer_convert_ident_to_utf8 (const uint8_t *source_p, /**< source string */ *destination_p++ = *source_p++; } while (destination_p < destination_end_p); -} /* lexer_convert_ident_to_utf8 */ +} /* lexer_convert_ident_to_cesu8 */ /** * Construct a literal object from an identifier. 
@@ -1748,7 +1774,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */ if (literal_p->type == LEXER_IDENT_LITERAL) { - lexer_convert_ident_to_utf8 (source_p, destination_start_p, literal_p->length); + lexer_convert_ident_to_cesu8 (source_p, destination_start_p, literal_p->length); } else { @@ -2412,7 +2438,7 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */ literal_p->u.bytecode_p = (ecma_compiled_code_t *) re_bytecode_p; context_p->token.type = LEXER_LITERAL; - context_p->token.literal_is_reserved = false; + context_p->token.ident_is_strict_keyword = false; context_p->token.lit_location.type = LEXER_REGEXP_LITERAL; context_p->lit_object.literal_p = literal_p; @@ -2479,7 +2505,7 @@ lexer_expect_identifier (parser_context_t *context_p, /**< context */ /* When parsing default exports for modules, it is not required by functions or classes to have identifiers. * In this case we use a synthetic name for them. */ context_p->token.type = LEXER_LITERAL; - context_p->token.literal_is_reserved = false; + context_p->token.ident_is_strict_keyword = false; context_p->token.lit_location.type = LEXER_IDENT_LITERAL; context_p->token.lit_location.has_escape = false; lexer_construct_literal_object (context_p, &lexer_default_literal, literal_type); diff --git a/jerry-core/parser/js/js-lexer.h b/jerry-core/parser/js/js-lexer.h index 64ca8f3be..fec1b2f08 100644 --- a/jerry-core/parser/js/js-lexer.h +++ b/jerry-core/parser/js/js-lexer.h @@ -288,8 +288,7 @@ typedef struct typedef struct { uint8_t type; /**< token type */ - uint8_t literal_is_reserved; /**< future reserved keyword - * (when char_literal.type is LEXER_IDENT_LITERAL) */ + uint8_t ident_is_strict_keyword; /**< identifier is strict reserved keyword */ uint8_t extra_value; /**< helper value for different purposes */ uint8_t flags; /**< flag bits for the current token */ parser_line_counter_t line; /**< token start line */ diff --git 
a/jerry-core/parser/js/js-parser-expr.c b/jerry-core/parser/js/js-parser-expr.c index b594f2ece..8d484286a 100644 --- a/jerry-core/parser/js/js-parser-expr.c +++ b/jerry-core/parser/js/js-parser-expr.c @@ -1054,7 +1054,7 @@ parser_parse_function_expression (parser_context_t *context_p, /**< context */ } #endif /* ENABLED (JERRY_DEBUGGER) */ - if (context_p->token.literal_is_reserved + if (context_p->token.ident_is_strict_keyword || context_p->lit_object.type != LEXER_LITERAL_OBJECT_ANY) { status_flags |= PARSER_HAS_NON_STRICT_ARG; diff --git a/jerry-core/parser/js/js-parser-internal.h b/jerry-core/parser/js/js-parser-internal.h index 43c532464..99254f91d 100644 --- a/jerry-core/parser/js/js-parser-internal.h +++ b/jerry-core/parser/js/js-parser-internal.h @@ -637,7 +637,7 @@ void lexer_parse_string (parser_context_t *context_p); void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type); void lexer_scan_identifier (parser_context_t *context_p, uint32_t ident_opts); ecma_char_t lexer_hex_to_character (parser_context_t *context_p, const uint8_t *source_p, int length); -void lexer_convert_ident_to_utf8 (const uint8_t *source_p, uint8_t *destination_p, prop_length_t length); +void lexer_convert_ident_to_cesu8 (const uint8_t *source_p, uint8_t *destination_p, prop_length_t length); void lexer_expect_object_literal_id (parser_context_t *context_p, uint32_t ident_opts); void lexer_construct_literal_object (parser_context_t *context_p, const lexer_lit_location_t *literal_p, uint8_t literal_type); diff --git a/jerry-core/parser/js/js-parser-module.c b/jerry-core/parser/js/js-parser-module.c index dbefff399..e8336ce9c 100644 --- a/jerry-core/parser/js/js-parser-module.c +++ b/jerry-core/parser/js/js-parser-module.c @@ -367,7 +367,7 @@ parser_module_parse_export_clause (parser_context_t *context_p) /**< parser cont /* 15.2.3.1 The referenced binding cannot be a reserved word. 
*/ if (context_p->token.type != LEXER_LITERAL || context_p->token.lit_location.type != LEXER_IDENT_LITERAL - || context_p->token.literal_is_reserved) + || context_p->token.ident_is_strict_keyword) { parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_EXPECTED); } diff --git a/jerry-core/parser/js/js-parser-util.c b/jerry-core/parser/js/js-parser-util.c index a1d6217fb..ee7361a6a 100644 --- a/jerry-core/parser/js/js-parser-util.c +++ b/jerry-core/parser/js/js-parser-util.c @@ -852,6 +852,10 @@ parser_error_to_string (parser_error_t error) /**< error code */ { return "Character cannot be part of an identifier."; } + case PARSER_ERR_INVALID_KEYWORD: + { + return "Escape sequences are not allowed in keywords."; + } case PARSER_ERR_INVALID_NUMBER: { return "Invalid number."; diff --git a/jerry-core/parser/js/js-parser.c b/jerry-core/parser/js/js-parser.c index b08976868..37a6f3730 100644 --- a/jerry-core/parser/js/js-parser.c +++ b/jerry-core/parser/js/js-parser.c @@ -1729,7 +1729,7 @@ parser_parse_function_arguments (parser_context_t *context_p, /**< context */ &context_p->token.lit_location, LEXER_IDENT_LITERAL); - if (context_p->token.literal_is_reserved + if (context_p->token.ident_is_strict_keyword || context_p->lit_object.type != LEXER_LITERAL_OBJECT_ANY) { context_p->status_flags |= PARSER_HAS_NON_STRICT_ARG; diff --git a/jerry-core/parser/js/js-parser.h b/jerry-core/parser/js/js-parser.h index 74d40d941..95f812bc0 100644 --- a/jerry-core/parser/js/js-parser.h +++ b/jerry-core/parser/js/js-parser.h @@ -48,6 +48,7 @@ typedef enum PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE, /**< invalid unicode escape sequence */ PARSER_ERR_INVALID_IDENTIFIER_START, /**< character cannot be start of an identifier */ PARSER_ERR_INVALID_IDENTIFIER_PART, /**< character cannot be part of an identifier */ + PARSER_ERR_INVALID_KEYWORD, /**< escape sequences are not allowed in keywords */ PARSER_ERR_INVALID_NUMBER, /**< invalid number literal */ PARSER_ERR_MISSING_EXPONENT, /**< missing 
exponent */ diff --git a/jerry-core/parser/js/js-scanner-util.c b/jerry-core/parser/js/js-scanner-util.c index 6b09dcaae..cf0c19d01 100644 --- a/jerry-core/parser/js/js-scanner-util.c +++ b/jerry-core/parser/js/js-scanner-util.c @@ -1147,7 +1147,7 @@ scanner_scope_find_let_declaration (parser_context_t *context_p, /**< context */ { uint8_t *destination_p = (uint8_t *) scanner_malloc (context_p, literal_p->length); - lexer_convert_ident_to_utf8 (literal_p->char_p, destination_p, literal_p->length); + lexer_convert_ident_to_cesu8 (literal_p->char_p, destination_p, literal_p->length); name_p = ecma_new_ecma_string_from_utf8 (destination_p, literal_p->length); scanner_free (destination_p, literal_p->length); diff --git a/tests/jerry/keyword.js b/tests/jerry/keyword.js new file mode 100644 index 000000000..558d2533a --- /dev/null +++ b/tests/jerry/keyword.js @@ -0,0 +1,47 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+function check_syntax_error(code) // passes only if eval(code) throws a SyntaxError
+{
+  try {
+    eval(code)
+    assert(false) // reached only when eval accepted the code without throwing
+  } catch (e) {
+    assert(e instanceof SyntaxError)
+  }
+}
+
+function check_strict_syntax_error(code) // same check, with the direct eval inside a "use strict" function
+{
+  "use strict"
+
+  try {
+    eval(code)
+    assert(false) // reached only when eval accepted the code without throwing
+  } catch (e) {
+    assert(e instanceof SyntaxError)
+  }
+}
+
+check_syntax_error("d\\u006f {} while (false)") // escaped "do"
+check_syntax_error("\\u0076\\u0061\\u0072 var = 5") // escaped "var"
+check_syntax_error("wit\\u0068 ({}) {}") // escaped "with"
+check_syntax_error("\\u0066alse") // escaped "false"
+check_syntax_error("type\\u006ff 3.14") // escaped "typeof"
+check_syntax_error("try {} fin\\u0061lly {}") // escaped "finally"
+check_syntax_error("f\\u0075nction f() {}") // escaped "function"
+check_syntax_error("a instanc\\u0065of b") // escaped "instanceof"
+
+check_strict_syntax_error("\\u006c\\u0065\\u0074 _let = 5"); // escaped "let"
+check_strict_syntax_error("\\u0070rotecte\\u0064"); // escaped "protected"