From d39a076b2e7fbca22da9a109195641c81089fa50 Mon Sep 17 00:00:00 2001 From: kisbg Date: Fri, 24 Jul 2020 13:42:57 +0200 Subject: [PATCH] Added RegExp dotAll flag (#4000) JerryScript-DCO-1.0-Signed-off-by: bence gabor kis kisbg@inf.u-szeged.hu --- jerry-core/ecma/base/ecma-helpers.c | 3 +- .../ecma-builtin-regexp-prototype.c | 7 ++- .../ecma-builtin-regexp-prototype.inc.h | 4 ++ .../ecma/operations/ecma-regexp-object.c | 18 +++++++- .../ecma/operations/ecma-regexp-object.h | 3 +- jerry-core/lit/lit-magic-strings.inc.h | 3 ++ jerry-core/lit/lit-magic-strings.ini | 1 + jerry-core/parser/js/js-lexer.c | 6 +++ jerry-core/parser/regexp/re-bytecode.h | 5 -- jerry-core/parser/regexp/re-compiler.c | 2 +- tests/jerry/es.next/regexp-dotAll.js | 46 +++++++++++++++++++ 11 files changed, 87 insertions(+), 11 deletions(-) create mode 100644 tests/jerry/es.next/regexp-dotAll.js diff --git a/jerry-core/ecma/base/ecma-helpers.c b/jerry-core/ecma/base/ecma-helpers.c index b4e8b73dd..1f7ee990a 100644 --- a/jerry-core/ecma/base/ecma-helpers.c +++ b/jerry-core/ecma/base/ecma-helpers.c @@ -1357,7 +1357,8 @@ void ecma_bytecode_deref (ecma_compiled_code_t *bytecode_p) /**< byte code pointer */ { JERRY_ASSERT (bytecode_p->refs > 0); - JERRY_ASSERT (!(bytecode_p->status_flags & CBC_CODE_FLAGS_STATIC_FUNCTION)); + JERRY_ASSERT (!CBC_IS_FUNCTION (bytecode_p->status_flags) + || !(bytecode_p->status_flags & CBC_CODE_FLAGS_STATIC_FUNCTION)); bytecode_p->refs--; diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c index e02010250..d01fca15f 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c @@ -55,6 +55,7 @@ enum ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE, ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_STICKY, ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_UNICODE, + ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL, #endif /* ENABLED 
(JERRY_ESNEXT) */ #if ENABLED (JERRY_BUILTIN_ANNEXB) ECMA_REGEXP_PROTOTYPE_ROUTINE_COMPILE, @@ -107,7 +108,8 @@ ecma_builtin_regexp_prototype_flags_helper (ecma_extended_object_t *re_obj_p, /* RE_FLAG_IGNORE_CASE, RE_FLAG_MULTILINE, RE_FLAG_STICKY, - RE_FLAG_UNICODE + RE_FLAG_UNICODE, + RE_FLAG_DOTALL, }; uint16_t offset = (uint16_t) (builtin_routine_id - ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_GLOBAL); @@ -133,6 +135,7 @@ ecma_builtin_regexp_prototype_get_flags (ecma_object_t *object_p) /**< this obje LIT_MAGIC_STRING_GLOBAL, LIT_MAGIC_STRING_IGNORECASE_UL, LIT_MAGIC_STRING_MULTILINE, + LIT_MAGIC_STRING_DOTALL, LIT_MAGIC_STRING_UNICODE, LIT_MAGIC_STRING_STICKY }; @@ -142,6 +145,7 @@ ecma_builtin_regexp_prototype_get_flags (ecma_object_t *object_p) /**< this obje LIT_CHAR_LOWERCASE_G, LIT_CHAR_LOWERCASE_I, LIT_CHAR_LOWERCASE_M, + LIT_CHAR_LOWERCASE_S, LIT_CHAR_LOWERCASE_U, LIT_CHAR_LOWERCASE_Y }; @@ -607,6 +611,7 @@ ecma_builtin_regexp_prototype_dispatch_routine (uint16_t builtin_routine_id, /** case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE: case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_STICKY: case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_UNICODE: + case ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL: { ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) obj_p; diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h index 5b8662d1a..3d023cb53 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h @@ -43,6 +43,10 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_IGNORECASE_UL, ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_IGNORE_CASE, ECMA_PROPERTY_FLAG_CONFIGURABLE) +ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_DOTALL, + ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_DOT_ALL, + ECMA_PROPERTY_FLAG_CONFIGURABLE) + ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_MULTILINE, ECMA_REGEXP_PROTOTYPE_ROUTINE_GET_MULTILINE, 
ECMA_PROPERTY_FLAG_CONFIGURABLE) diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c index bdc431693..161204f24 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -98,6 +98,13 @@ ecma_regexp_parse_flags (ecma_string_t *flags_str_p, /**< Input string with flag flag = RE_FLAG_UNICODE; break; } +#if ENABLED (JERRY_ESNEXT) + case 's': + { + flag = RE_FLAG_DOTALL; + break; + } +#endif /* ENABLED (JERRY_ESNEXT) */ default: { flag = RE_FLAG_EMPTY; @@ -1505,7 +1512,9 @@ class_found: const lit_code_point_t cp = ecma_regexp_unicode_advance (&str_curr_p, re_ctx_p->input_end_p); - if (JERRY_UNLIKELY (cp <= LIT_UTF16_CODE_UNIT_MAX && lit_char_is_line_terminator ((ecma_char_t) cp))) + if (!(re_ctx_p->flags & RE_FLAG_DOTALL) + && JERRY_UNLIKELY (cp <= LIT_UTF16_CODE_UNIT_MAX + && lit_char_is_line_terminator ((ecma_char_t) cp))) { goto fail; } @@ -1521,8 +1530,13 @@ class_found: } const ecma_char_t ch = lit_cesu8_read_next (&str_curr_p); +#if !ENABLED (JERRY_ESNEXT) + bool has_dot_all_flag = false; +#else /* ENABLED (JERRY_ESNEXT) */ + bool has_dot_all_flag = (re_ctx_p->flags & RE_FLAG_DOTALL) != 0; +#endif /* !ENABLED (JERRY_ESNEXT) */ - if (lit_char_is_line_terminator (ch)) + if (!has_dot_all_flag && lit_char_is_line_terminator (ch)) { goto fail; } diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index fdb33b202..714ba04e1 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -40,7 +40,8 @@ typedef enum RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */ RE_FLAG_MULTILINE = (1u << 3), /**< ECMA-262 v5, 15.10.7.4 */ RE_FLAG_STICKY = (1u << 4), /**< ECMA-262 v6, 21.2.5.12 */ - RE_FLAG_UNICODE = (1u << 5) /**< ECMA-262 v6, 21.2.5.15 */ + RE_FLAG_UNICODE = (1u << 5), /**< ECMA-262 v6, 21.2.5.15 */ + RE_FLAG_DOTALL = (1u << 
6) /**< ECMA-262 v9, 21.2.5.3 */ /* Bits from bit 13 is reserved for function types (see CBC_FUNCTION_TYPE_SHIFT). */ } ecma_regexp_flags_t; diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index f2bd8610a..bf9a1cc84 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -338,6 +338,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CREATE, "create") || ENABLED (JERRY_BUILTIN_WEAKSET) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DELETE, "delete") #endif +#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ESNEXT) +LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DOTALL, "dotAll") +#endif #if ENABLED (JERRY_BUILTIN_ANNEXB) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape") #endif diff --git a/jerry-core/lit/lit-magic-strings.ini b/jerry-core/lit/lit-magic-strings.ini index d2a48b8f0..6778a8865 100644 --- a/jerry-core/lit/lit-magic-strings.ini +++ b/jerry-core/lit/lit-magic-strings.ini @@ -182,6 +182,7 @@ LIT_MAGIC_STRING_SQRT1_2_U = "SQRT1_2" LIT_MAGIC_STRING_BOOLEAN = "boolean" LIT_MAGIC_STRING_COMPILE = "compile" LIT_MAGIC_STRING_DEFAULT = "default" +LIT_MAGIC_STRING_DOTALL = "dotAll" LIT_MAGIC_STRING_FOR_EACH_UL = "forEach" LIT_MAGIC_STRING_GET_DATE_UL = "getDate" LIT_MAGIC_STRING_GET_INT8_UL = "getInt8" diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c index 3dd07885a..e6da94389 100644 --- a/jerry-core/parser/js/js-lexer.c +++ b/jerry-core/parser/js/js-lexer.c @@ -2816,6 +2816,12 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */ { flag = RE_FLAG_STICKY; } +#if ENABLED (JERRY_ESNEXT) + else if (source_p[0] == LIT_CHAR_LOWERCASE_S) + { + flag = RE_FLAG_DOTALL; + } +#endif /* ENABLED (JERRY_ESNEXT) */ if (flag == 0) { diff --git a/jerry-core/parser/regexp/re-bytecode.h b/jerry-core/parser/regexp/re-bytecode.h index 2466faef9..61d2a7adb 100644 --- a/jerry-core/parser/regexp/re-bytecode.h +++ b/jerry-core/parser/regexp/re-bytecode.h @@ 
-36,11 +36,6 @@ */ #define RE_CACHE_SIZE 8u -/** - * RegExp flags mask (first 10 bits are for reference count and the rest for the actual RegExp flags) - */ -#define RE_FLAGS_MASK 0x3F - /** * Maximum value that can be encoded in the RegExp bytecode as a single byte. */ diff --git a/jerry-core/parser/regexp/re-compiler.c b/jerry-core/parser/regexp/re-compiler.c index c28fd1700..0202299f7 100644 --- a/jerry-core/parser/regexp/re-compiler.c +++ b/jerry-core/parser/regexp/re-compiler.c @@ -61,7 +61,7 @@ re_cache_lookup (ecma_string_t *pattern_str_p, /**< pattern string */ ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source); - if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags + if (cached_bytecode_p->header.status_flags == flags && ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p)) { return cached_bytecode_p; diff --git a/tests/jerry/es.next/regexp-dotAll.js b/tests/jerry/es.next/regexp-dotAll.js new file mode 100644 index 000000000..7432e2ace --- /dev/null +++ b/tests/jerry/es.next/regexp-dotAll.js @@ -0,0 +1,46 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var str1 = 'bar\nexample foo example'; +var str2 = 'bare\nxample foo example'; +var regex_with_dotAll_flag = new RegExp ('bar.example','s'); +var regex_without_dotAll_flag = new RegExp ('bar.example'); + +// testing the RegExp.prototype.dotAll accessor: true only when the regexp was created with the s flag +assert (regex_with_dotAll_flag.dotAll == true); +assert (regex_without_dotAll_flag.dotAll == false); + +// basic dotAll flag test: the dot matches a line feed only when the s flag is set, and still consumes exactly one character +assert (str1.replace (regex_with_dotAll_flag,'') == " foo example"); +assert (str1.replace (regex_without_dotAll_flag,'') == str1); +assert (str2.replace (regex_with_dotAll_flag, "") == str2); + +// testing dotAll combined with other flags (unicode, multiline): a single dot must match any one code point, including line terminators, an astral code point and lone surrogates +for (let re of [/^.$/su, /^.$/sum]) { + assert (re.test("a")); + assert (re.test("3")); + assert (re.test("π")); + assert (re.test("\u2027")); + assert (re.test("\u0085")); + assert (re.test("\v")); + assert (re.test("\f")); + assert (re.test("\u180E")); + assert (re.test("\u{10300}")); + assert (re.test("\n")); + assert (re.test("\r")); + assert (re.test("\u2028")); + assert (re.test("\u2029")); + assert (re.test("\uD800")); + assert (re.test("\uDFFF")); +}