From 279d4d41199bb9f7c01db1b8b46ca7f950ed43ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=C3=A1tyai?= Date: Fri, 22 Nov 2019 14:04:03 +0100 Subject: [PATCH] Add handling for RegExp unicode and sticky flags (#3341) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu --- .../ecma-builtin-regexp-prototype.c | 157 +++++++++++++++--- .../ecma-builtin-regexp-prototype.inc.h | 8 + .../ecma/operations/ecma-regexp-object.c | 44 +++-- .../ecma/operations/ecma-regexp-object.h | 4 +- jerry-core/lit/lit-magic-strings.inc.h | 6 + jerry-core/lit/lit-magic-strings.ini | 2 + tests/jerry/es2015/regexp-construct.js | 21 +++ tests/jerry/es2015/regexp-routines.js | 86 ++++++++++ tests/jerry/es5.1/regexp-routines.js | 20 +++ tests/jerry/regexp-routines.js | 9 - 10 files changed, 310 insertions(+), 47 deletions(-) create mode 100644 tests/jerry/es2015/regexp-routines.js create mode 100644 tests/jerry/es5.1/regexp-routines.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c index c211b44d7..e73f06a91 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c @@ -91,31 +91,50 @@ ecma_builtin_regexp_prototype_flags_helper (ecma_value_t this, /**< this value * static ecma_value_t ecma_builtin_regexp_prototype_get_flags (ecma_value_t this_arg) /**< this argument */ { - uint16_t flags = RE_FLAG_EMPTY; - ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags); - if (ECMA_IS_VALUE_ERROR (ret_value)) + static const lit_magic_string_id_t flag_lit_ids[] = { - return ret_value; + LIT_MAGIC_STRING_GLOBAL, + LIT_MAGIC_STRING_IGNORECASE_UL, + LIT_MAGIC_STRING_MULTILINE, + LIT_MAGIC_STRING_UNICODE, + LIT_MAGIC_STRING_STICKY + }; + + static const lit_utf8_byte_t flag_chars[] = + { + LIT_CHAR_LOWERCASE_G, + LIT_CHAR_LOWERCASE_I, + LIT_CHAR_LOWERCASE_M, + LIT_CHAR_LOWERCASE_U, + LIT_CHAR_LOWERCASE_Y + }; + + if (!ecma_is_value_object (this_arg)) + { + return ecma_raise_type_error (ECMA_ERR_MSG ("'this' value is not an object.")); } - ecma_stringbuilder_t result = ecma_stringbuilder_create (); + ecma_object_t *object_p = ecma_get_object_from_value (this_arg); - if (flags & RE_FLAG_GLOBAL) + ecma_stringbuilder_t builder = ecma_stringbuilder_create (); + for (uint32_t i = 0; i < sizeof (flag_lit_ids) / sizeof (lit_magic_string_id_t); i++) { - ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_G); + ecma_value_t result = ecma_op_object_get_by_magic_id (object_p, flag_lit_ids[i]); + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_stringbuilder_destroy (&builder); + return result; + } + + if (ecma_op_to_boolean (result)) + { + ecma_stringbuilder_append_byte (&builder, flag_chars[i]); + } + + ecma_free_value (result); } - if (flags & RE_FLAG_IGNORE_CASE) - { - ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_I); - } - - if (flags & RE_FLAG_MULTILINE) - { - ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_M); - } - - return ecma_make_string_value (ecma_stringbuilder_finalize (&result)); + return ecma_make_string_value (ecma_stringbuilder_finalize (&builder)); } /* ecma_builtin_regexp_prototype_get_flags */ /** @@ -224,6 +243,56 @@ ecma_builtin_regexp_prototype_get_multiline (ecma_value_t this_arg) /**< this ar return ecma_make_boolean_value (flags & RE_FLAG_MULTILINE); } /* ecma_builtin_regexp_prototype_get_multiline */ + +/** + * The RegExp.prototype object's 'sticky' accessor property + * + * See also: + * ECMA-262 v6, 21.2.5.12 + * + * @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object + * ECMA_VALUE_TRUE - if 'sticky' flag is set + * ECMA_VALUE_FALSE - otherwise + * + * Returned value must be freed with ecma_free_value. + */ +static ecma_value_t +ecma_builtin_regexp_prototype_get_sticky (ecma_value_t this_arg) /**< this argument */ +{ + uint16_t flags = RE_FLAG_EMPTY; + ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags); + if (ECMA_IS_VALUE_ERROR (ret_value)) + { + return ret_value; + } + + return ecma_make_boolean_value (flags & RE_FLAG_STICKY); +} /* ecma_builtin_regexp_prototype_get_sticky */ + +/** + * The RegExp.prototype object's 'unicode' accessor property + * + * See also: + * ECMA-262 v6, 21.2.5.15 + * + * @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object + * ECMA_VALUE_TRUE - if 'unicode' flag is set + * ECMA_VALUE_FALSE - otherwise + * + * Returned value must be freed with ecma_free_value. + */ +static ecma_value_t +ecma_builtin_regexp_prototype_get_unicode (ecma_value_t this_arg) /**< this argument */ +{ + uint16_t flags = RE_FLAG_EMPTY; + ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags); + if (ECMA_IS_VALUE_ERROR (ret_value)) + { + return ret_value; + } + + return ecma_make_boolean_value (flags & RE_FLAG_UNICODE); +} /* ecma_builtin_regexp_prototype_get_unicode */ #endif /* ENABLED (JERRY_ES2015) */ #if ENABLED (JERRY_BUILTIN_ANNEXB) @@ -419,9 +488,58 @@ ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */ static ecma_value_t ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argument */ { +#if ENABLED (JERRY_ES2015) + if (!ecma_is_value_object (this_arg)) + { + return ecma_raise_type_error (ECMA_ERR_MSG ("'this' value is not an object.")); + } + + ecma_object_t *object_p = ecma_get_object_from_value (this_arg); + + ecma_value_t result = ecma_op_object_get_by_magic_id (object_p, LIT_MAGIC_STRING_SOURCE); + if (ECMA_IS_VALUE_ERROR (result)) + { + return result; + } + + ecma_string_t *source_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (source_p == NULL) + { + return ECMA_VALUE_ERROR; + } + + result = ecma_op_object_get_by_magic_id (object_p, LIT_MAGIC_STRING_FLAGS); + if (ECMA_IS_VALUE_ERROR (result)) + { + ecma_deref_ecma_string (source_p); + return result; + } + + ecma_string_t *flags_p = ecma_op_to_string (result); + ecma_free_value (result); + + if (flags_p == NULL) + { + ecma_deref_ecma_string (source_p); + return ECMA_VALUE_ERROR; + } + + ecma_stringbuilder_t builder = ecma_stringbuilder_create (); + ecma_stringbuilder_append_byte (&builder, LIT_CHAR_SLASH); + ecma_stringbuilder_append (&builder, source_p); + ecma_stringbuilder_append_byte (&builder, LIT_CHAR_SLASH); + ecma_stringbuilder_append (&builder, flags_p); + + ecma_deref_ecma_string (source_p); + ecma_deref_ecma_string (flags_p); + + return ecma_make_string_value (ecma_stringbuilder_finalize (&builder)); +#else /* !ENABLED (JERRY_ES2015) */ if (!ecma_object_is_regexp_object (this_arg)) { - return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type")); + return ecma_raise_type_error (ECMA_ERR_MSG ("'this' value is not a RegExp object.")); } ecma_object_t *obj_p = ecma_get_object_from_value (this_arg); @@ -465,6 +583,7 @@ ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argume } return ecma_make_string_value (ecma_stringbuilder_finalize (&result)); +#endif /* ENABLED (JERRY_ES2015) */ } /* ecma_builtin_regexp_prototype_to_string */ #if ENABLED (JERRY_ES2015) diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h index 05f822d96..2f6713f56 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h @@ -47,6 +47,14 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_MULTILINE, ecma_builtin_regexp_prototype_get_multiline, ECMA_PROPERTY_FIXED) +ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_UNICODE, + ecma_builtin_regexp_prototype_get_unicode, + ECMA_PROPERTY_FIXED) + +ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_STICKY, + ecma_builtin_regexp_prototype_get_sticky, + ECMA_PROPERTY_FIXED) + ROUTINE (LIT_GLOBAL_SYMBOL_REPLACE, ecma_builtin_regexp_prototype_symbol_replace, 2, 2) #else /* !ENABLED (JERRY_ES2015) */ /* ECMA-262 v5, 15.10.7.1 */ diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c index 803323161..9354a58c2 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -66,54 +66,62 @@ ecma_regexp_parse_flags (ecma_string_t *flags_str_p, /**< Input string with flag uint16_t *flags_p) /**< [out] parsed flag bits */ { ecma_value_t ret_value = ECMA_VALUE_EMPTY; + uint16_t result_flags = RE_FLAG_EMPTY; ECMA_STRING_TO_UTF8_STRING (flags_str_p, flags_start_p, flags_start_size); const lit_utf8_byte_t *flags_str_curr_p = flags_start_p; const lit_utf8_byte_t *flags_str_end_p = flags_start_p + flags_start_size; - while (flags_str_curr_p < flags_str_end_p - && ecma_is_value_empty (ret_value)) + while (flags_str_curr_p < flags_str_end_p) { + ecma_regexp_flags_t flag; switch (*flags_str_curr_p++) { case 'g': { - if (*flags_p & RE_FLAG_GLOBAL) - { - ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags.")); - } - *flags_p |= RE_FLAG_GLOBAL; + flag = RE_FLAG_GLOBAL; break; } case 'i': { - if (*flags_p & RE_FLAG_IGNORE_CASE) - { - ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags.")); - } - *flags_p |= RE_FLAG_IGNORE_CASE; + flag = RE_FLAG_IGNORE_CASE; break; } case 'm': { - if (*flags_p & RE_FLAG_MULTILINE) - { - ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags.")); - } - *flags_p |= RE_FLAG_MULTILINE; + flag = RE_FLAG_MULTILINE; + break; + } + case 'y': + { + flag = RE_FLAG_STICKY; + break; + } + case 'u': + { + flag = RE_FLAG_UNICODE; break; } default: { - ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags.")); + flag = RE_FLAG_EMPTY; break; } } + + if (flag == RE_FLAG_EMPTY || (result_flags & flag) != 0) + { + ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags.")); + break; + } + + result_flags = (uint16_t) (result_flags | flag); } ECMA_FINALIZE_UTF8_STRING (flags_start_p, flags_start_size); + *flags_p = result_flags; return ret_value; } /* ecma_regexp_parse_flags */ diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index a4d507d82..4cdd353e8 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -38,7 +38,9 @@ typedef enum RE_FLAG_EMPTY = 0u, /* Empty RegExp flags */ RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */ RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */ - RE_FLAG_MULTILINE = (1u << 3) /**< ECMA-262 v5, 15.10.7.4 */ + RE_FLAG_MULTILINE = (1u << 3), /**< ECMA-262 v5, 15.10.7.4 */ + RE_FLAG_STICKY = (1u << 4), /**< ECMA-262 v6, 21.2.5.12 */ + RE_FLAG_UNICODE = (1u << 5) /**< ECMA-262 v6, 21.2.5.15 */ } ecma_regexp_flags_t; /** diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index cee2de28b..6f58950c5 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -337,6 +337,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOURCE, "source") #if ENABLED (JERRY_BUILTIN_ARRAY) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPLICE, "splice") #endif +#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) +LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STICKY, "sticky") +#endif LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_STRING, "string") #if ENABLED (JERRY_BUILTIN_ANNEXB) && ENABLED (JERRY_BUILTIN_STRING) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SUBSTR, "substr") @@ -449,6 +452,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SPECIES, "species") #if ENABLED (JERRY_BUILTIN_NUMBER) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_TO_FIXED_UL, "toFixed") #endif +#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) +LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNICODE, "unicode") +#endif #if ENABLED (JERRY_BUILTIN_ARRAY) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_UNSHIFT, "unshift") #endif diff --git a/jerry-core/lit/lit-magic-strings.ini b/jerry-core/lit/lit-magic-strings.ini index 7d337f8b2..00d6ec721 100644 --- a/jerry-core/lit/lit-magic-strings.ini +++ b/jerry-core/lit/lit-magic-strings.ini @@ -149,6 +149,7 @@ LIT_MAGIC_STRING_SEARCH = "search" LIT_MAGIC_STRING_SOURCE = "source" LIT_MAGIC_STRING_SPLICE = "splice" LIT_MAGIC_STRING_STRING = "string" +LIT_MAGIC_STRING_STICKY = "sticky" LIT_MAGIC_STRING_SYMBOL = "symbol" LIT_MAGIC_STRING_SUBSTR = "substr" LIT_MAGIC_STRING_ENTRIES = "entries" @@ -180,6 +181,7 @@ LIT_MAGIC_STRING_SET_INT8_UL = "setInt8" LIT_MAGIC_STRING_SET_YEAR_UL = "setYear" LIT_MAGIC_STRING_SPECIES = "species" LIT_MAGIC_STRING_TO_FIXED_UL = "toFixed" +LIT_MAGIC_STRING_UNICODE = "unicode" LIT_MAGIC_STRING_UNSHIFT = "unshift" LIT_MAGIC_STRING_VALUE_OF_UL = "valueOf" LIT_MAGIC_STRING_WEAKMAP_UL = "WeakMap" diff --git a/tests/jerry/es2015/regexp-construct.js b/tests/jerry/es2015/regexp-construct.js index 48599a931..199a3327d 100644 --- a/tests/jerry/es2015/regexp-construct.js +++ b/tests/jerry/es2015/regexp-construct.js @@ -28,3 +28,24 @@ try { } catch (e) { assert(e === 5); } + +r = new RegExp ("a","gimuy"); +assert (r.global === true); +assert (r.ignoreCase === true); +assert (r.multiline === true); +assert (r.unicode === true); +assert (r.sticky === true); + +try { + new RegExp ("a", "uu"); + assert (false); +} catch (e) { + assert (e instanceof SyntaxError); +} + +try { + new RegExp ("a", "yy"); + assert (false); +} catch (e) { + assert (e instanceof SyntaxError); +} diff --git a/tests/jerry/es2015/regexp-routines.js b/tests/jerry/es2015/regexp-routines.js new file mode 100644 index 000000000..268ec5526 --- /dev/null +++ b/tests/jerry/es2015/regexp-routines.js @@ -0,0 +1,86 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var r = new RegExp('a', 'gimuy'); +assert (r.flags === 'gimuy'); +assert (r.toString() === '/a/gimuy'); + +try { + Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags').get.call(42); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +var o = { + global: true, + unicode: true, + sticky: true, + source: "str" +} + +Object.defineProperty(o, 'flags', Object.getOwnPropertyDescriptor(RegExp.prototype, 'flags')); +assert(o.flags === "guy"); +assert (RegExp.prototype.toString.call (o) === "/str/guy"); + +Object.defineProperty(o, 'multiline', { 'get': function () {throw "abrupt flag get"; }}); +try { + o.flags + assert (false); +} catch (e) { + assert (e === "abrupt flag get"); +} + +try { + RegExp.prototype.toString.call(42); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +assert (RegExp.prototype.toString.call({}) === "/undefined/undefined"); + +var o = {}; +Object.defineProperty (o, 'source', { 'get' : function () {throw "abrupt source get"; } }); +try { + RegExp.prototype.toString.call(o); + assert (false); +} catch (e) { + assert (e === "abrupt source get"); +} + +var o = {source: {toString: function() {throw "abrupt source toString";}}}; +try { + RegExp.prototype.toString.call(o); + assert (false); +} catch (e) { + assert (e === "abrupt source toString"); +} + +var o = {source: "str"}; +Object.defineProperty (o, 'flags', { 'get' : function () {throw "abrupt flags get"; } }); +try { + RegExp.prototype.toString.call(o); + assert (false); +} catch (e) { + assert (e === "abrupt flags get"); +} + +var o = {source: "str", flags: {toString: function() {throw "abrupt flags toString";}}}; +try { + RegExp.prototype.toString.call(o); + assert (false); +} catch (e) { + assert (e === "abrupt flags toString"); +} diff --git a/tests/jerry/es5.1/regexp-routines.js b/tests/jerry/es5.1/regexp-routines.js new file mode 100644 index 000000000..fdd864c5c --- /dev/null +++ b/tests/jerry/es5.1/regexp-routines.js @@ -0,0 +1,20 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +try { + RegExp.prototype.toString.call ({}); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} diff --git a/tests/jerry/regexp-routines.js b/tests/jerry/regexp-routines.js index 00e328a2b..33b533d92 100644 --- a/tests/jerry/regexp-routines.js +++ b/tests/jerry/regexp-routines.js @@ -39,15 +39,6 @@ catch (e) r = new RegExp ("a", "mig"); assert (r.toString () == "/a/gim"); -try { - r.toString.call({}, "a"); - assert (false) -} -catch (e) -{ - assert (e instanceof TypeError); -} - /* Test continous calls to the exec method to see how does the match * updates the lastIndex propertyand see if the match restarts.