From 7c0b1ca88a2a0d317b6cbe9aee62d6f2fb494015 Mon Sep 17 00:00:00 2001 From: Szilagyi Adam Date: Tue, 10 Dec 2019 14:40:41 +0100 Subject: [PATCH] Update String.prototype.match to ECMA-262 v6 (#3375) The algorithm is based on ECMA-262 v6, 21.1.3.11 JerryScript-DCO-1.0-Signed-off-by: Adam Szilagyi aszilagy@inf.u-szeged.hu --- .../ecma-builtin-regexp-prototype.c | 159 +----------- .../ecma-builtin-string-prototype.c | 229 ++++++------------ .../ecma/operations/ecma-regexp-object.c | 186 ++++++++++++++ .../ecma/operations/ecma-regexp-object.h | 4 + tests/jerry/es2015/string-prototype-match.js | 82 +++++++ 5 files changed, 343 insertions(+), 317 deletions(-) create mode 100644 tests/jerry/es2015/string-prototype-match.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c index 0bee3834e..6ff749936 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c @@ -630,164 +630,7 @@ static ecma_value_t ecma_builtin_regexp_prototype_symbol_match (ecma_value_t this_arg, /**< this argument */ ecma_value_t string_arg) /**< source string */ { - if (!ecma_is_value_object (this_arg)) - { - return ecma_raise_type_error (ECMA_ERR_MSG ("TypeError")); - } - - ecma_string_t *str_p = ecma_op_to_string (string_arg); - - if (JERRY_UNLIKELY (str_p == NULL)) - { - return ECMA_VALUE_ERROR; - } - - ecma_object_t *obj_p = ecma_get_object_from_value (this_arg); - - ecma_value_t global_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_GLOBAL); - - if (ECMA_IS_VALUE_ERROR (global_value)) - { - ecma_deref_ecma_string (str_p); - return global_value; - } - - bool global = ecma_op_to_boolean (global_value); - - ecma_free_value (global_value); - - if (!global) - { - ecma_value_t result = ecma_op_regexp_exec (this_arg, str_p); - ecma_deref_ecma_string (str_p); - return result; - } - - ecma_value_t 
full_unicode_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_UNICODE); - - if (ECMA_IS_VALUE_ERROR (full_unicode_value)) - { - ecma_deref_ecma_string (str_p); - return full_unicode_value; - } - - bool full_unicode = ecma_op_to_boolean (full_unicode_value); - - ecma_free_value (full_unicode_value); - - ecma_value_t set_status = ecma_op_object_put (obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_uint32_value (0), - true); - - if (ECMA_IS_VALUE_ERROR (set_status)) - { - ecma_deref_ecma_string (str_p); - return set_status; - } - - ecma_value_t ret_value = ECMA_VALUE_ERROR; - ecma_value_t result_array = ecma_op_create_array_object (0, 0, false); - ecma_object_t *result_array_p = ecma_get_object_from_value (result_array); - uint32_t n = 0; - - while (true) - { - ecma_value_t result_value = ecma_op_regexp_exec (this_arg, str_p); - - if (ECMA_IS_VALUE_ERROR (result_value)) - { - goto result_cleanup; - } - - if (ecma_is_value_null (result_value)) - { - if (n == 0) - { - ret_value = ECMA_VALUE_NULL; - goto result_cleanup; - } - - ecma_deref_ecma_string (str_p); - return result_array; - } - - ecma_object_t *result_value_p = ecma_get_object_from_value (result_value); - ecma_value_t match_str_value = ecma_op_object_get_by_uint32_index (result_value_p, 0); - - ecma_deref_object (result_value_p); - - if (ECMA_IS_VALUE_ERROR (match_str_value)) - { - goto result_cleanup; - } - - ecma_string_t *match_str_p = ecma_op_to_string (match_str_value); - - if (JERRY_UNLIKELY (match_str_p == NULL)) - { - ecma_free_value (match_str_value); - goto result_cleanup; - } - - ecma_value_t new_prop = ecma_builtin_helper_def_prop_by_index (result_array_p, - n, - match_str_value, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (new_prop)); - - ecma_value_t match_result = ECMA_VALUE_ERROR; - if (ecma_string_is_empty (match_str_p)) - { - ecma_value_t this_index = ecma_op_object_get_by_magic_id (obj_p, 
LIT_MAGIC_STRING_LASTINDEX_UL); - - if (ECMA_IS_VALUE_ERROR (this_index)) - { - goto match_cleanup; - } - - uint32_t index; - ecma_value_t length_value = ecma_op_to_length (this_index, &index); - - ecma_free_value (this_index); - - if (ECMA_IS_VALUE_ERROR (length_value)) - { - goto match_cleanup; - } - - uint32_t next_index = ecma_op_advance_string_index (str_p, index, full_unicode); - - ecma_value_t next_set_status = ecma_op_object_put (obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_uint32_value (next_index), - true); - - if (ECMA_IS_VALUE_ERROR (next_set_status)) - { - goto match_cleanup; - } - } - - match_result = ECMA_VALUE_EMPTY; - -match_cleanup: - ecma_deref_ecma_string (match_str_p); - ecma_free_value (match_str_value); - - if (ECMA_IS_VALUE_ERROR (match_result)) - { - goto result_cleanup; - } - - n++; - } - -result_cleanup: - ecma_deref_ecma_string (str_p); - ecma_deref_object (result_array_p); - return ret_value; + return ecma_regexp_match_helper (this_arg, string_arg); } /* ecma_builtin_regexp_prototype_symbol_match */ #endif /* ENABLED (JERRY_ES2015) */ diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c index 73fc6445b..e2629ad11 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c @@ -17,6 +17,7 @@ #include "ecma-array-object.h" #include "ecma-builtin-helpers.h" #include "ecma-builtins.h" +#include "ecma-builtin-regexp.inc.h" #include "ecma-conversion.h" #include "ecma-exceptions.h" #include "ecma-function-object.h" @@ -317,180 +318,90 @@ static ecma_value_t ecma_builtin_string_prototype_object_match (ecma_value_t this_to_string_value, /**< this argument */ ecma_value_t regexp_arg) /**< routine's argument */ { - ecma_value_t regexp_value = ECMA_VALUE_EMPTY; - - ecma_value_t ret_value = ecma_builtin_string_prepare_search (regexp_arg, 
&regexp_value); - - if (ECMA_IS_VALUE_ERROR (ret_value)) +#if ENABLED (JERRY_ES2015) + if (!(ecma_is_value_undefined (regexp_arg) || ecma_is_value_null (regexp_arg))) { - return ret_value; - } + ecma_value_t matcher = ecma_op_get_method_by_symbol_id (regexp_arg, LIT_MAGIC_STRING_MATCH); - JERRY_ASSERT (!ecma_is_value_empty (regexp_value)); - ecma_object_t *regexp_obj_p = ecma_get_object_from_value (regexp_value); - - /* 5. */ - ecma_value_t global_value = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_GLOBAL); - - if (ECMA_IS_VALUE_ERROR (global_value)) - { - ecma_deref_object (regexp_obj_p); - return global_value; - } - - ecma_string_t *this_string_p = ecma_get_string_from_value (this_to_string_value); - ecma_ref_ecma_string (this_string_p); - - JERRY_ASSERT (ecma_is_value_boolean (global_value)); - - if (ecma_is_value_false (global_value)) - { - /* 7. */ - ret_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false); - ecma_deref_ecma_string (this_string_p); - ecma_deref_object (regexp_obj_p); - return ret_value; - } - - /* 8.a. */ - ecma_string_t *index_zero_string_p = ecma_get_ecma_string_from_uint32 (0); - - ecma_value_t put_value = ecma_op_object_put (regexp_obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_integer_value (0), - true); - - JERRY_ASSERT (ecma_is_value_boolean (put_value) - || ecma_is_value_empty (put_value) - || ECMA_IS_VALUE_ERROR (put_value)); - - - if (ECMA_IS_VALUE_ERROR (put_value)) - { - ecma_deref_ecma_string (this_string_p); - ecma_deref_object (regexp_obj_p); - return put_value; - } - - /* 8.b. */ - ecma_value_t new_array_value = ecma_op_create_array_object (NULL, 0, false); - - JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (new_array_value)); - - ecma_object_t *new_array_obj_p = ecma_get_object_from_value (new_array_value); - - /* 8.c. */ - ecma_number_t previous_last_index = 0; - /* 8.d. */ - uint32_t n = 0; - /* 8.e. 
*/ - bool last_match = true; - - ret_value = ECMA_VALUE_ERROR; - - /* 8.f. */ - while (last_match) - { - /* 8.f.i. */ - ecma_value_t exec_value = ecma_regexp_exec_helper (regexp_value, this_to_string_value, false); - - if (ECMA_IS_VALUE_ERROR (exec_value)) + if (ECMA_IS_VALUE_ERROR (matcher)) { - break; + return matcher; } - if (ecma_is_value_null (exec_value)) + if (!ecma_is_value_undefined (matcher)) { - /* 8.f.ii. */ - break; + ecma_object_t *matcher_method = ecma_get_object_from_value (matcher); + ecma_value_t result = ecma_op_function_call (matcher_method, regexp_arg, &this_to_string_value, 1); + ecma_deref_object (matcher_method); + return result; } - - /* 8.f.iii. */ - ecma_value_t this_index_value = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_LASTINDEX_UL); - - if (ECMA_IS_VALUE_ERROR (this_index_value)) - { - goto cleanup; - } - - ecma_value_t this_index_number = ecma_op_to_number (this_index_value); - - if (ECMA_IS_VALUE_ERROR (this_index_value)) - { - ecma_free_value (this_index_value); - goto cleanup; - } - - ecma_number_t this_index = ecma_get_number_from_value (this_index_number); - - /* 8.f.iii.2. */ - if (this_index == previous_last_index) - { - /* 8.f.iii.2.a. */ - ecma_value_t index_put_value = ecma_op_object_put (regexp_obj_p, - ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), - ecma_make_number_value (this_index + 1), - true); - if (ECMA_IS_VALUE_ERROR (index_put_value)) - { - ecma_free_value (this_index_value); - ecma_free_number (this_index_number); - goto cleanup; - } - - /* 8.f.iii.2.b. */ - previous_last_index = this_index + 1; - } - else - { - /* 8.f.iii.3. */ - previous_last_index = this_index; - } - - /* 8.f.iii.4. */ - JERRY_ASSERT (ecma_is_value_object (exec_value)); - ecma_object_t *exec_obj_p = ecma_get_object_from_value (exec_value); - - ecma_value_t match_string_value = ecma_op_object_get (exec_obj_p, index_zero_string_p); - JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (match_string_value)); - - /* 8.f.iii.5. 
*/ - ecma_value_t completion; - completion = ecma_builtin_helper_def_prop_by_index (new_array_obj_p, - n, - match_string_value, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (completion)); - /* 8.f.iii.6. */ - n++; - - ecma_free_value (match_string_value); - ecma_free_value (this_index_value); - ecma_free_number (this_index_number); - - ecma_deref_object (exec_obj_p); } - - if (n == 0) +#else /* !ENABLED (JERRY_ES2015) */ + if (ecma_object_is_regexp_object (regexp_arg)) { - /* 8.g. */ - ret_value = ECMA_VALUE_NULL; + return ecma_regexp_match_helper (regexp_arg, this_to_string_value); } - else +#endif /* ENABLED (JERRY_ES2015) */ + + ecma_string_t *pattern_p = ecma_regexp_read_pattern_str_helper (regexp_arg); + + if (JERRY_UNLIKELY (pattern_p == NULL)) { - /* 8.h. */ - ecma_ref_object (new_array_obj_p); - ret_value = new_array_value; + return ECMA_VALUE_ERROR; } -cleanup: - ecma_deref_object (new_array_obj_p); + ecma_value_t new_regexp = ecma_op_create_regexp_object (pattern_p, 0); - ecma_deref_object (regexp_obj_p); - ecma_deref_ecma_string (this_string_p); + ecma_deref_ecma_string (pattern_p); - return ret_value; + if (ECMA_IS_VALUE_ERROR (new_regexp)) + { + return new_regexp; + } + +#if ENABLED (JERRY_ES2015) + ecma_object_t *new_regexp_obj = ecma_get_object_from_value (new_regexp); + + ecma_value_t func_value = ecma_op_object_get_by_symbol_id (new_regexp_obj, LIT_MAGIC_STRING_MATCH); + + if (ECMA_IS_VALUE_ERROR (func_value) || !ecma_op_is_callable (func_value)) + { + ecma_deref_object (new_regexp_obj); + + if (!ECMA_IS_VALUE_ERROR (func_value)) + { + ecma_free_value (func_value); + ecma_raise_type_error (ECMA_ERR_MSG ("@@match is not callable.")); + } + + return ECMA_VALUE_ERROR; + } + + ecma_object_t *func_obj = ecma_get_object_from_value (func_value); + + ecma_string_t *str_p = ecma_op_to_string (this_to_string_value); + + if (JERRY_UNLIKELY (str_p == NULL)) + { + ecma_deref_object (new_regexp_obj); + ecma_deref_object 
(func_obj); + return ECMA_VALUE_ERROR; + } + + ecma_value_t str_value = ecma_make_string_value (str_p); + + ecma_value_t result = ecma_op_function_call (func_obj, new_regexp, &str_value, 1); + + ecma_deref_ecma_string (str_p); + ecma_deref_object (new_regexp_obj); + ecma_deref_object (func_obj); +#else /* !ENABLED (JERRY_ES2015) */ + ecma_value_t result = ecma_regexp_match_helper (new_regexp, this_to_string_value); + + ecma_free_value (new_regexp); +#endif /* ENABLED (JERRY_ES2015) */ + + return result; } /* ecma_builtin_string_prototype_object_match */ /** diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c index 5bc7c665e..05729a780 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.c +++ b/jerry-core/ecma/operations/ecma-regexp-object.c @@ -2248,6 +2248,192 @@ cleanup_string: return result; } /* ecma_regexp_replace_helper */ +/** + * Helper function for RegExp based matching + * + * See also: + * String.prototype.match + * RegExp.prototype[@@match] + * + * @return ecma_value_t + */ +ecma_value_t +ecma_regexp_match_helper (ecma_value_t this_arg, /**< this argument */ + ecma_value_t string_arg) /**< source string */ +{ + if (!ecma_is_value_object (this_arg)) + { + return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object.")); + } + + ecma_string_t *str_p = ecma_op_to_string (string_arg); + + if (JERRY_UNLIKELY (str_p == NULL)) + { + return ECMA_VALUE_ERROR; + } + + ecma_object_t *obj_p = ecma_get_object_from_value (this_arg); + + ecma_value_t global_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_GLOBAL); + + if (ECMA_IS_VALUE_ERROR (global_value)) + { + ecma_deref_ecma_string (str_p); + return global_value; + } + + bool global = ecma_op_to_boolean (global_value); + + ecma_free_value (global_value); + + if (!global) + { + ecma_value_t result = ecma_op_regexp_exec (this_arg, str_p); + ecma_deref_ecma_string (str_p); + return result; + } + +#if ENABLED (JERRY_ES2015) + 
ecma_value_t full_unicode_value = ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_UNICODE); + + if (ECMA_IS_VALUE_ERROR (full_unicode_value)) + { + ecma_deref_ecma_string (str_p); + return full_unicode_value; + } + + bool full_unicode = ecma_op_to_boolean (full_unicode_value); + + ecma_free_value (full_unicode_value); +#endif /* ENABLED (JERRY_ES2015) */ + + ecma_value_t set_status = ecma_op_object_put (obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (0), + true); + + if (ECMA_IS_VALUE_ERROR (set_status)) + { + ecma_deref_ecma_string (str_p); + return set_status; + } + + ecma_value_t ret_value = ECMA_VALUE_ERROR; + ecma_value_t result_array = ecma_op_create_array_object (0, 0, false); + ecma_object_t *result_array_p = ecma_get_object_from_value (result_array); + uint32_t n = 0; + + while (true) + { + ecma_value_t result_value = ecma_op_regexp_exec (this_arg, str_p); + + if (ECMA_IS_VALUE_ERROR (result_value)) + { + goto result_cleanup; + } + + if (ecma_is_value_null (result_value)) + { + if (n == 0) + { + ret_value = ECMA_VALUE_NULL; + goto result_cleanup; + } + + ecma_deref_ecma_string (str_p); + return result_array; + } + + ecma_object_t *result_value_p = ecma_get_object_from_value (result_value); + ecma_value_t match_str_value = ecma_op_object_get_by_uint32_index (result_value_p, 0); + + ecma_deref_object (result_value_p); + + if (ECMA_IS_VALUE_ERROR (match_str_value)) + { + goto result_cleanup; + } + + ecma_string_t *match_str_p = ecma_op_to_string (match_str_value); + + if (JERRY_UNLIKELY (match_str_p == NULL)) + { + ecma_free_value (match_str_value); + goto result_cleanup; + } + + ecma_value_t new_prop = ecma_builtin_helper_def_prop_by_index (result_array_p, + n, + match_str_value, + ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); + + JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (new_prop)); + + ecma_value_t match_result = ECMA_VALUE_ERROR; + if (ecma_string_is_empty (match_str_p)) + { + ecma_value_t this_index = 
ecma_op_object_get_by_magic_id (obj_p, LIT_MAGIC_STRING_LASTINDEX_UL); + + if (ECMA_IS_VALUE_ERROR (this_index)) + { + goto match_cleanup; + } + +#if ENABLED (JERRY_ES2015) + uint32_t index; + ecma_value_t length_value = ecma_op_to_length (this_index, &index); + + ecma_free_value (this_index); + + if (ECMA_IS_VALUE_ERROR (length_value)) + { + goto match_cleanup; + } + + uint32_t next_index = ecma_op_advance_string_index (str_p, index, full_unicode); + + ecma_value_t next_set_status = ecma_op_object_put (obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_uint32_value (next_index), + true); +#else /* !ENABLED (JERRY_ES2015) */ + ecma_number_t next_index = ecma_get_number_from_value (this_index); + + ecma_value_t next_set_status = ecma_op_object_put (obj_p, + ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL), + ecma_make_number_value (next_index + 1), + true); + + ecma_free_value (this_index); +#endif /* ENABLED (JERRY_ES2015) */ + + if (ECMA_IS_VALUE_ERROR (next_set_status)) + { + goto match_cleanup; + } + } + + match_result = ECMA_VALUE_EMPTY; + +match_cleanup: + ecma_deref_ecma_string (match_str_p); + ecma_free_value (match_str_value); + + if (ECMA_IS_VALUE_ERROR (match_result)) + { + goto result_cleanup; + } + + n++; + } + +result_cleanup: + ecma_deref_ecma_string (str_p); + ecma_deref_object (result_array_p); + return ret_value; +} /* ecma_regexp_match_helper */ + /** * RegExpExec operation * diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h index 1b5c023aa..2fb4cde83 100644 --- a/jerry-core/ecma/operations/ecma-regexp-object.h +++ b/jerry-core/ecma/operations/ecma-regexp-object.h @@ -111,6 +111,10 @@ ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg); +ecma_value_t +ecma_regexp_match_helper (ecma_value_t this_arg, + ecma_value_t string_arg); + ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p); 
/** * @} diff --git a/tests/jerry/es2015/string-prototype-match.js b/tests/jerry/es2015/string-prototype-match.js new file mode 100644 index 000000000..02e818a41 --- /dev/null +++ b/tests/jerry/es2015/string-prototype-match.js @@ -0,0 +1,82 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +class NewRegExp extends RegExp { + [Symbol.match](str) { + var result = RegExp.prototype[Symbol.match].call(this, str); + var successful = 0; + if (result) { + successful = 1; + } + return successful; + } +} + +var str = 'This is a random string.'; +var regexp = new NewRegExp(/[A-Z]/g); +assert(str.match(regexp) === 1); + +try { + String.prototype.match.call(null, regexp); + assert(false); +} catch (e) { + assert(e instanceof TypeError); +} + +var regexp2 = /[A-Z]/g; +regexp2[Symbol.match] = "foo"; + +try { + str.match(regexp2); + assert(false); +} catch (e) { + assert(e instanceof TypeError); +} + +Object.defineProperty (regexp2, Symbol.match, { get () { throw 5 }}); + +try { + str.match(regexp2); + assert(false); +} catch (e) { + assert(e === 5); +} + +var wrong_sytnax = "str.match(/[A-5]/g"; + +try { + eval(wrong_sytnax); + assert(false); +} catch (e) { + assert(e instanceof SyntaxError); +} + +delete(RegExp.prototype[Symbol.match]); + +try { + str.match(regexp); + assert(false); +} catch (e) { + assert(e instanceof TypeError); +} + +var regexp3 = "foo"; +RegExp.prototype[Symbol.match] = 3; + 
+try { + str.match(regexp3); + assert(false); +} catch (e) { + assert(e instanceof TypeError); +}