diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c
index 722fcddc0..4c026b118 100644
--- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.c
@@ -633,6 +633,23 @@ ecma_builtin_regexp_prototype_symbol_search (ecma_value_t this_arg, /**< this ar
   return ecma_regexp_search_helper (this_arg, string_arg);
 } /* ecma_builtin_regexp_prototype_symbol_search */
 
+/**
+ * The RegExp.prototype object's '@@split' routine; forwards its arguments to ecma_regexp_split_helper
+ *
+ * See also:
+ *          ECMA-262 v6.0, 21.2.5.11
+ *
+ * @return value produced by ecma_regexp_split_helper, passed through unchanged
+ *         Returned value must be freed with ecma_free_value.
+ */
+static ecma_value_t
+ecma_builtin_regexp_prototype_symbol_split (ecma_value_t this_arg, /**< this argument */
+                                            ecma_value_t string_arg, /**< source string */
+                                            ecma_value_t limit_arg) /**< limit */
+{
+  return ecma_regexp_split_helper (this_arg, string_arg, limit_arg);
+} /* ecma_builtin_regexp_prototype_symbol_split */
+
 /**
  * The RegExp.prototype object's '@@match' routine
  *
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h
index c08c11939..c7ba4ea1f 100644
--- a/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-regexp-prototype.inc.h
@@ -57,6 +57,7 @@ ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_STICKY,
 
 ROUTINE (LIT_GLOBAL_SYMBOL_REPLACE, ecma_builtin_regexp_prototype_symbol_replace, 2, 2)
 ROUTINE (LIT_GLOBAL_SYMBOL_SEARCH, ecma_builtin_regexp_prototype_symbol_search, 1, 1)
+ROUTINE (LIT_GLOBAL_SYMBOL_SPLIT, ecma_builtin_regexp_prototype_symbol_split, 2, 2)
 ROUTINE (LIT_GLOBAL_SYMBOL_MATCH, ecma_builtin_regexp_prototype_symbol_match, 1, 1)
 #else /* !ENABLED (JERRY_ES2015) */
 /* ECMA-262 v5, 15.10.7.1 */
diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c
b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c index d230a5a58..47867ac36 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.c @@ -726,337 +726,183 @@ ecma_builtin_string_prototype_object_slice (ecma_string_t *get_string_val, /**< * Returned value must be freed with ecma_free_value. */ static ecma_value_t -ecma_builtin_string_prototype_object_split (ecma_value_t this_to_string_val, /**< this argument */ - ecma_value_t arg1, /**< separator */ - ecma_value_t arg2) /**< limit */ +ecma_builtin_string_prototype_object_split (ecma_value_t this_value, /**< this argument */ + ecma_value_t separator_value, /**< separator */ + ecma_value_t limit_value) /**< limit */ { - /* 5. */ - ecma_length_t limit = 0; - - if (ecma_is_value_undefined (arg2)) +#if ENABLED (JERRY_ES2015) + if (!(ecma_is_value_undefined (separator_value) || ecma_is_value_null (separator_value))) { - limit = (uint32_t) -1; - } - else - { - ecma_number_t limit_num; + ecma_object_t *obj_p = ecma_get_object_from_value (ecma_op_to_object (separator_value)); + ecma_value_t split_symbol = ecma_op_object_get_by_symbol_id (obj_p, LIT_MAGIC_STRING_SPLIT); + ecma_deref_object (obj_p); - if (ECMA_IS_VALUE_ERROR (ecma_get_number (arg2, &limit_num))) + if (ECMA_IS_VALUE_ERROR (split_symbol)) { - return ECMA_VALUE_ERROR; + return split_symbol; } - limit = ecma_number_to_uint32 (limit_num); + if (!ecma_is_value_undefined (split_symbol) && !ecma_is_value_null (split_symbol)) + { + if (!ecma_op_is_callable (split_symbol)) + { + ecma_free_value (split_symbol); + return ecma_raise_type_error (ECMA_ERR_MSG ("@@split is not callable")); + } + + ecma_object_t *split_method_p = ecma_get_object_from_value (split_symbol); + + ecma_value_t arguments[] = { this_value, limit_value }; + ecma_value_t split_result = ecma_op_function_call (split_method_p, separator_value, arguments, 2); + + ecma_deref_object (split_method_p); + 
return split_result; + } } - - /* 3. */ - ecma_value_t new_array = ecma_op_create_array_object (0, 0, false); - - if (limit == 0) +#else /* !ENABLED (JERRY_ES2015) */ + if (ecma_object_is_regexp_object (separator_value)) { - return new_array; + return ecma_regexp_split_helper (separator_value, this_value, limit_value); } +#endif /* ENABLED (JERRY_ES2015) */ - ecma_object_t *new_array_p = ecma_get_object_from_value (new_array); + ecma_value_t result = ECMA_VALUE_ERROR; - /* 10. */ - if (ecma_is_value_undefined (arg1)) + /* 4. */ + ecma_string_t *string_p = ecma_op_to_string (this_value); + if (string_p == NULL) { - ecma_value_t put_comp = ecma_builtin_helper_def_prop_by_index (new_array_p, - 0, - this_to_string_val, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - - return new_array; + return result; } /* 8. */ - ecma_value_t separator = ECMA_VALUE_EMPTY; - - bool separator_is_regexp = false; - - if (ecma_object_is_regexp_object (arg1)) + uint32_t limit = UINT32_MAX; + if (!ecma_is_value_undefined (limit_value)) { - separator_is_regexp = true; - separator = ecma_copy_value (arg1); - } - else - { - ecma_string_t *separator_to_string_p = ecma_op_to_string (arg1); - - if (JERRY_UNLIKELY (separator_to_string_p == NULL)) + if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (limit_value, &limit))) { - ecma_deref_object (new_array_p); - return ECMA_VALUE_ERROR; + goto cleanup_string; } - - separator = ecma_make_string_value (separator_to_string_p); } - const ecma_string_t *this_to_string_p = ecma_get_string_from_value (this_to_string_val); - - /* 11. */ - if (ecma_string_is_empty (this_to_string_p)) + /* 12. 
*/ + ecma_string_t *separator_p = ecma_op_to_string (separator_value); + if (separator_p == NULL) { - bool should_return = false; + goto cleanup_string; + } - if (separator_is_regexp) - { -#if ENABLED (JERRY_BUILTIN_REGEXP) - ecma_value_t regexp_value = ecma_copy_value_if_not_object (separator); - ecma_value_t match_result; - match_result = ecma_regexp_exec_helper (regexp_value, - ecma_make_magic_string_value (LIT_MAGIC_STRING__EMPTY), - true); - should_return = !ecma_is_value_null (match_result); + /* 6. */ + result = ecma_op_create_array_object (NULL, 0, false); - if (ECMA_IS_VALUE_ERROR (match_result)) - { - match_result = jcontext_take_exception (); - } + /* 14. */ + if (limit == 0) + { + goto cleanup_separator; + } - ecma_free_value (match_result); -#else /* !ENABLED (JERRY_BUILTIN_REGEXP) */ - return ecma_raise_type_error (ECMA_ERR_MSG ("REGEXP separator is disabled in split method.")); -#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ - } - else - { - ecma_string_t *separator_str_p = ecma_get_string_from_value (separator); + ecma_object_t *array_p = ecma_get_object_from_value (result); + ecma_length_t array_length = 0; - if (ecma_string_get_length (separator_str_p) == 0) - { - should_return = true; - } - } - - if (!should_return) - { - /* 11.c */ - ecma_value_t put_comp = ecma_builtin_helper_def_prop_by_index (new_array_p, - 0, - this_to_string_val, + /* 15. */ + if (ecma_is_value_undefined (separator_value)) + { + ecma_value_t put_result = ecma_builtin_helper_def_prop_by_index (array_p, + array_length, + ecma_make_string_value (string_p), ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - } + JERRY_ASSERT (put_result == ECMA_VALUE_TRUE); + goto cleanup_separator; } - else + + /* 16. */ + if (ecma_string_is_empty (string_p)) { - /* 4. */ - ecma_length_t new_array_length = 0; - - /* 7. */ - ecma_length_t start_pos = 0; - - /* 12. 
*/ - ecma_length_t curr_pos = start_pos; - - bool separator_is_empty = false; - bool should_return = false; - - /* 6. */ - const ecma_length_t string_length = ecma_string_get_length (this_to_string_p); - - while (curr_pos < string_length && !should_return) + if (!ecma_string_is_empty (separator_p)) { - ecma_value_t match_result = ECMA_VALUE_NULL; - - if (separator_is_regexp) - { -#if ENABLED (JERRY_BUILTIN_REGEXP) - ecma_value_t regexp_value = ecma_copy_value_if_not_object (separator); - ecma_string_t *substr_str_p = ecma_string_substr (this_to_string_p, curr_pos, string_length); - match_result = ecma_regexp_exec_helper (regexp_value, ecma_make_string_value (substr_str_p), true); - ecma_deref_ecma_string (substr_str_p); -#else /* !ENABLED (JERRY_BUILTIN_REGEXP) */ - return ecma_raise_type_error (ECMA_ERR_MSG ("REGEXP separator is disabled in split method.")); -#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */ - } - else - { - ecma_string_t *separator_str_p = ecma_get_string_from_value (separator); - ecma_length_t separator_length = ecma_string_get_length (separator_str_p); - - if (curr_pos + separator_length <= string_length) - { - bool is_different = false; - for (ecma_length_t i = 0; i < separator_length && !is_different; i++) - { - ecma_char_t char_from_string = ecma_string_get_char_at_pos (this_to_string_p, curr_pos + i); - ecma_char_t char_from_separator = ecma_string_get_char_at_pos (separator_str_p, i); - is_different = (char_from_string != char_from_separator); - } - - if (!is_different) - { - /* 6-7. 
*/ - match_result = ecma_op_create_array_object (0, 0, false); - } - } - } - - if (ecma_is_value_null (match_result) || ECMA_IS_VALUE_ERROR (match_result)) - { - curr_pos++; - if (ECMA_IS_VALUE_ERROR (match_result)) - { - jcontext_release_exception (); - } - } - else - { - ecma_object_t *match_obj_p = ecma_get_object_from_value (match_result); - ecma_string_t *zero_str_p = ecma_get_ecma_string_from_uint32 (0); - ecma_string_t *magic_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX); - ecma_value_t index_prop_value; - - if (separator_is_regexp) - { - JERRY_ASSERT (!ecma_op_object_is_fast_array (match_obj_p)); - - ecma_property_value_t *index_prop_value_p = ecma_get_named_data_property (match_obj_p, magic_index_str_p); - ecma_number_t index_num = ecma_get_number_from_value (index_prop_value_p->value); - ecma_value_assign_number (&index_prop_value_p->value, index_num + (ecma_number_t) curr_pos); - index_prop_value = index_prop_value_p->value; - } - else - { - const uint32_t opts = ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE | ECMA_IS_THROW; - ecma_string_t *separator_str_p = ecma_get_string_from_value (separator); - - ecma_value_t put_comp = ecma_builtin_helper_def_prop (match_obj_p, - zero_str_p, - ecma_make_string_value (separator_str_p), - opts); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - - index_prop_value = ecma_make_uint32_value (curr_pos); - - put_comp = ecma_builtin_helper_def_prop (match_obj_p, - magic_index_str_p, - index_prop_value, - ECMA_PROPERTY_FLAG_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - } - - ecma_value_t match_comp_value = ecma_op_object_get (match_obj_p, zero_str_p); - JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (match_comp_value)); - - ecma_string_t *match_str_p = ecma_get_string_from_value (match_comp_value); - ecma_length_t match_str_length = ecma_string_get_length (match_str_p); - - separator_is_empty = ecma_string_is_empty (match_str_p); - - ecma_free_value (match_comp_value); - - ecma_number_t index_num = 
ecma_get_number_from_value (index_prop_value); - JERRY_ASSERT (index_num >= 0); - - - uint32_t end_pos = ecma_number_to_uint32 (index_num); - - if (separator_is_empty) - { - end_pos = curr_pos + 1; - } - - /* 13.c.iii.1-2 */ - ecma_string_t *substr_str_p = ecma_string_substr (this_to_string_p, - start_pos, - end_pos); - - ecma_value_t put_comp; - put_comp = ecma_builtin_helper_def_prop_by_index (new_array_p, - new_array_length, - ecma_make_string_value (substr_str_p), - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - - /* 13.c.iii.3 */ - new_array_length++; - - /* 13.c.iii.4 */ - if (new_array_length == limit) - { - should_return = true; - } - - /* 13.c.iii.5 */ - start_pos = end_pos + match_str_length; - - const uint32_t match_result_array_length = ecma_array_get_length (match_obj_p) - 1; - - /* 13.c.iii.6 */ - uint32_t i = 0; - - /* 13.c.iii.7 */ - while (i < match_result_array_length) - { - /* 13.c.iii.7.a */ - i++; - match_comp_value = ecma_op_object_get_by_uint32_index (match_obj_p, i); - - JERRY_ASSERT (!ECMA_IS_VALUE_ERROR (match_comp_value)); - - /* 13.c.iii.7.b */ - put_comp = ecma_builtin_helper_def_prop_by_index (new_array_p, - new_array_length, - match_comp_value, - ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); - - JERRY_ASSERT (ecma_is_value_true (put_comp)); - - /* 13.c.iii.7.c */ - new_array_length++; - - /* 13.c.iii.7.d */ - if (new_array_length == limit) - { - should_return = true; - } - - ecma_free_value (match_comp_value); - } - - /* 13.c.iii.8 */ - curr_pos = start_pos; - - ecma_deref_ecma_string (substr_str_p); - } - - ecma_free_value (match_result); - + ecma_value_t put_result = ecma_builtin_helper_def_prop_by_index (array_p, + array_length, + ecma_make_string_value (string_p), + ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE); + JERRY_ASSERT (put_result == ECMA_VALUE_TRUE); } - if (!should_return && !separator_is_empty) - { - /* 14. 
*/
-      ecma_string_t *substr_str_p;
-      substr_str_p = ecma_string_substr (this_to_string_p,
-                                         start_pos,
-                                         string_length);
-
-      /* 15. */
-      ecma_value_t put_comp;
-      put_comp = ecma_builtin_helper_def_prop_by_index (new_array_p,
-                                                        new_array_length,
-                                                        ecma_make_string_value (substr_str_p),
-                                                        ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
-
-      JERRY_ASSERT (ecma_is_value_true (put_comp));
-      ecma_deref_ecma_string (substr_str_p);
-    }
+    goto cleanup_separator;
   }
 
-  ecma_free_value (separator);
+  lit_utf8_size_t string_size;
+  uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII;
+  const lit_utf8_byte_t *string_buffer_p = ecma_string_get_chars (string_p,
+                                                                  &string_size,
+                                                                  NULL,
+                                                                  NULL,
+                                                                  &string_flags);
+  lit_utf8_size_t separator_size;
+  uint8_t separator_flags = ECMA_STRING_FLAG_IS_ASCII;
+  const lit_utf8_byte_t *separator_buffer_p = ecma_string_get_chars (separator_p,
+                                                                     &separator_size,
+                                                                     NULL,
+                                                                     NULL,
+                                                                     &separator_flags);
 
-  return new_array;
+  const lit_utf8_byte_t *const string_end_p = string_buffer_p + string_size;
+  const lit_utf8_size_t separator_tail = (separator_size > 0) ? separator_size - 1 : 0; /* keeps pointer in range */
+  const lit_utf8_byte_t *const compare_end_p = string_end_p - JERRY_MIN (separator_tail, string_size);
+  const lit_utf8_byte_t *current_p = string_buffer_p;
+  const lit_utf8_byte_t *last_str_begin_p = string_buffer_p;
+
+  while (current_p < compare_end_p)
+  {
+    if (!memcmp (current_p, separator_buffer_p, separator_size)
+        && (last_str_begin_p != current_p + separator_size))
+    {
+      ecma_string_t *substr_p = ecma_new_ecma_string_from_utf8 (last_str_begin_p,
+                                                                (lit_utf8_size_t) (current_p - last_str_begin_p));
+      ecma_value_t put_result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                                       array_length++,
+                                                                       ecma_make_string_value (substr_p),
+                                                                       ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (put_result == ECMA_VALUE_TRUE);
+      ecma_deref_ecma_string (substr_p);
+
+      if (array_length >= limit)
+      {
+        goto cleanup_buffers;
+      }
+
+      current_p += separator_size;
+      last_str_begin_p = current_p;
+      continue;
+    }
+
+    lit_utf8_incr (&current_p);
+  }
+
+  ecma_string_t *end_substr_p = ecma_new_ecma_string_from_utf8 (last_str_begin_p,
+                                                                (lit_utf8_size_t) (string_end_p - last_str_begin_p));
+  ecma_value_t put_result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                                   array_length,
+                                                                   ecma_make_string_value (end_substr_p),
+                                                                   ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+  JERRY_ASSERT (put_result == ECMA_VALUE_TRUE);
+  ecma_deref_ecma_string (end_substr_p);
+
+cleanup_buffers:
+  if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
+  {
+    jmem_heap_free_block ((void *) string_buffer_p, string_size);
+  }
+
+  if (separator_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
+  {
+    jmem_heap_free_block ((void *) separator_buffer_p, separator_size);
+  }
+
+cleanup_separator:
+  ecma_deref_ecma_string (separator_p);
+cleanup_string:
+  ecma_deref_ecma_string (string_p);
+  return result;
 } /* ecma_builtin_string_prototype_object_split */
 
 /**
diff --git a/jerry-core/ecma/operations/ecma-regexp-object.c b/jerry-core/ecma/operations/ecma-regexp-object.c
index 40eaa740a..8220f3f04 100644
--- a/jerry-core/ecma/operations/ecma-regexp-object.c
+++ b/jerry-core/ecma/operations/ecma-regexp-object.c
@@ -1606,6 +1606,540 @@ cleanup_string:
   return result;
 } /* ecma_regexp_search_helper */
 
+/**
+ * Helper function for RegExp based string split operation
+ *
+ * With JERRY_ES2015 enabled, a sticky copy of the regexp is constructed through its species
+ * constructor and stepped with 'lastIndex' and ecma_op_regexp_exec. Otherwise the compiled
+ * bytecode of the regexp object is matched against the string buffer directly.
+ *
+ * See also:
+ *          ECMA-262 v6, 21.2.5.11
+ *
+ * @return array of split and captured strings - if the split succeeded
+ *         error value - otherwise
+ *         Returned value must be freed with ecma_free_value.
+ */
+ecma_value_t
+ecma_regexp_split_helper (ecma_value_t this_arg, /**< this value */
+                          ecma_value_t string_arg, /**< string value */
+                          ecma_value_t limit_arg) /**< limit value */
+{
+#if ENABLED (JERRY_ES2015)
+  /* 2. */
+  if (!ecma_is_value_object (this_arg))
+  {
+    return ecma_raise_type_error (ECMA_ERR_MSG ("'this' is not an object."));
+  }
+
+  ecma_value_t result = ECMA_VALUE_ERROR;
+
+  /* 3-4. */
+  ecma_string_t *const string_p = ecma_op_to_string (string_arg);
+  if (string_p == NULL)
+  {
+    return result;
+  }
+
+  /* 5-6. */
+  ecma_object_t *const regexp_obj_p = ecma_get_object_from_value (this_arg);
+  ecma_value_t constructor = ecma_op_species_constructor (regexp_obj_p, ECMA_BUILTIN_ID_REGEXP);
+  if (ECMA_IS_VALUE_ERROR (constructor))
+  {
+    goto cleanup_string;
+  }
+
+  ecma_object_t *const constructor_obj_p = ecma_get_object_from_value (constructor);
+
+  /* 7-8. */
+  ecma_value_t flags = ecma_op_object_get_by_magic_id (regexp_obj_p, LIT_MAGIC_STRING_FLAGS);
+  if (ECMA_IS_VALUE_ERROR (flags))
+  {
+    ecma_deref_object (constructor_obj_p);
+    goto cleanup_string;
+  }
+
+  ecma_string_t *flags_str_p = ecma_op_to_string (flags);
+  ecma_free_value (flags);
+
+  if (JERRY_UNLIKELY (flags_str_p == NULL))
+  {
+    ecma_deref_object (constructor_obj_p);
+    goto cleanup_string;
+  }
+
+  lit_utf8_size_t flags_size;
+  uint8_t flags_str_flags = ECMA_STRING_FLAG_IS_ASCII;
+  const lit_utf8_byte_t *flags_buffer_p = ecma_string_get_chars (flags_str_p,
+                                                                 &flags_size,
+                                                                 NULL,
+                                                                 NULL,
+                                                                 &flags_str_flags);
+
+  bool unicode = false;
+  bool sticky = false;
+
+  /* 9-11. */
+  const lit_utf8_byte_t *const flags_end_p = flags_buffer_p + flags_size;
+  for (const lit_utf8_byte_t *current_p = flags_buffer_p; current_p < flags_end_p; ++current_p)
+  {
+    switch (*current_p)
+    {
+      case LIT_CHAR_LOWERCASE_U:
+      {
+        unicode = true;
+        break;
+      }
+      case LIT_CHAR_LOWERCASE_Y:
+      {
+        sticky = true;
+        break;
+      }
+    }
+  }
+
+  if (flags_str_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
+  {
+    jmem_heap_free_block ((void *) flags_buffer_p, flags_size);
+  }
+
+  /* 12. Append 'y' unless the flags already contain it. */
+  if (!sticky)
+  {
+    ecma_stringbuilder_t builder = ecma_stringbuilder_create_from (flags_str_p);
+    ecma_stringbuilder_append_byte (&builder, LIT_CHAR_LOWERCASE_Y);
+
+    ecma_deref_ecma_string (flags_str_p);
+    flags_str_p = ecma_stringbuilder_finalize (&builder);
+  }
+
+  /* 13-14. */
+  ecma_value_t arguments[] = { this_arg, ecma_make_string_value (flags_str_p) };
+  ecma_value_t splitter = ecma_op_function_construct (constructor_obj_p, ECMA_VALUE_UNDEFINED, arguments, 2);
+
+  ecma_deref_ecma_string (flags_str_p);
+  ecma_deref_object (constructor_obj_p);
+
+  if (ECMA_IS_VALUE_ERROR (splitter))
+  {
+    goto cleanup_string;
+  }
+
+  ecma_object_t *const splitter_obj_p = ecma_get_object_from_value (splitter);
+
+  /* 17. */
+  uint32_t limit = UINT32_MAX;
+  if (!ecma_is_value_undefined (limit_arg))
+  {
+    if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (limit_arg, &limit)))
+    {
+      goto cleanup_splitter;
+    }
+  }
+
+  /* 15. */
+  ecma_value_t array = ecma_op_create_array_object (NULL, 0, false);
+
+  /* 21. */
+  if (limit == 0)
+  {
+    result = array;
+    goto cleanup_splitter;
+  }
+
+  const lit_utf8_size_t string_length = ecma_string_get_length (string_p);
+
+  ecma_object_t *const array_p = ecma_get_object_from_value (array);
+  ecma_length_t array_length = 0;
+
+  /* 22. */
+  if (string_length == 0)
+  {
+    const ecma_value_t match = ecma_op_regexp_exec (splitter, string_p);
+
+    if (ECMA_IS_VALUE_ERROR (match))
+    {
+      goto cleanup_array;
+    }
+
+    if (ecma_is_value_null (match))
+    {
+      result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                      array_length,
+                                                      ecma_make_string_value (string_p),
+                                                      ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (ecma_is_value_true (result));
+    }
+
+    ecma_free_value (match);
+    result = array;
+    goto cleanup_splitter;
+  }
+
+  /* 23. */
+  uint32_t current_index = 0;
+  uint32_t previous_index = 0;
+
+  ecma_string_t *const lastindex_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
+
+  /* 24. */
+  while (current_index < string_length)
+  {
+    /* 24.a-b. */
+    result = ecma_op_object_put (splitter_obj_p,
+                                 lastindex_str_p,
+                                 ecma_make_uint32_value (current_index),
+                                 true);
+
+    if (ECMA_IS_VALUE_ERROR (result))
+    {
+      goto cleanup_array;
+    }
+
+    JERRY_ASSERT (ecma_is_value_true (result));
+
+    /* 24.c-d. */
+    result = ecma_op_regexp_exec (splitter, string_p);
+    if (ECMA_IS_VALUE_ERROR (result))
+    {
+      goto cleanup_array;
+    }
+
+    /* 24.e. */
+    if (ecma_is_value_null (result))
+    {
+      current_index = ecma_op_advance_string_index (string_p, current_index, unicode);
+      continue;
+    }
+
+    ecma_object_t *const match_array_p = ecma_get_object_from_value (result);
+
+    /* 24.f.i. */
+    result = ecma_op_object_get (splitter_obj_p, lastindex_str_p);
+    if (ECMA_IS_VALUE_ERROR (result))
+    {
+      ecma_deref_object (match_array_p);
+      goto cleanup_array;
+    }
+
+    uint32_t end_index;
+    const ecma_value_t length_value = ecma_op_to_length (result, &end_index);
+    ecma_free_value (result);
+
+    if (ECMA_IS_VALUE_ERROR (length_value))
+    {
+      result = ECMA_VALUE_ERROR;
+      ecma_deref_object (match_array_p);
+      goto cleanup_array;
+    }
+
+    /* ES2020 21.2.5.13 step 19.d.ii: e = min (ToLength (lastIndex), size). A user defined exec
+     * may leave lastIndex past the end, which would make the substring positions invalid. */
+    if (end_index > string_length)
+    {
+      end_index = string_length;
+    }
+
+    /* 24.f.iii. */
+    if (previous_index == end_index)
+    {
+      ecma_deref_object (match_array_p);
+      current_index = ecma_op_advance_string_index (string_p, current_index, unicode);
+      continue;
+    }
+
+    /* 24.f.iv.1-4. */
+    ecma_string_t *const split_str_p = ecma_string_substr (string_p, previous_index, current_index);
+
+    result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                    array_length++,
+                                                    ecma_make_string_value (split_str_p),
+                                                    ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+    JERRY_ASSERT (ecma_is_value_true (result));
+    ecma_deref_ecma_string (split_str_p);
+
+    /* 24.f.iv.5. */
+    if (array_length == limit)
+    {
+      ecma_deref_object (match_array_p);
+      result = array;
+      goto cleanup_splitter;
+    }
+
+    /* 24.f.iv.6. */
+    previous_index = end_index;
+
+    /* 24.f.iv.7-8. */
+    uint32_t match_length;
+    result = ecma_op_object_get_length (match_array_p, &match_length);
+    if (ECMA_IS_VALUE_ERROR (result))
+    {
+      ecma_deref_object (match_array_p);
+      goto cleanup_array;
+    }
+
+    /* 24.f.iv.9. */
+    match_length = (match_length > 0) ? match_length - 1 : match_length;
+
+    uint32_t match_index = 1;
+    while (match_index <= match_length)
+    {
+      /* 24.f.iv.11.a-b. */
+      result = ecma_op_object_get_by_uint32_index (match_array_p, match_index++);
+      if (ECMA_IS_VALUE_ERROR (result))
+      {
+        ecma_deref_object (match_array_p);
+        goto cleanup_array;
+      }
+
+      const ecma_value_t capture = result;
+
+      /* 24.f.iv.11.c. */
+      result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                      array_length++,
+                                                      capture,
+                                                      ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (ecma_is_value_true (result));
+
+      ecma_free_value (capture);
+
+      if (array_length == limit)
+      {
+        ecma_deref_object (match_array_p);
+        result = array;
+        goto cleanup_splitter;
+      }
+    }
+
+    /* 24.f.iv.12. */
+    current_index = end_index;
+
+    ecma_deref_object (match_array_p);
+  }
+
+  ecma_string_t *const end_str_p = ecma_string_substr (string_p, previous_index, string_length);
+  result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                  array_length++,
+                                                  ecma_make_string_value (end_str_p),
+                                                  ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+  JERRY_ASSERT (ecma_is_value_true (result));
+  ecma_deref_ecma_string (end_str_p);
+
+  result = array;
+  goto cleanup_splitter;
+
+cleanup_array:
+  ecma_deref_object (array_p);
+cleanup_splitter:
+  ecma_deref_object (splitter_obj_p);
+cleanup_string:
+  ecma_deref_ecma_string (string_p);
+
+  return result;
+#else /* !ENABLED (JERRY_ES2015) */
+  ecma_value_t result = ECMA_VALUE_ERROR;
+
+  /* 2. */
+  ecma_string_t *string_p = ecma_op_to_string (string_arg);
+  if (JERRY_UNLIKELY (string_p == NULL))
+  {
+    return result;
+  }
+
+  /* 5. */
+  uint32_t limit = UINT32_MAX;
+  if (!ecma_is_value_undefined (limit_arg))
+  {
+    if (ECMA_IS_VALUE_ERROR (ecma_op_to_length (limit_arg, &limit)))
+    {
+      goto cleanup_string;
+    }
+  }
+
+  /* 15. */
+  ecma_value_t array = ecma_op_create_array_object (NULL, 0, false);
+
+  /* 21. */
+  if (limit == 0)
+  {
+    result = array;
+    goto cleanup_string;
+  }
+
+  ecma_object_t *const array_p = ecma_get_object_from_value (array);
+  ecma_length_t array_length = 0;
+
+  ecma_object_t *const regexp_p = ecma_get_object_from_value (this_arg);
+  ecma_extended_object_t *const ext_object_p = (ecma_extended_object_t *) regexp_p;
+  re_compiled_code_t *const bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
+                                                                        ext_object_p->u.class_prop.u.value);
+
+  lit_utf8_size_t string_size;
+  lit_utf8_size_t string_length;
+  uint8_t string_flags = ECMA_STRING_FLAG_IS_ASCII;
+  const lit_utf8_byte_t *string_buffer_p = ecma_string_get_chars (string_p,
+                                                                  &string_size,
+                                                                  &string_length,
+                                                                  NULL,
+                                                                  &string_flags);
+
+  const lit_utf8_byte_t *current_str_p = string_buffer_p;
+  const lit_utf8_byte_t *previous_str_p = string_buffer_p;
+  const lit_utf8_byte_t *const string_end_p = string_buffer_p + string_size;
+
+  /* Handle RegExp.prototype separately. */
+  if (JERRY_UNLIKELY (bc_p == NULL))
+  {
+    while (current_str_p < string_end_p)
+    {
+      lit_utf8_incr (&current_str_p);
+      ecma_string_t *str_p = ecma_new_ecma_string_from_utf8 (previous_str_p,
+                                                             (lit_utf8_size_t) (current_str_p - previous_str_p));
+
+      result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                      array_length++,
+                                                      ecma_make_string_value (str_p),
+                                                      ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (ecma_is_value_true (result));
+      ecma_deref_ecma_string (str_p);
+
+      if (array_length == limit)
+      {
+        result = array;
+        goto cleanup_buffer;
+      }
+
+      previous_str_p = current_str_p;
+    }
+
+    result = array;
+    goto cleanup_buffer;
+  }
+
+  ecma_regexp_ctx_t re_ctx;
+  re_ctx.flags = bc_p->header.status_flags;
+  ecma_regexp_initialize_context (&re_ctx,
+                                  bc_p,
+                                  string_buffer_p,
+                                  string_buffer_p + string_size);
+
+  uint8_t *const bc_start_p = (uint8_t *) (bc_p + 1);
+
+  if (string_length == 0)
+  {
+    const lit_utf8_byte_t *const matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_str_p);
+
+    if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
+    {
+      result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
+      goto cleanup_array;
+    }
+
+    if (matched_p == NULL)
+    {
+      result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                      array_length,
+                                                      ecma_make_string_value (string_p),
+                                                      ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (ecma_is_value_true (result));
+    }
+
+    result = array;
+    goto cleanup_context;
+  }
+
+  /* 13. */
+  while (current_str_p < string_end_p)
+  {
+    /* 13.a. Clear every capture slot (count * element size) before the next match attempt. */
+    memset (re_ctx.captures_p, 0, re_ctx.captures_count * sizeof (*re_ctx.captures_p));
+    const lit_utf8_byte_t *const matched_p = ecma_regexp_match (&re_ctx, bc_start_p, current_str_p);
+
+    if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
+    {
+      result = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
+      goto cleanup_array;
+    }
+
+    if (matched_p == NULL || matched_p == previous_str_p)
+    {
+      lit_utf8_incr (&current_str_p);
+      continue;
+    }
+
+    /* 13.c.iii.1. */
+    ecma_string_t *const str_p = ecma_new_ecma_string_from_utf8 (previous_str_p,
+                                                                 (lit_utf8_size_t) (current_str_p - previous_str_p));
+
+    result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                    array_length++,
+                                                    ecma_make_string_value (str_p),
+                                                    ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+    JERRY_ASSERT (ecma_is_value_true (result));
+    ecma_deref_ecma_string (str_p);
+
+    if (array_length == limit)
+    {
+      result = array;
+      goto cleanup_context;
+    }
+
+    /* 13.c.iii.5. */
+    previous_str_p = matched_p;
+
+    uint32_t index = 1;
+    while (index < re_ctx.captures_count)
+    {
+      const ecma_value_t capture = ecma_regexp_get_capture_value (re_ctx.captures_p + index);
+      result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                      array_length++,
+                                                      capture,
+                                                      ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+      JERRY_ASSERT (ecma_is_value_true (result));
+      ecma_free_value (capture);
+
+      if (array_length == limit)
+      {
+        result = array;
+        goto cleanup_context;
+      }
+
+      index++;
+    }
+
+    /* 13.c.iii.8. */
+    current_str_p = matched_p;
+  }
+
+  ecma_string_t *const str_p = ecma_new_ecma_string_from_utf8 (previous_str_p,
+                                                               (lit_utf8_size_t) (string_end_p - previous_str_p));
+
+  result = ecma_builtin_helper_def_prop_by_index (array_p,
+                                                  array_length++,
+                                                  ecma_make_string_value (str_p),
+                                                  ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);
+  JERRY_ASSERT (ecma_is_value_true (result));
+  ecma_deref_ecma_string (str_p);
+
+  result = array;
+  goto cleanup_context;
+
+cleanup_array:
+  ecma_deref_object (array_p);
+cleanup_context:
+  ecma_regexp_cleanup_context (&re_ctx);
+cleanup_buffer:
+  if (string_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
+  {
+    jmem_heap_free_block ((void *) string_buffer_p, string_size);
+  }
+cleanup_string:
+  ecma_deref_ecma_string (string_p);
+
+  return result;
+#endif /* ENABLED (JERRY_ES2015) */
+} /* ecma_regexp_split_helper */
+
 /**
  * Fast path for RegExp based replace operation
  *
diff --git a/jerry-core/ecma/operations/ecma-regexp-object.h b/jerry-core/ecma/operations/ecma-regexp-object.h
index c57e4da16..7b8e06b1b 100644
--- a/jerry-core/ecma/operations/ecma-regexp-object.h
+++ b/jerry-core/ecma/operations/ecma-regexp-object.h
@@ -106,15 +106,10 @@ lit_code_point_t ecma_regexp_canonicalize_char (lit_code_point_t ch);
 ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
 void ecma_regexp_initialize_props (ecma_object_t *re_obj_p, ecma_string_t *source_p, uint16_t flags);
 
+ecma_value_t ecma_regexp_replace_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t replace_arg);
 ecma_value_t ecma_regexp_search_helper (ecma_value_t regexp_arg, ecma_value_t string_arg);
-ecma_value_t
-ecma_regexp_replace_helper (ecma_value_t this_arg,
-                            ecma_value_t string_arg,
-                            ecma_value_t replace_arg);
-
-ecma_value_t
-ecma_regexp_match_helper (ecma_value_t this_arg,
-                          ecma_value_t string_arg);
+ecma_value_t ecma_regexp_split_helper (ecma_value_t this_arg, ecma_value_t string_arg, ecma_value_t limit_arg);
+ecma_value_t
ecma_regexp_match_helper (ecma_value_t this_arg, ecma_value_t string_arg); ecma_value_t ecma_op_regexp_exec (ecma_value_t this_arg, ecma_string_t *str_p); /** diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index a0accbc75..b66ef7a43 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -220,11 +220,8 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLAGS, "flags") #if ENABLED (JERRY_BUILTIN_MATH) LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLOOR, "floor") #endif -#if ENABLED (JERRY_BUILTIN_REGEXP) \ -|| ENABLED (JERRY_BUILTIN_STRING) -LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX, "index") -#endif #if ENABLED (JERRY_BUILTIN_REGEXP) +LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INDEX, "index") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_INPUT, "input") #endif LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_NAN, "isNaN") diff --git a/tests/jerry/es2015/string-prototype-split.js b/tests/jerry/es2015/string-prototype-split.js new file mode 100644 index 000000000..be23a55f9 --- /dev/null +++ b/tests/jerry/es2015/string-prototype-split.js @@ -0,0 +1,23 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var str = "foo//bar/baz//foo"; /* input shared by both split checks in this es2015 test file */ +res = str.split("a", Infinity); /* string separator with limit Infinity: the asserts below expect all 3 pieces, whereas the es5.1 variant of this test expects none; presumably ES2015 converts limit with ToLength - TODO confirm against ECMA-262 v6 21.1.3.17 */ +assert (res.length === 3); +assert (res[0] === "foo//b"); +assert (res[1] === "r/b"); +assert (res[2] === "z//foo"); + +res = str.split(/\/\//, -1); /* regexp separator with limit -1: the assert below expects an empty array, whereas the es5.1 variant of this test expects 3 pieces */ +assert (res.length === 0); diff --git a/tests/jerry/es2015/symbol-split.js b/tests/jerry/es2015/symbol-split.js new file mode 100644 index 000000000..0c17e21ec --- /dev/null +++ b/tests/jerry/es2015/symbol-split.js @@ -0,0 +1,109 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +var split = RegExp.prototype[Symbol.split]; /* builtin @@split routine, invoked below via call with hand-built this values */ + +try { /* this value is the primitive 0: a TypeError is expected */ + split.call (0, "string"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +try { /* string argument whose toString throws: the thrown value is expected to reach the caller unchanged */ + split.call (new RegExp(), { + toString: () => { + throw "abrupt string" + } + }); + assert (false); +} catch (e) { + assert (e === "abrupt string"); +} + +try { /* constructor property of the this object is a string, not an object: a TypeError is expected */ + var o = {}; + o.constructor = "ctor"; + split.call (o, "str"); + assert (false); +} catch (e) { + assert (e instanceof TypeError); +} + +try { /* Symbol.species getter on the constructor throws: the thrown value is expected to propagate */ + var o = {}; + var c = {}; + o.constructor = c; + Object.defineProperty (c, Symbol.species, { get: function () { throw "abrupt species";} }); + + split.call (o, "str"); + assert (false); +} catch (e) { + assert (e === "abrupt species"); +} + +try { /* flags getter on the this object throws: the thrown value is expected to propagate */ + split.call ({ + get flags() { + throw "abrupt flags"; + } + }, "string"); + assert (false); +} catch (e) { + assert (e === "abrupt flags"); +} + +try { /* limit argument whose valueOf throws: the thrown value is expected to propagate */ + split.call ({ toString: function () { return "s"; }, flags: "g"}, + "string", + { valueOf: function () { throw "abrupt limit"; } }); + assert (false); +} catch (e) { + assert (e === "abrupt limit"); +} + +var exec = RegExp.prototype.exec; /* NOTE(review): the original exec is saved here but never written back in this file, so the overrides installed below stay in place */ + +try { /* RegExp.prototype.exec is replaced by an accessor whose getter throws: the thrown value is expected to propagate */ + Object.defineProperty(RegExp.prototype, "exec", { get : function() { throw "abrupt get exec"; }}) + split.call ({ toString: function () { return "s"; }, flags: "g"}, + "string") + assert (false); +} catch (e) { + assert (e === "abrupt get exec"); +} + +try { /* exec is replaced by a function that bumps lastIndex and returns an object whose length getter throws */ + Object.defineProperty(RegExp.prototype, "exec", { value: function (str) { + this.lastIndex++; + return { get length() { throw "abrupt match length"; }} + }}); + split.call ({ toString: function () { return "s"; }, flags: "g"}, + "string"); + assert (false); +} catch (e) { + assert (e === "abrupt match length"); +} + +try { /* exec is replaced by a function that bumps lastIndex and returns an object with length 2 whose getter for index 1 throws */ + Object.defineProperty(RegExp.prototype, "exec", { value: function (str) { + this.lastIndex++; + return { length: 2, get 1() { throw "abrupt capture"; }} + }}); + split.call ({ toString: function () { return "s"; }, flags: "g"}, + "string"); + assert (false); +} catch (e) { + assert 
(e === "abrupt capture"); +} diff --git a/tests/jerry/es5.1/string-prototype-split.js b/tests/jerry/es5.1/string-prototype-split.js new file mode 100644 index 000000000..27cc79cbe --- /dev/null +++ b/tests/jerry/es5.1/string-prototype-split.js @@ -0,0 +1,23 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +var str = "foo//bar/baz//foo"; /* same input as the es2015 variant of this test; the two limit checks below were moved here from tests/jerry/string-prototype-split.js */ +res = str.split("a", Infinity); /* limit Infinity: the assert below expects an empty array in this es5.1 variant; presumably limit goes through ToUint32 here - TODO confirm against ECMA-262 v5.1 15.5.4.14 */ +assert (res.length === 0); + +res = str.split(/\/\//, -1); /* limit -1: the asserts below expect all 3 pieces in this es5.1 variant */ +assert (res.length === 3); +assert (res[0] === "foo"); +assert (res[1] === "bar/baz"); +assert (res[2] === "foo"); diff --git a/tests/jerry/string-prototype-split.js b/tests/jerry/string-prototype-split.js index bef02ca08..f216c3262 100644 --- a/tests/jerry/string-prototype-split.js +++ b/tests/jerry/string-prototype-split.js @@ -51,9 +51,6 @@ assert (res[0] === "foo//b"); res = str.split("a", NaN); assert (res.length === 0); -res = str.split("a", Infinity); -assert (res.length === 0); - res = str.split(["o"]) assert (res.length === 5); assert (res[0] === "f"); @@ -96,12 +93,6 @@ res = str.split(/\/\//, 1); assert (res.length === 1); assert (res[0] === "foo"); -res = str.split(/\/\//, -1); -assert (res.length === 3); -assert (res[0] === "foo"); -assert (res[1] === "bar/baz"); -assert (res[2] === "foo"); - str = "fo123o12bar"; res = str.split(12, undefined); assert (res.length === 3);