Implement String.prototype.split function

JerryScript-DCO-1.0-Signed-off-by: Zsolt Borbély zsborbely.u-szeged@partner.samsung.com
This commit is contained in:
Zsolt Borbély 2015-08-05 16:15:06 +02:00
parent ff580ca33e
commit 640370da43
3 changed files with 610 additions and 4 deletions

View File

@ -1667,6 +1667,128 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg
return ret_value;
} /* ecma_builtin_string_prototype_object_slice */
#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN
/**
 * SplitMatch abstract operation used by String.prototype.split()
 *
 * See also:
 *          ECMA-262 v5, 15.5.4.14
 *
 * Two separator kinds are handled:
 *  - RegExp object: the input is converted to string, the part starting at start_idx is
 *    handed to ecma_regexp_exec_helper, and the 'index' property of a non-null result is
 *    shifted by start_idx so that it refers to a position in the whole input string.
 *  - string: the separator is compared character by character with the input at start_idx;
 *    on success a new array is returned whose element "0" is the separator and whose
 *    'index' property is start_idx.
 *
 * @return completion value
 *         - null simple value if the string separator does not match at start_idx
 *         - otherwise the match object described above
 *
 *         Returned value must be freed with ecma_free_completion_value.
 */
static ecma_completion_value_t
ecma_builtin_helper_split_match (ecma_value_t input_string, /**< first argument */
                                 ecma_length_t start_idx, /**< second argument */
                                 ecma_value_t separator) /**< third argument */
{
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  const bool separator_is_regexp = (ecma_is_value_object (separator)
                                    && (ecma_object_get_class_name (ecma_get_object_from_value (separator))
                                        == LIT_MAGIC_STRING_REGEXP_UL));

  if (!separator_is_regexp)
  {
    /* 2. */
    JERRY_ASSERT (ecma_is_value_string (input_string) && ecma_is_value_string (separator));

    ecma_string_t *haystack_str_p = ecma_get_string_from_value (input_string);
    ecma_string_t *needle_str_p = ecma_get_string_from_value (separator);

    /* 3. */
    ecma_length_t haystack_length = ecma_string_get_length (haystack_str_p);
    ecma_length_t needle_length = ecma_string_get_length (needle_str_p);

    /* 4. The separator can only match if it fits into the remaining part of the input. */
    bool is_match = !(start_idx + needle_length > haystack_length);

    if (is_match)
    {
      /* 5. */
      for (ecma_length_t offset = 0; offset < needle_length; offset++)
      {
        ecma_char_t haystack_char = ecma_string_get_char_at_pos (haystack_str_p, start_idx + offset);
        ecma_char_t needle_char = ecma_string_get_char_at_pos (needle_str_p, offset);

        if (haystack_char != needle_char)
        {
          is_match = false;
          break;
        }
      }
    }

    if (!is_match)
    {
      return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_NULL);
    }

    /* 6-7. Build the match object: { "0": separator, index: start_idx }. */
    ecma_completion_value_t result_array = ecma_op_create_array_object (0, 0, false);
    ecma_object_t *result_array_p = ecma_get_object_from_completion_value (result_array);

    ecma_string_t *first_idx_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO);
    ecma_op_object_put (result_array_p, first_idx_str_p, ecma_make_string_value (needle_str_p), true);
    ecma_deref_ecma_string (first_idx_str_p);

    ecma_string_t *index_name_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
    ecma_property_t *index_property_p = ecma_create_named_data_property (result_array_p,
                                                                         index_name_str_p,
                                                                         true, false, false);
    ecma_deref_ecma_string (index_name_str_p);

    ecma_number_t *position_num_p = ecma_alloc_number ();
    *position_num_p = ecma_uint32_to_number (start_idx);
    ecma_named_data_property_assign_value (result_array_p,
                                           index_property_p,
                                           ecma_make_number_value (position_num_p));
    ecma_dealloc_number (position_num_p);

    ret_value = result_array;

    return ret_value;
  }

  /* RegExp separator: run the pattern on the part of the input that starts at start_idx. */
  ecma_value_t regexp_value = ecma_copy_value (separator, false);

  ECMA_TRY_CATCH (input_to_string_val,
                  ecma_op_to_string (input_string),
                  ret_value);

  ecma_string_t *whole_str_p = ecma_get_string_from_value (input_to_string_val);
  ecma_string_t *tail_str_p = ecma_string_substr (whole_str_p, start_idx, ecma_string_get_length (whole_str_p));

  ret_value = ecma_regexp_exec_helper (regexp_value, ecma_make_string_value (tail_str_p), true);

  /*
   * NOTE(review): a throw completion is not distinguished from a successful match here;
   * confirm whether ecma_regexp_exec_helper can throw for these arguments.
   */
  if (!ecma_is_value_null (ecma_get_completion_value_value (ret_value)))
  {
    /* The reported index is relative to the substring, so make it relative to the whole input. */
    ecma_object_t *match_obj_p = ecma_get_object_from_completion_value (ret_value);
    ecma_string_t *index_name_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
    ecma_property_t *index_property_p = ecma_get_named_property (match_obj_p, index_name_str_p);

    ecma_number_t *position_num_p = ecma_get_number_from_value (index_property_p->u.named_data_property.value);
    *position_num_p += start_idx;

    ecma_deref_ecma_string (index_name_str_p);
  }

  ecma_deref_ecma_string (tail_str_p);

  ECMA_FINALIZE (input_to_string_val);

  return ret_value;
} /* ecma_builtin_helper_split_match */
/**
* The String.prototype object's 'split' routine
*
@ -1678,12 +1800,335 @@ ecma_builtin_string_prototype_object_slice (ecma_value_t this_arg, /**< this arg
*/
static ecma_completion_value_t
ecma_builtin_string_prototype_object_split (ecma_value_t this_arg, /**< this argument */
                                            ecma_value_t arg1, /**< separator */
                                            ecma_value_t arg2) /**< limit */
{
  /*
   * Overview (step numbers refer to ECMA-262 v5, 15.5.4.14):
   *  - this_arg is checked for coercibility and converted to string,
   *  - arg2 (limit) is converted with ToUint32, undefined means 2^32 - 1,
   *  - arg1 (separator) is kept as is if it is a RegExp object, otherwise converted to string,
   *  - the pieces between separator matches (and the RegExp captures of every match) are
   *    stored into a new array until the input is exhausted or the limit is reached.
   *
   * The result is either the new array or the throw completion produced by one of the conversions.
   */
  ecma_completion_value_t ret_value = ecma_make_empty_completion_value ();

  /* 1. */
  ECMA_TRY_CATCH (this_check_coercible_val,
                  ecma_op_check_object_coercible (this_arg),
                  ret_value);

  /* 2. */
  ECMA_TRY_CATCH (this_to_string_val,
                  ecma_op_to_string (this_arg),
                  ret_value);

  /* 3. */
  ecma_completion_value_t new_array = ecma_op_create_array_object (0, 0, false);

  /* 5. */
  ecma_length_t limit = 0;

  if (ecma_is_value_undefined (arg2))
  {
    limit = (uint32_t) -1;
  }
  else
  {
    ECMA_OP_TO_NUMBER_TRY_CATCH (limit_num, arg2, ret_value);

    limit = ecma_number_to_uint32 (limit_num);

    ECMA_OP_TO_NUMBER_FINALIZE (limit_num);
  }

  if (ecma_is_completion_value_empty (ret_value))
  {
    /* This variable indicates that we should return with the current array, to avoid another operation. */
    bool should_return = false;

    /* 9. */
    if (limit == 0)
    {
      should_return = true;
    }
    else /* if (limit != 0) */
    {
      ecma_object_t *new_array_p = ecma_get_object_from_completion_value (new_array);

      /* 10. An undefined separator yields an array holding the whole string. */
      if (ecma_is_value_undefined (arg1))
      {
        ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO);

        ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p,
                                                                         zero_str_p,
                                                                         this_to_string_val,
                                                                         true,
                                                                         true,
                                                                         true,
                                                                         false);

        JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp));

        should_return = true;

        ecma_deref_ecma_string (zero_str_p);
      }
      else /* if (!ecma_is_value_undefined (arg1)) */
      {
        /* 6. */
        const ecma_length_t string_length = ecma_string_get_length (ecma_get_string_from_value (this_to_string_val));

        /* 8. The separator stays the empty simple value if its string conversion throws. */
        ecma_value_t separator = ecma_make_simple_value (ECMA_SIMPLE_VALUE_EMPTY);

        if (ecma_is_value_object (arg1)
            && ecma_object_get_class_name (ecma_get_object_from_value (arg1)) == LIT_MAGIC_STRING_REGEXP_UL)
        {
          separator = ecma_copy_value (arg1, true);
        }
        else
        {
          ECMA_TRY_CATCH (separator_to_string_val,
                          ecma_op_to_string (arg1),
                          ret_value);

          separator = ecma_copy_value (separator_to_string_val, true);

          ECMA_FINALIZE (separator_to_string_val);
        }

        /* 11. */
        if (string_length == 0 && ecma_is_completion_value_empty (ret_value))
        {
          /* 11.a */
          ecma_completion_value_t match_result = ecma_builtin_helper_split_match (this_to_string_val,
                                                                                  0,
                                                                                  separator);

          /* 11.b */
          if (!ecma_is_value_null (ecma_get_completion_value_value (match_result)))
          {
            should_return = true;
          }
          else
          {
            /* 11.c */
            ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO);

            ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p,
                                                                             zero_str_p,
                                                                             this_to_string_val,
                                                                             true,
                                                                             true,
                                                                             true,
                                                                             false);

            JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp));

            /* 11.d */
            should_return = true;

            ecma_deref_ecma_string (zero_str_p);
          }

          ecma_free_completion_value (match_result);
        }
        else /* if (string_length != 0) || !ecma_is_completion_value_empty (ret_value) */
        {
          /* 4. */
          ecma_length_t new_array_length = 0;

          /* 7. */
          ecma_length_t start_pos = 0;

          /* 12. */
          ecma_length_t curr_pos = start_pos;

          bool separator_is_empty = false;

          /* 13. */
          while (curr_pos < string_length && !should_return && ecma_is_completion_value_empty (ret_value))
          {
            ecma_completion_value_t match_result = ecma_builtin_helper_split_match (this_to_string_val,
                                                                                    curr_pos,
                                                                                    separator);

            /* 13.b */
            if (ecma_is_value_null (ecma_get_completion_value_value (match_result)))
            {
              curr_pos++;
            }
            else /* if (!ecma_is_value_null (ecma_get_completion_value_value (match_result))) */
            {
              ecma_object_t *match_array_obj_p = ecma_get_object_from_completion_value (match_result);

              /* Element "0" of the match object is the matched text itself. */
              ecma_string_t *zero_str_p = ecma_new_ecma_string_from_number (ECMA_NUMBER_ZERO);
              ecma_completion_value_t match_comp_value = ecma_op_object_get (match_array_obj_p, zero_str_p);
              JERRY_ASSERT (ecma_is_completion_value_normal (match_comp_value));

              ecma_string_t *match_str_p = ecma_get_string_from_completion_value (match_comp_value);
              ecma_length_t match_str_length = ecma_string_get_length (match_str_p);

              ecma_string_t *magic_empty_str_p = ecma_new_ecma_string_from_magic_string_id (LIT_MAGIC_STRING__EMPTY);
              separator_is_empty = ecma_compare_ecma_strings (magic_empty_str_p, match_str_p);

              ecma_deref_ecma_string (magic_empty_str_p);
              ecma_free_completion_value (match_comp_value);
              ecma_deref_ecma_string (zero_str_p);

              /* The 'index' property holds the position where the match begins. */
              ecma_string_t *magic_index_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
              ecma_property_t *index_prop_p = ecma_get_named_property (match_array_obj_p, magic_index_str_p);

              ecma_number_t *index_num_p = ecma_get_number_from_value (index_prop_p->u.named_data_property.value);
              JERRY_ASSERT (*index_num_p >= 0);

              uint32_t end_pos = ecma_number_to_uint32 (*index_num_p);

              /* An empty match consumes nothing, so cut after the current character to make progress. */
              if (separator_is_empty)
              {
                end_pos = curr_pos + 1;
              }

              /* 13.c.iii.1-2 */
              ecma_string_t *substr_str_p = ecma_string_substr (ecma_get_string_from_value (this_to_string_val),
                                                                start_pos,
                                                                end_pos);

              ecma_string_t *array_length_str_p = ecma_new_ecma_string_from_uint32 (new_array_length);

              ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p,
                                                                               array_length_str_p,
                                                                               ecma_make_string_value (substr_str_p),
                                                                               true,
                                                                               true,
                                                                               true,
                                                                               false);

              JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp));

              /* 13.c.iii.3 */
              new_array_length++;

              /* 13.c.iii.4 */
              if (new_array_length == limit && ecma_is_completion_value_empty (ret_value))
              {
                should_return = true;
              }

              /* 13.c.iii.5 */
              start_pos = end_pos + match_str_length;

              ecma_string_t *magic_length_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LENGTH);

              ECMA_TRY_CATCH (array_length_val,
                              ecma_op_object_get (match_array_obj_p, magic_length_str_p),
                              ret_value);

              ECMA_OP_TO_NUMBER_TRY_CATCH (array_length_num, array_length_val, ret_value);

              /* The first item is the match object, thus we should skip it. */
              const uint32_t match_result_array_length = ecma_number_to_uint32 (array_length_num) - 1;

              /* 13.c.iii.6 */
              uint32_t i = 0;

              /*
               * 13.c.iii.7
               * The loop must stop as soon as the limit is reached (13.c.iii.4 and 13.c.iii.7.d),
               * otherwise captures would be appended beyond the requested number of elements.
               */
              while (i < match_result_array_length
                     && !should_return
                     && ecma_is_completion_value_empty (ret_value))
              {
                /* 13.c.iii.7.a */
                i++;
                ecma_string_t *idx_str_p = ecma_new_ecma_string_from_uint32 (i);
                ecma_string_t *new_array_idx_str_p = ecma_new_ecma_string_from_uint32 (new_array_length);

                ecma_completion_value_t capture_comp_value = ecma_op_object_get (match_array_obj_p, idx_str_p);

                JERRY_ASSERT (ecma_is_completion_value_normal (capture_comp_value));

                ecma_value_t capture_value = ecma_get_completion_value_value (capture_comp_value);

                /* 13.c.iii.7.b */
                ecma_completion_value_t capture_put_comp = ecma_builtin_helper_def_prop (new_array_p,
                                                                                         new_array_idx_str_p,
                                                                                         capture_value,
                                                                                         true,
                                                                                         true,
                                                                                         true,
                                                                                         false);

                JERRY_ASSERT (ecma_is_completion_value_normal_true (capture_put_comp));

                /* 13.c.iii.7.c */
                new_array_length++;

                /* 13.c.iii.7.d */
                if (new_array_length == limit && ecma_is_completion_value_empty (ret_value))
                {
                  should_return = true;
                }

                ecma_free_completion_value (capture_comp_value);
                ecma_deref_ecma_string (new_array_idx_str_p);
                ecma_deref_ecma_string (idx_str_p);
              }

              /* 13.c.iii.8 */
              curr_pos = start_pos;

              ECMA_OP_TO_NUMBER_FINALIZE (array_length_num);
              ECMA_FINALIZE (array_length_val);

              ecma_deref_ecma_string (magic_length_str_p);
              ecma_deref_ecma_string (array_length_str_p);
              ecma_deref_ecma_string (substr_str_p);
              ecma_deref_ecma_string (magic_index_str_p);
            } /* if (!ecma_is_value_null (ecma_get_completion_value_value (match_result))) */

            ecma_free_completion_value (match_result);
          } /* while (curr_pos < string_length && !should_return && ecma_is_completion_value_empty (ret_value)) */

          /* The trailing piece is skipped when the last match was empty. */
          if (!should_return && !separator_is_empty && ecma_is_completion_value_empty (ret_value))
          {
            /* 14. */
            ecma_string_t *substr_str_p;
            substr_str_p = ecma_string_substr (ecma_get_string_from_value (this_to_string_val),
                                               start_pos,
                                               string_length);

            /* 15. */
            ecma_string_t *array_length_string_p = ecma_new_ecma_string_from_uint32 (new_array_length);

            ecma_completion_value_t put_comp = ecma_builtin_helper_def_prop (new_array_p,
                                                                             array_length_string_p,
                                                                             ecma_make_string_value (substr_str_p),
                                                                             true,
                                                                             true,
                                                                             true,
                                                                             false);

            JERRY_ASSERT (ecma_is_completion_value_normal_true (put_comp));

            ecma_deref_ecma_string (array_length_string_p);
            ecma_deref_ecma_string (substr_str_p);
          }
        } /* if (string_length != 0) || !ecma_is_completion_value_empty (ret_value) */

        ecma_free_value (separator, true);
      } /* if (!ecma_is_value_undefined (arg1)) */
    } /* if (limit != 0) */
  } /* if (ecma_is_completion_value_empty (ret_value)) */

  /* Hand the array to the caller on success, release it if an exception is being returned. */
  if (ecma_is_completion_value_empty (ret_value))
  {
    ret_value = new_array;
  }
  else
  {
    ecma_free_completion_value (new_array);
  }

  ECMA_FINALIZE (this_to_string_val);
  ECMA_FINALIZE (this_check_coercible_val);

  return ret_value;
} /* ecma_builtin_string_prototype_object_split */
#endif /* !CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
/**
* The String.prototype object's 'substring' routine
*

View File

@ -72,9 +72,9 @@ ROUTINE (LIT_MAGIC_STRING_LOCALE_COMPARE_UL, ecma_builtin_string_prototype_objec
ROUTINE (LIT_MAGIC_STRING_MATCH, ecma_builtin_string_prototype_object_match, 1, 1)
ROUTINE (LIT_MAGIC_STRING_REPLACE, ecma_builtin_string_prototype_object_replace, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SEARCH, ecma_builtin_string_prototype_object_search, 1, 1)
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_REGEXP_BUILTIN */
ROUTINE (LIT_MAGIC_STRING_SPLIT, ecma_builtin_string_prototype_object_split, 2, 2)
ROUTINE (LIT_MAGIC_STRING_SUBSTRING, ecma_builtin_string_prototype_object_substring, 2, 2)
ROUTINE (LIT_MAGIC_STRING_TO_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_lower_case, 0, 0)
ROUTINE (LIT_MAGIC_STRING_TO_LOCALE_LOWER_CASE_UL, ecma_builtin_string_prototype_object_to_locale_lower_case, 0, 0)

View File

@ -0,0 +1,161 @@
// Copyright 2015 Samsung Electronics Co., Ltd.
// Copyright 2015 University of Szeged.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
var res = "".split();
assert (res[0] === "");
res = "foo".split();
assert (res[0] === "foo");
var str = "foo//bar/baz//foo";
res = str.split("");
assert (res.length === 17);
for (var i = 0; i < res.length; i++)
{
assert (res[i] === str[i]);
}
res = str.split("", "foo");
assert (res.length === 0);
res = str.split("", "4");
assert (res.length === 4);
for (var i = 0; i < res.length; i++)
{
assert (res[i] === str[i]);
}
res = str.split(undefined, undefined)
assert (res.length === 1);
assert (res[0] === "foo//bar/baz//foo");
res = str.split("a", false);
assert (res.length === 0);
res = str.split("a", true);
assert (res.length === 1);
assert (res[0] === "foo//b");
res = str.split("a", NaN);
assert (res.length === 0);
res = str.split("a", Infinity);
assert (res.length === 0);
res = str.split(["o"])
assert (res.length === 5);
assert (res[0] === "f");
assert (res[1] === "");
assert (res[2] === "//bar/baz//f");
assert (res[3] === "");
assert (res[4] === "");
res = str.split(["o", "/"]);
assert (res.length === 1);
assert (res[0] === "foo//bar/baz//foo");
res = str.split("a", ["2"]);
assert (res.length === 2);
assert (res[0] === "foo//b");
assert (res[1] === "r/b");
res = str.split("a", ["2", "3"]);
assert (res.length === 0);
var obj = {x: 12, b: undefined};
res = str.split(obj, 4);
assert (res.length === 1);
assert (res[0] === "foo//bar/baz//foo");
res = str.split("o", obj);
assert (res.length === 0);
res = str.split(false, true);
assert (res.length === 1);
assert (res[0] === "foo//bar/baz//foo");
res = str.split(/\/\//);
assert (res.length === 3);
assert (res[0] === "foo");
assert (res[1] === "bar/baz");
assert (res[2] === "foo");
res = str.split(/\/\//, 1);
assert (res.length === 1);
assert (res[0] === "foo");
res = str.split(/\/\//, -1);
assert (res.length === 3);
assert (res[0] === "foo");
assert (res[1] === "bar/baz");
assert (res[2] === "foo");
str = "fo123o12bar";
res = str.split(12, undefined);
assert (res.length === 3);
assert (res[0] === "fo");
assert (res[1] === "3o");
assert (res[2] === "bar");
str = "aaabababaab";
res = str.split(/aa+/);
assert (res.length === 3);
assert (res[0] === "");
assert (res[1] === "babab");
assert (res[2] === "b");
str = "A<B>bold</B>and<CODE>coded</CODE>";
res = str.split(/<(\/)?([^<>]+)>/);
assert (res.length === 13);
var expected = ["A", undefined, "B", "bold", "/", "B", "and", undefined, "CODE", "coded", "/", "CODE", ""];
for (var i = 0; i < res.length; i++)
{
assert (res[i] === expected[i]);
}
/* Check Object coercible */
try {
String.prototype.split.call(undefined, "");
assert (false);
}
catch (e)
{
assert (e instanceof TypeError);
}
/* Check toString conversion */
try {
var obj = { toString: function() { throw new ReferenceError("foo"); } };
String.prototype.split.call(obj, "");
assert (false);
}
catch (e)
{
assert (e instanceof ReferenceError);
assert (e.message === "foo");
}
/* Check Invalid RegExp */
try {
var obj = { toString: function() { throw new ReferenceError("foo"); } };
"".split(obj);
assert (false);
}
catch (e)
{
assert (e instanceof ReferenceError);
assert (e.message === "foo");
}