Make the string trim method Unicode-tolerant.

Fixes issue #426

JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
This commit is contained in:
Peter Gal 2015-08-04 10:29:11 +02:00 committed by Dániel Bátyai
parent 57336909cb
commit af56cd8465
2 changed files with 25 additions and 16 deletions

View File

@ -2009,24 +2009,25 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
/* 3 */
const lit_utf8_size_t size = ecma_string_get_size (original_string_p);
const ecma_length_t length = ecma_string_get_size (original_string_p);
/* Workaround: avoid repeated calls to ecma_string_get_char_at_pos() because of its overhead */
lit_utf8_byte_t *original_utf8_str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (size + 1,
MEM_HEAP_ALLOC_SHORT_TERM);
ecma_string_to_utf8_string (original_string_p, original_utf8_str_p, (ssize_t) size);
const ecma_length_t length = lit_utf8_string_length (original_utf8_str_p, size);
lit_utf8_iterator_t iter = lit_utf8_iterator_create (original_utf8_str_p, size);
uint32_t prefix = 0, postfix = 0;
uint32_t new_len = 0;
while (prefix < length)
while (!lit_utf8_iterator_is_eos (&iter))
{
ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
size,
prefix);
ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);
if (lit_char_is_white_space (next_char)
|| lit_char_is_line_terminator (next_char))
if (lit_char_is_white_space (current_char)
|| lit_char_is_line_terminator (current_char))
{
prefix++;
}
@ -2036,13 +2037,13 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
}
}
while (postfix < length - prefix)
lit_utf8_iterator_seek_eos (&iter);
while (!lit_utf8_iterator_is_bos (&iter))
{
ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
size,
length - postfix - 1);
if (lit_char_is_white_space (next_char)
|| lit_char_is_line_terminator (next_char))
ecma_char_t current_char = lit_utf8_iterator_read_prev (&iter);
if (lit_char_is_white_space (current_char)
|| lit_char_is_line_terminator (current_char))
{
postfix++;
}
@ -2051,8 +2052,7 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
break;
}
}
new_len = prefix < size ? size - prefix - postfix : 0;
new_len = prefix < length ? length - prefix - postfix : 0;
ecma_string_t *new_str_p = ecma_string_substr (original_string_p, prefix, prefix + new_len);

View File

@ -66,4 +66,13 @@ assert(" ".trim() === "");
assert("".trim() === "");
// FIXME: add unicode tests when unicode support available
assert("\uf389".trim() === "\uf389");
assert(String.prototype.trim.call('\uf389') === "\uf389");
assert("\u20291\u00D0".trim() === "1\u00D0");
assert("\u20291\u00A0".trim() === "1");
assert("\u0009\u000B\u000C\u0020\u00A01".trim() === "1");
assert("\u000A\u000D\u2028\u202911".trim() === "11");
assert("\u0009\u000B\u000C\u0020\u00A01\u0009\u000B\u000C\u0020\u00A0".trim() === "1");
assert("\u000A\u000D\u2028\u202911\u000A\u000D\u2028\u2029".trim() === "11");