Make the string trim method unicode tolerant.

Fixes issue #426

JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
2026-02-07 19:26:25 +00:00 · 2015-08-04 10:29:11 +02:00 · 2015-08-04 10:29:11 +02:00 · af56cd8465
commit af56cd8465
parent 57336909cb
2 changed files with 25 additions and 16 deletions
--- a/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp
+++ b/jerry-core/ecma/builtin-objects/ecma-builtin-string-prototype.cpp
@@ -2009,24 +2009,25 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu

  /* 3 */
  const lit_utf8_size_t size = ecma_string_get_size (original_string_p);
-  const ecma_length_t length = ecma_string_get_size (original_string_p);

  /* Workaround: avoid repeated call of ecma_string_get_char_at_pos() because its overhead */
  lit_utf8_byte_t *original_utf8_str_p = (lit_utf8_byte_t *) mem_heap_alloc_block (size + 1,
                                                                                   MEM_HEAP_ALLOC_SHORT_TERM);
  ecma_string_to_utf8_string (original_string_p, original_utf8_str_p, (ssize_t) size);

+  const ecma_length_t length = lit_utf8_string_length (original_utf8_str_p, size);
+
+  lit_utf8_iterator_t iter = lit_utf8_iterator_create (original_utf8_str_p, size);
+
  uint32_t prefix = 0, postfix = 0;
  uint32_t new_len = 0;

-  while (prefix < length)
+  while (!lit_utf8_iterator_is_eos (&iter))
  {
-    ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
-                                                          size,
-                                                          prefix);
+    ecma_char_t current_char = lit_utf8_iterator_read_next (&iter);

-    if (lit_char_is_white_space (next_char)
-        || lit_char_is_line_terminator (next_char))
+    if (lit_char_is_white_space (current_char)
+        || lit_char_is_line_terminator (current_char))
    {
      prefix++;
    }
@@ -2036,13 +2037,13 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
    }
  }

-  while (postfix < length - prefix)
+  lit_utf8_iterator_seek_eos (&iter);
+  while (!lit_utf8_iterator_is_bos (&iter))
  {
-    ecma_char_t next_char = lit_utf8_string_code_unit_at (original_utf8_str_p,
-                                                          size,
-                                                          length - postfix - 1);
-    if (lit_char_is_white_space (next_char)
-        || lit_char_is_line_terminator (next_char))
+    ecma_char_t current_char = lit_utf8_iterator_read_prev (&iter);
+
+    if (lit_char_is_white_space (current_char)
+        || lit_char_is_line_terminator (current_char))
    {
      postfix++;
    }
@@ -2051,8 +2052,7 @@ ecma_builtin_string_prototype_object_trim (ecma_value_t this_arg) /**< this argu
      break;
    }
  }
-
-  new_len = prefix < size ? size - prefix - postfix : 0;
+  new_len = prefix < length ? length - prefix - postfix : 0;

  ecma_string_t *new_str_p = ecma_string_substr (original_string_p, prefix, prefix + new_len);

--- a/tests/jerry/string-prototype-trim.js
+++ b/tests/jerry/string-prototype-trim.js
@@ -66,4 +66,13 @@ assert("          ".trim() === "");

 assert("".trim() === "");

-// FIXME: add unicode tests when unicode support available
+assert("\uf389".trim() === "\uf389");
+assert(String.prototype.trim.call('\uf389') === "\uf389");
+assert("\u20291\u00D0".trim() === "1\u00D0");
+assert("\u20291\u00A0".trim() === "1");
+
+assert("\u0009\u000B\u000C\u0020\u00A01".trim() === "1");
+assert("\u000A\u000D\u2028\u202911".trim() === "11");
+
+assert("\u0009\u000B\u000C\u0020\u00A01\u0009\u000B\u000C\u0020\u00A0".trim() === "1");
+assert("\u000A\u000D\u2028\u202911\u000A\u000D\u2028\u2029".trim() === "11");