From cf2bc459bb59728d4e4dc691f0e042c519e5bcba Mon Sep 17 00:00:00 2001
From: Andrey Shitov <a.shitov@samsung.com>
Date: Fri, 10 Jul 2015 20:01:43 +0300
Subject: [PATCH] Support unicode whitespaces in string-to-number conversion.

JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
---
 .../ecma/base/ecma-helpers-conversion.cpp     | 42 ++++++++++++++-----
 jerry-core/lit/lit-unicode-ranges.inc.h       | 14 +++++++
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/jerry-core/ecma/base/ecma-helpers-conversion.cpp b/jerry-core/ecma/base/ecma-helpers-conversion.cpp
index b84fa0b84..ce344dd70 100644
--- a/jerry-core/ecma/base/ecma-helpers-conversion.cpp
+++ b/jerry-core/ecma/base/ecma-helpers-conversion.cpp
@@ -23,6 +23,7 @@
 #include "ecma-globals.h"
 #include "ecma-helpers.h"
 #include "jrt-libc-includes.h"
+#include "lit-char-helpers.h"
 #include "lit-magic-strings.h"
 
 /*
@@ -343,7 +344,6 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
   const lit_utf8_byte_t hex_lower_digits_range[10] = { 'a', 'f' };
   const lit_utf8_byte_t hex_upper_digits_range[10] = { 'A', 'F' };
   const lit_utf8_byte_t hex_x_chars[2] = { 'x', 'X' };
-  const lit_utf8_byte_t white_space[2] = { ' ', '\n' };
   const lit_utf8_byte_t e_chars[2] = { 'e', 'E' };
   const lit_utf8_byte_t plus_char = '+';
   const lit_utf8_byte_t minus_char = '-';
@@ -354,23 +354,43 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
     return ECMA_NUMBER_ZERO;
   }
 
-  const lit_utf8_byte_t *begin_p = str_p;
-  const lit_utf8_byte_t *end_p = begin_p + str_size - 1;
+  lit_utf8_iterator_t iter = lit_utf8_iterator_create (str_p, str_size);
+  ecma_char_t code_unit;
 
-  while (begin_p <= end_p
-         && (*begin_p == white_space[0]
-             || *begin_p == white_space[1]))
+  while (!lit_utf8_iterator_is_eos (&iter))
   {
-    begin_p++;
+    code_unit = lit_utf8_iterator_peek_next (&iter);
+    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
+    {
+      lit_utf8_iterator_incr (&iter);
+    }
+    else
+    {
+      break;
+    }
   }
 
-  while (begin_p <= end_p
-         && (*end_p == white_space[0]
-             || *end_p == white_space[1]))
+  JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
+  const lit_utf8_byte_t *begin_p = iter.buf_p + iter.buf_pos.offset;
+
+  iter = lit_utf8_iterator_create (iter.buf_p + iter.buf_pos.offset, str_size - iter.buf_pos.offset);
+  lit_utf8_iterator_seek_eos (&iter);
+  while (!lit_utf8_iterator_is_bos (&iter))
   {
-    end_p--;
+    code_unit = lit_utf8_iterator_peek_prev (&iter);
+    if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
+    {
+      lit_utf8_iterator_decr (&iter);
+    }
+    else
+    {
+      break;
+    }
   }
 
+  JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
+  const lit_utf8_byte_t *end_p = iter.buf_p + iter.buf_pos.offset - 1;
+
   if (begin_p > end_p)
   {
     return ECMA_NUMBER_ZERO;
diff --git a/jerry-core/lit/lit-unicode-ranges.inc.h b/jerry-core/lit/lit-unicode-ranges.inc.h
index de7cb7d31..233343b89 100644
--- a/jerry-core/lit/lit-unicode-ranges.inc.h
+++ b/jerry-core/lit/lit-unicode-ranges.inc.h
@@ -21,6 +21,9 @@
  *          http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
  *
  * The range lists were generated using tools/print-unicode-ranges.sh script from UnicodeData-3.0.0.txt
+ *
+ * NOTE:
+ *   Some ranges in the "Separator, Space" category were added manually; see the corresponding definitions.
  */
 
 /**
@@ -2430,6 +2433,17 @@ LIT_UNICODE_RANGE_NO (0x3280, 0x3289) /* CIRCLED IDEOGRAPH ONE
 #ifndef LIT_UNICODE_RANGE_ZS
 # define LIT_UNICODE_RANGE_ZS(range_begin, range_end)
 #endif /* !LIT_UNICODE_RANGE_ZS */
+
+LIT_UNICODE_RANGE_ZS (0x180E, 0x180E) /* MONGOLIAN VOWEL SEPARATOR (manually added)
+                                       * This character doesn't belong to the Zs category according to
+                                       * UnicodeData-3.0.0.txt, but it should be supported according to
+                                       * ch09/9.3/9.3.1/S9.3.1_A2.js from the test262 suite. */
+
+LIT_UNICODE_RANGE_ZS (0x205F, 0x205F) /* MEDIUM MATHEMATICAL SPACE (manually added)
+                                       * This character doesn't belong to the Zs category according to
+                                       * UnicodeData-3.0.0.txt, but it should be supported according to
+                                       * ch09/9.3/9.3.1/S9.3.1_A2.js from the test262 suite. */
+
 LIT_UNICODE_RANGE_ZS (0x0020, 0x0020) /* SPACE */
 
 LIT_UNICODE_RANGE_ZS (0x00A0, 0x00A0) /* NO-BREAK SPACE */