Support unicode whitespaces in string-to-number conversion.

JerryScript-DCO-1.0-Signed-off-by: Andrey Shitov a.shitov@samsung.com
This commit is contained in:
Andrey Shitov 2015-07-10 20:01:43 +03:00
parent b3fa7d9765
commit cf2bc459bb
2 changed files with 45 additions and 11 deletions

View File

@ -23,6 +23,7 @@
#include "ecma-globals.h"
#include "ecma-helpers.h"
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"
#include "lit-magic-strings.h"
/*
@ -343,7 +344,6 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
const lit_utf8_byte_t hex_lower_digits_range[10] = { 'a', 'f' };
const lit_utf8_byte_t hex_upper_digits_range[10] = { 'A', 'F' };
const lit_utf8_byte_t hex_x_chars[2] = { 'x', 'X' };
const lit_utf8_byte_t white_space[2] = { ' ', '\n' };
const lit_utf8_byte_t e_chars[2] = { 'e', 'E' };
const lit_utf8_byte_t plus_char = '+';
const lit_utf8_byte_t minus_char = '-';
@ -354,23 +354,43 @@ ecma_utf8_string_to_number (const lit_utf8_byte_t *str_p, /**< utf-8 string */
return ECMA_NUMBER_ZERO;
}
const lit_utf8_byte_t *begin_p = str_p;
const lit_utf8_byte_t *end_p = begin_p + str_size - 1;
lit_utf8_iterator_t iter = lit_utf8_iterator_create (str_p, str_size);
ecma_char_t code_unit;
while (begin_p <= end_p
&& (*begin_p == white_space[0]
|| *begin_p == white_space[1]))
while (!lit_utf8_iterator_is_eos (&iter))
{
begin_p++;
code_unit = lit_utf8_iterator_peek_next (&iter);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_iterator_incr (&iter);
}
else
{
break;
}
}
while (begin_p <= end_p
&& (*end_p == white_space[0]
|| *end_p == white_space[1]))
JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
const lit_utf8_byte_t *begin_p = iter.buf_p + iter.buf_pos.offset;
iter = lit_utf8_iterator_create (iter.buf_p + iter.buf_pos.offset, str_size - iter.buf_pos.offset);
lit_utf8_iterator_seek_eos (&iter);
while (!lit_utf8_iterator_is_bos (&iter))
{
end_p--;
code_unit = lit_utf8_iterator_peek_prev (&iter);
if (lit_char_is_white_space (code_unit) || lit_char_is_line_terminator (code_unit))
{
lit_utf8_iterator_decr (&iter);
}
else
{
break;
}
}
JERRY_ASSERT (!iter.buf_pos.is_non_bmp_middle);
const lit_utf8_byte_t *end_p = iter.buf_p + iter.buf_pos.offset - 1;
if (begin_p > end_p)
{
return ECMA_NUMBER_ZERO;

View File

@ -21,6 +21,9 @@
* http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
*
* The range lists were generated using tools/print-unicode-ranges.sh script from UnicodeData-3.0.0.txt
*
* NOTE:
* Some ranges in the "Separator, Space" category were added manually; see the corresponding definitions.
*/
/**
@ -2430,6 +2433,17 @@ LIT_UNICODE_RANGE_NO (0x3280, 0x3289) /* CIRCLED IDEOGRAPH ONE
#ifndef LIT_UNICODE_RANGE_ZS
# define LIT_UNICODE_RANGE_ZS(range_begin, range_end)
#endif /* !LIT_UNICODE_RANGE_ZS */
LIT_UNICODE_RANGE_ZS (0x180E, 0x180E) /* MONGOLIAN VOWEL SEPARATOR (manually added)
* This character doesn't belong to the Zs category according to
* UnicodeData-3.0.0.txt, but it should be supported according to
* ch09/9.3/9.3.1/S9.3.1_A2.js from the test262 suite. */
LIT_UNICODE_RANGE_ZS (0x205F, 0x205F) /* MEDIUM MATHEMATICAL SPACE (manually added)
* This character doesn't belong to the Zs category according to
* UnicodeData-3.0.0.txt, but it should be supported according to
* ch09/9.3/9.3.1/S9.3.1_A2.js from the test262 suite. */
LIT_UNICODE_RANGE_ZS (0x0020, 0x0020) /* SPACE */
LIT_UNICODE_RANGE_ZS (0x00A0, 0x00A0) /* NO-BREAK SPACE */