Implement \u{hex} support. (#3447)

A large rework because surrogate pairs must be combined.

Currently, only code points in the range 0x10C80..0x10CF2 are accepted as valid identifier characters outside the basic plane.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg 2019-12-16 11:26:02 +01:00 committed by Dániel Bátyai
parent 1db16c3a1c
commit 40d930d62c
22 changed files with 765 additions and 370 deletions

View File

@ -1553,31 +1553,59 @@ jerry_append_number_to_buffer (uint8_t *buffer_p, /**< buffer */
static bool
ecma_string_is_valid_identifier (const ecma_string_t *string_p)
{
bool result = false;
ECMA_STRING_TO_UTF8_STRING (string_p, str_buffer_p, str_buffer_size);
if (lit_char_is_identifier_start (str_buffer_p))
const uint8_t *str_p = str_buffer_p;
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
while (str_p < str_end_p)
{
const uint8_t *str_start_p = str_buffer_p;
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
lit_code_point_t code_point = *str_p;
lit_utf8_size_t utf8_length = 1;
result = true;
while (str_start_p < str_end_p)
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
{
if (!lit_char_is_identifier_part (str_start_p))
utf8_length = lit_read_code_point_from_utf8 (str_p,
(lit_utf8_size_t) (str_end_p - str_p),
&code_point);
#if ENABLED (JERRY_ES2015)
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
&& str_p + 3 < str_end_p)
{
lit_code_point_t low_surrogate;
lit_read_code_point_from_utf8 (str_p + 3,
(lit_utf8_size_t) (str_end_p - (str_p + 3)),
&low_surrogate);
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
{
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
(ecma_char_t) low_surrogate);
utf8_length = 2 * 3;
}
}
#endif /* ENABLED (JERRY_ES2015) */
}
if (str_p == str_buffer_p)
{
if (!lit_code_point_is_identifier_start (code_point))
{
result = false;
break;
}
lit_utf8_incr (&str_start_p);
}
else if (!lit_code_point_is_identifier_part (code_point))
{
break;
}
str_p += utf8_length;
}
ECMA_FINALIZE_UTF8_STRING (str_buffer_p, str_buffer_size);
return result;
return str_p == str_end_p;
} /* ecma_string_is_valid_identifier */
#endif /* ENABLED (JERRY_SNAPSHOT_SAVE) */

View File

@ -461,16 +461,9 @@ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string
if ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
{
/* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
uint32_t character = ((((uint32_t) string_p[pos++]) & 0x7) << 18);
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
character |= (((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK);
JERRY_ASSERT (character >= 0x10000);
character -= 0x10000;
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xd800 | (character >> 10)));
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
lit_four_byte_utf8_char_to_cesu8 (data_p, string_p + pos);
data_p += 3 * 2;
pos += 4;
}
else
{
@ -2683,10 +2676,10 @@ void
ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
const ecma_char_t c) /**< ecma char */
{
const lit_utf8_size_t size = (lit_utf8_size_t) lit_char_get_utf8_length (c);
const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
lit_char_to_utf8_bytes (dest_p, c);
lit_code_point_to_cesu8_bytes (dest_p, c);
} /* ecma_stringbuilder_append_char */
/**

View File

@ -61,7 +61,7 @@ ecma_date_parse_date_chars (const lit_utf8_byte_t **str_p, /**< pointer to the c
while (num_of_chars--)
{
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_utf8_read_next (str_p)))
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_cesu8_read_next (str_p)))
{
return ecma_number_make_nan ();
}

View File

@ -150,7 +150,7 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
int sign = 1;
/* 4. */
ecma_char_t current = lit_utf8_read_next (&string_curr_p);
ecma_char_t current = lit_cesu8_read_next (&string_curr_p);
if (current == LIT_CHAR_MINUS)
{
sign = -1;
@ -162,7 +162,7 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
start_p = string_curr_p;
if (string_curr_p < string_end_p)
{
current = lit_utf8_read_next (&string_curr_p);
current = lit_cesu8_read_next (&string_curr_p);
}
}
@ -970,7 +970,7 @@ ecma_builtin_global_object_escape (lit_utf8_byte_t *input_start_p, /**< routine'
while (input_curr_p < input_end_p)
{
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
@ -1005,7 +1005,7 @@ ecma_builtin_global_object_escape (lit_utf8_byte_t *input_start_p, /**< routine'
while (input_curr_p < input_end_p)
{
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
@ -1091,7 +1091,7 @@ ecma_builtin_global_object_unescape (lit_utf8_byte_t *input_start_p, /**< routin
while (input_curr_p < input_end_p)
{
/* 6. */
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
/* 7-8. */
if (status == 0 && chr == LIT_CHAR_PERCENT)

View File

@ -713,7 +713,7 @@ ecma_builtin_helper_string_find_index (ecma_string_t *original_str_p, /**< index
/* iterate original string and try to match at each position */
bool searching = true;
ecma_char_t first_char = lit_utf8_read_next (&search_str_curr_p);
ecma_char_t first_char = lit_cesu8_read_next (&search_str_curr_p);
while (searching)
{
/* match as long as possible */
@ -722,14 +722,14 @@ ecma_builtin_helper_string_find_index (ecma_string_t *original_str_p, /**< index
if (match_len < search_len &&
index + match_len < original_len &&
lit_utf8_read_next (&original_str_curr_p) == first_char)
lit_cesu8_read_next (&original_str_curr_p) == first_char)
{
const lit_utf8_byte_t *nested_search_str_curr_p = search_str_curr_p;
match_len++;
while (match_len < search_len &&
index + match_len < original_len &&
lit_utf8_read_next (&original_str_curr_p) == lit_utf8_read_next (&nested_search_str_curr_p))
lit_cesu8_read_next (&original_str_curr_p) == lit_cesu8_read_next (&nested_search_str_curr_p))
{
match_len++;
}

View File

@ -1155,7 +1155,7 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
while (input_str_curr_p < input_str_end_p)
{
ecma_char_t character = lit_utf8_read_next (&input_str_curr_p);
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
ecma_length_t character_length;
lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT];
@ -1194,7 +1194,7 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
while (input_str_curr_p < input_str_end_p)
{
ecma_char_t character = lit_utf8_read_next (&input_str_curr_p);
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
ecma_length_t character_length;

View File

@ -220,11 +220,11 @@ ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, /**< reference to st
JERRY_ASSERT (str_p != NULL);
const lit_utf8_byte_t *current_p = *str_p;
lit_code_point_t ch = lit_utf8_read_next (&current_p);
lit_code_point_t ch = lit_cesu8_read_next (&current_p);
if (lit_is_code_point_utf16_high_surrogate ((ecma_char_t) ch)
&& current_p < end_p)
{
const ecma_char_t next_ch = lit_utf8_peek_next (current_p);
const ecma_char_t next_ch = lit_cesu8_peek_next (current_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
lit_utf8_incr (&current_p);
@ -425,14 +425,14 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
const bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
lit_code_point_t ch1 = re_get_char (&bc_p); /* Already canonicalized. */
lit_code_point_t ch2 = lit_utf8_read_next (&str_curr_p);
lit_code_point_t ch2 = lit_cesu8_read_next (&str_curr_p);
#if ENABLED (JERRY_ES2015)
if (re_ctx_p->flags & RE_FLAG_UNICODE
&& lit_is_code_point_utf16_high_surrogate (ch2)
&& str_curr_p < re_ctx_p->input_end_p)
{
const ecma_char_t next_ch = lit_utf8_peek_next (str_curr_p);
const ecma_char_t next_ch = lit_cesu8_peek_next (str_curr_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
lit_utf8_incr (&str_curr_p);
@ -460,7 +460,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
return NULL; /* fail */
}
const ecma_char_t ch = lit_utf8_read_next (&str_curr_p);
const ecma_char_t ch = lit_cesu8_read_next (&str_curr_p);
JERRY_TRACE_MSG ("Period matching '.' to %u: ", (unsigned int) ch);
if (lit_char_is_line_terminator (ch))
@ -474,7 +474,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
&& lit_is_code_point_utf16_high_surrogate (ch)
&& str_curr_p < re_ctx_p->input_end_p)
{
const ecma_char_t next_ch = lit_utf8_peek_next (str_curr_p);
const ecma_char_t next_ch = lit_cesu8_peek_next (str_curr_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
lit_utf8_incr (&str_curr_p);
@ -501,7 +501,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
return NULL; /* fail */
}
if (lit_char_is_line_terminator (lit_utf8_peek_prev (str_curr_p)))
if (lit_char_is_line_terminator (lit_cesu8_peek_prev (str_curr_p)))
{
JERRY_TRACE_MSG ("match\n");
break; /* tail merge */
@ -526,7 +526,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
return NULL; /* fail */
}
if (lit_char_is_line_terminator (lit_utf8_peek_next (str_curr_p)))
if (lit_char_is_line_terminator (lit_cesu8_peek_next (str_curr_p)))
{
JERRY_TRACE_MSG ("match\n");
break; /* tail merge */
@ -539,10 +539,10 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
{
const bool is_wordchar_left = ((str_curr_p > re_ctx_p->input_start_p)
&& lit_char_is_word_char (lit_utf8_peek_prev (str_curr_p)));
&& lit_char_is_word_char (lit_cesu8_peek_prev (str_curr_p)));
const bool is_wordchar_right = ((str_curr_p < re_ctx_p->input_end_p)
&& lit_char_is_word_char (lit_utf8_peek_next (str_curr_p)));
&& lit_char_is_word_char (lit_cesu8_peek_next (str_curr_p)));
if (op == RE_OP_ASSERT_WORD_BOUNDARY)
{
@ -659,7 +659,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
else
{
#endif /* ENABLED (JERRY_ES2015) */
const ecma_char_t curr_ch = (ecma_char_t) ecma_regexp_canonicalize (lit_utf8_read_next (&str_curr_p),
const ecma_char_t curr_ch = (ecma_char_t) ecma_regexp_canonicalize (lit_cesu8_read_next (&str_curr_p),
is_ignorecase);
while (range_count-- > 0)
@ -1115,7 +1115,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
break;
}
lit_utf8_read_prev (&str_curr_p);
lit_cesu8_read_prev (&str_curr_p);
iter_count--;
}
}

View File

@ -200,73 +200,33 @@ lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
} /* lit_char_is_unicode_non_letter_ident_part */
/**
* Checks whether the next UTF8 character is a valid identifier start.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_start (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
{
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return lit_char_is_identifier_start_character (*src_p);
}
/* ECMAScript 2015 specification allows some code points in supplementary plane.
* However, we don't permit characters in supplementary characters as start of identifier.
*/
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
{
return false;
}
return lit_char_is_identifier_start_character (lit_utf8_peek_next (src_p));
} /* lit_char_is_identifier_start */
/**
* Checks whether the character is a valid identifier start.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_start_character (uint16_t chr) /**< EcmaScript character */
lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point */
{
/* Fast path for ASCII-defined letters. */
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|| chr == LIT_CHAR_DOLLAR_SIGN
|| chr == LIT_CHAR_UNDERSCORE);
return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
|| code_point == LIT_CHAR_DOLLAR_SIGN
|| code_point == LIT_CHAR_UNDERSCORE);
}
return lit_char_is_unicode_letter (chr);
} /* lit_char_is_identifier_start_character */
/**
* Checks whether the next UTF8 character is a valid identifier part.
*
* @return true if it is.
*/
bool
lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
{
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
#if ENABLED (JERRY_ES2015)
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
return lit_char_is_identifier_part_character (*src_p);
/* TODO: detect these ranges correctly. */
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
}
#endif /* ENABLED (JERRY_ES2015) */
/* ECMAScript 2015 specification allows some code points in supplementary plane.
* However, we don't permit characters in supplementary characters as part of identifier.
*/
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
{
return false;
}
return lit_char_is_identifier_part_character (lit_utf8_peek_next (src_p));
} /* lit_char_is_identifier_part */
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
} /* lit_code_point_is_identifier_start */
/**
* Checks whether the character is a valid identifier part.
@ -274,21 +234,29 @@ lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8
* @return true if it is.
*/
bool
lit_char_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point */
{
/* Fast path for ASCII-defined letters. */
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|| (chr >= LIT_CHAR_0 && chr <= LIT_CHAR_9)
|| chr == LIT_CHAR_DOLLAR_SIGN
|| chr == LIT_CHAR_UNDERSCORE);
return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
&& LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
|| (code_point >= LIT_CHAR_0 && code_point <= LIT_CHAR_9)
|| code_point == LIT_CHAR_DOLLAR_SIGN
|| code_point == LIT_CHAR_UNDERSCORE);
}
return (lit_char_is_unicode_letter (chr)
|| lit_char_is_unicode_non_letter_ident_part (chr));
} /* lit_char_is_identifier_part_character */
#if ENABLED (JERRY_ES2015)
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
/* TODO: detect these ranges correctly. */
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
}
#endif /* ENABLED (JERRY_ES2015) */
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|| lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
} /* lit_code_point_is_identifier_part */
/**
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
@ -356,30 +324,47 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
* @return length of the UTF8 representation.
*/
size_t
lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
ecma_char_t chr) /**< EcmaScript character */
lit_code_point_to_cesu8_bytes (uint8_t *dst_p, /**< destination buffer */
lit_code_point_t code_point) /**< code point */
{
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
{
/* 00000000 0xxxxxxx -> 0xxxxxxx */
*dst_p = (uint8_t) chr;
dst_p[0] = (uint8_t) code_point;
return 1;
}
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
{
/* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
*(dst_p++) = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
dst_p[0] = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
return 2;
}
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
*(dst_p++) = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((chr >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
*(dst_p++) = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
return 3;
} /* lit_char_to_utf8_bytes */
if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((code_point >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
return 3;
}
JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);
code_point -= LIT_UTF8_4_BYTE_CODE_POINT_MIN;
dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x20 | ((code_point >> 16) & LIT_UTF8_LAST_4_BITS_MASK));
dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 10) & LIT_UTF8_LAST_6_BITS_MASK));
dst_p[3] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
dst_p[4] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x30 | ((code_point >> 6) & LIT_UTF8_LAST_4_BITS_MASK));
dst_p[5] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
return 3 * 2;
} /* lit_code_point_to_cesu8_bytes */
/**
* Returns the length of the UTF8 representation of a character.
@ -387,23 +372,44 @@ lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
* @return length of the UTF8 representation.
*/
size_t
lit_char_get_utf8_length (ecma_char_t chr) /**< EcmaScript character */
lit_code_point_get_cesu8_length (lit_code_point_t code_point) /**< code point */
{
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
{
/* 00000000 0xxxxxxx */
return 1;
}
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
{
/* 00000yyy yyxxxxxx */
return 2;
}
/* zzzzyyyy yyxxxxxx */
return 3;
} /* lit_char_get_utf8_length */
if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
{
/* zzzzyyyy yyxxxxxx */
return 3;
}
/* high + low surrogate */
return 2 * 3;
} /* lit_code_point_get_cesu8_length */
/**
 * Convert a four byte long utf8 character to two three byte long cesu8 characters
 *
 * The code point is first reassembled from the payload bits of the four
 * source bytes, then handed to lit_code_point_to_cesu8_bytes, which writes
 * the result into the destination buffer.
 */
void
lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, /**< destination buffer */
                                  const uint8_t *source_p) /**< source buffer */
{
  /* Payload bits: the low 3 bits of the first byte, the low 6 bits of each following byte. */
  const uint32_t first_bits = ((uint32_t) source_p[0]) & LIT_UTF8_LAST_3_BITS_MASK;
  const uint32_t second_bits = ((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK;
  const uint32_t third_bits = ((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK;
  const uint32_t fourth_bits = ((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK;

  /* Most significant group first: 3 + 6 + 6 + 6 = 21 bits. */
  const lit_code_point_t code_point = ((first_bits << 18)
                                       | (second_bits << 12)
                                       | (third_bits << 6)
                                       | fourth_bits);

  lit_code_point_to_cesu8_bytes (dst_p, code_point);
} /* lit_four_byte_utf8_char_to_cesu8 */
/**
* Parse the next number_of_characters hexadecimal character,

View File

@ -75,10 +75,8 @@ bool lit_char_is_line_terminator (ecma_char_t c);
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
/* LIT_CHAR_BACKSLASH defined above */
bool lit_char_is_identifier_start (const uint8_t *src_p);
bool lit_char_is_identifier_part (const uint8_t *src_p);
bool lit_char_is_identifier_start_character (ecma_char_t chr);
bool lit_char_is_identifier_part_character (ecma_char_t chr);
bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
bool lit_code_point_is_identifier_part (lit_code_point_t code_point);
/*
* Punctuator characters (ECMA-262 v5, 7.7)
@ -215,8 +213,9 @@ bool lit_char_is_octal_digit (ecma_char_t c);
bool lit_char_is_decimal_digit (ecma_char_t c);
bool lit_char_is_hex_digit (ecma_char_t c);
uint32_t lit_char_hex_to_int (ecma_char_t c);
size_t lit_char_to_utf8_bytes (uint8_t *dst_p, ecma_char_t chr);
size_t lit_char_get_utf8_length (ecma_char_t chr);
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
/* read a hex encoded code point from a zero terminated buffer */
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t number_of_characters,

View File

@ -481,7 +481,7 @@ lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer wit
* @return next code unit
*/
ecma_char_t
lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
lit_cesu8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
JERRY_ASSERT (*buf_p);
ecma_char_t ch;
@ -489,7 +489,7 @@ lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
*buf_p += lit_read_code_unit_from_utf8 (*buf_p, &ch);
return ch;
} /* lit_utf8_read_next */
} /* lit_cesu8_read_next */
/**
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
@ -497,7 +497,7 @@ lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
* @return previous code unit
*/
ecma_char_t
lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
lit_cesu8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
{
JERRY_ASSERT (*buf_p);
ecma_char_t ch;
@ -506,7 +506,7 @@ lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
lit_read_code_unit_from_utf8 (*buf_p, &ch);
return ch;
} /* lit_utf8_read_prev */
} /* lit_cesu8_read_prev */
/**
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
@ -514,15 +514,15 @@ lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
* @return next code unit
*/
ecma_char_t
lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
{
JERRY_ASSERT (buf_p);
JERRY_ASSERT (buf_p != NULL);
ecma_char_t ch;
lit_read_code_unit_from_utf8 (buf_p, &ch);
return ch;
} /* lit_utf8_peek_next */
} /* lit_cesu8_peek_next */
/**
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
@ -530,15 +530,15 @@ lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with char
* @return previous code unit
*/
ecma_char_t
lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
{
JERRY_ASSERT (buf_p);
JERRY_ASSERT (buf_p != NULL);
ecma_char_t ch;
lit_read_prev_code_unit_from_utf8 (buf_p, &ch);
return ch;
} /* lit_utf8_peek_prev */
} /* lit_cesu8_peek_prev */
/**
* Increase cesu-8 encoded string pointer by one code unit.

View File

@ -46,7 +46,6 @@
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
#define LIT_UTF8_5_BYTE_MARKER (0xF8)
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
#define LIT_UTF8_1_BYTE_MASK (0x80)
@ -82,7 +81,7 @@
/**
* Byte values >= LIT_UTF8_FIRST_BYTE_MAX are not allowed in internal strings
*/
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
#define LIT_UTF8_FIRST_BYTE_MAX (0xF8)
/* validation */
bool lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size);
@ -135,10 +134,10 @@ lit_utf8_size_t lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p,
lit_utf8_size_t lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p,
ecma_char_t *code_point);
ecma_char_t lit_utf8_read_next (const lit_utf8_byte_t **buf_p);
ecma_char_t lit_utf8_read_prev (const lit_utf8_byte_t **buf_p);
ecma_char_t lit_utf8_peek_next (const lit_utf8_byte_t *buf_p);
ecma_char_t lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p);
ecma_char_t lit_cesu8_read_next (const lit_utf8_byte_t **buf_p);
ecma_char_t lit_cesu8_read_prev (const lit_utf8_byte_t **buf_p);
ecma_char_t lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p);
ecma_char_t lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p);
void lit_utf8_incr (const lit_utf8_byte_t **buf_p);
void lit_utf8_decr (const lit_utf8_byte_t **buf_p);

View File

@ -53,14 +53,13 @@ align_column_to_tab (parser_line_counter_t column) /**< current column */
/**
* Parse hexadecimal character sequence
*
* @return character value
* @return character value or UINT32_MAX on error
*/
ecma_char_t
lexer_hex_to_character (parser_context_t *context_p, /**< context */
const uint8_t *source_p, /**< current source position */
int length) /**< source length */
static lit_code_point_t
lexer_hex_to_code_point (const uint8_t *source_p, /**< current source position */
parser_line_counter_t length) /**< source length */
{
uint32_t result = 0;
lit_code_point_t result = 0;
do
{
@ -81,29 +80,94 @@ lexer_hex_to_character (parser_context_t *context_p, /**< context */
}
else
{
parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
return UINT32_MAX;
}
}
}
while (--length > 0);
return (ecma_char_t) result;
} /* lexer_hex_to_character */
return result;
} /* lexer_hex_to_code_point */
#if ENABLED (JERRY_ES2015)
/**
 * Decode the hexadecimal digits of a \u{...} escape sequence
 *
 * On entry source_p must point to the first character after the opening
 * brace. Digits are consumed until a closing brace is found. On success
 * the total length of the escape sequence (including the backslash, the
 * 'u' and both braces) is stored in length_p. On error length_p is left
 * untouched.
 *
 * @return decoded code point or UINT32_MAX on error
 */
static lit_code_point_t
lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< current source position */
                                   const uint8_t *source_end_p, /**< source end */
                                   uint32_t *length_p) /**< [out] length of the sequence */
{
  JERRY_ASSERT (source_p[-1] == LIT_CHAR_LEFT_BRACE);
  JERRY_ASSERT (source_p < source_end_p);

  lit_code_point_t code_point = 0;
  /* Four is the size of \u{} sequence, each digit adds one more. */
  uint32_t sequence_length = 4;

  for (;;)
  {
    uint32_t digit = *source_p++;
    uint32_t digit_value;

    if (digit >= LIT_CHAR_0 && digit <= LIT_CHAR_9)
    {
      digit_value = digit - LIT_CHAR_0;
    }
    else
    {
      digit = LEXER_TO_ASCII_LOWERCASE (digit);

      if (digit < LIT_CHAR_LOWERCASE_A || digit > LIT_CHAR_LOWERCASE_F)
      {
        /* Not a hexadecimal digit (this also rejects an empty \u{}). */
        return UINT32_MAX;
      }

      digit_value = digit - (LIT_CHAR_LOWERCASE_A - 10);
    }

    code_point = (code_point << 4) + digit_value;

    /* Reject values above the Unicode range and sequences which
     * reach the end of the source before the closing brace. */
    if (code_point >= (LIT_UNICODE_CODE_POINT_MAX + 1) || source_p >= source_end_p)
    {
      return UINT32_MAX;
    }

    sequence_length++;

    if (*source_p == LIT_CHAR_RIGHT_BRACE)
    {
      break;
    }
  }

  *length_p = sequence_length;
  return code_point;
} /* lexer_hex_in_braces_to_code_point */
#endif /* ENABLED (JERRY_ES2015) */
/**
* Parse hexadecimal character sequence
*
* @return character value
*/
static ecma_char_t
lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source position */
int length) /**< source length */
static lit_code_point_t
lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */
{
uint32_t result = 0;
lit_code_point_t result = 0;
const uint8_t *char_p = *source_p;
uint32_t length = (char_p[-1] == LIT_CHAR_LOWERCASE_U) ? 4 : 2;
do
#if ENABLED (JERRY_ES2015)
if (char_p[0] == LIT_CHAR_LEFT_BRACE)
{
uint32_t byte = *source_p++;
length = 0;
char_p++;
}
#endif /* ENABLED (JERRY_ES2015) */
while (true)
{
uint32_t byte = *char_p++;
result <<= 4;
@ -118,10 +182,27 @@ lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source p
result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10);
}
}
while (--length > 0);
return (ecma_char_t) result;
JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX);
#if ENABLED (JERRY_ES2015)
if (length == 0)
{
if (*char_p != LIT_CHAR_RIGHT_BRACE)
{
continue;
}
*source_p = char_p + 1;
return result;
}
#endif /* ENABLED (JERRY_ES2015) */
if (--length == 0)
{
*source_p = char_p;
return result;
}
}
} /* lexer_unchecked_hex_to_character */
/**
@ -509,102 +590,188 @@ static const uint8_t keyword_lengths_list[] =
#undef LEXER_KEYWORD_LIST_LENGTH
/**
* Parse identifier.
* Flags for lexer_parse_identifier.
*/
static void
typedef enum
{
  LEXER_PARSE_NO_OPTS = 0x0, /**< no options */
  LEXER_PARSE_CHECK_KEYWORDS = 0x1, /**< check keywords */
  LEXER_PARSE_CHECK_START_AND_RETURN = 0x2, /**< check identifier start and return */
  LEXER_PARSE_CHECK_PART_AND_RETURN = 0x4, /**< check identifier part and return */
} lexer_parse_options_t;
/**
* Parse identifier.
*
* @return true, if an identifier is parsed, false otherwise
*/
static bool
lexer_parse_identifier (parser_context_t *context_p, /**< context */
bool check_keywords) /**< check keywords */
lexer_parse_options_t options) /**< check keywords */
{
/* Only very few identifiers contains \u escape sequences. */
const uint8_t *source_p = context_p->source_p;
const uint8_t *ident_start_p = context_p->source_p;
/* Note: newline or tab cannot be part of an identifier. */
parser_line_counter_t column = context_p->column;
const uint8_t *source_end_p = context_p->source_end_p;
size_t length = 0;
context_p->token.type = LEXER_LITERAL;
context_p->token.ident_is_strict_keyword = false;
context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
context_p->token.lit_location.has_escape = false;
uint8_t has_escape = false;
do
{
if (*source_p == LIT_CHAR_BACKSLASH)
{
uint16_t character;
/* After a backslash an identifier must start. */
lit_code_point_t code_point = UINT32_MAX;
uint32_t escape_length = 6;
context_p->token.lit_location.has_escape = true;
context_p->source_p = source_p;
context_p->token.column = column;
if ((source_p + 6 > source_end_p) || (source_p[1] != LIT_CHAR_LOWERCASE_U))
if (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN))
{
return true;
}
has_escape = true;
#if ENABLED (JERRY_ES2015)
if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
{
if (source_p[2] == LIT_CHAR_LEFT_BRACE)
{
code_point = lexer_hex_in_braces_to_code_point (source_p + 3, source_end_p, &escape_length);
}
else if (source_p + 6 <= source_end_p)
{
code_point = lexer_hex_to_code_point (source_p + 2, 4);
}
}
#else /* !ENABLED (JERRY_ES2015) */
if (source_p + 6 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
{
code_point = lexer_hex_to_code_point (source_p + 2, 4);
}
#endif /* ENABLED (JERRY_ES2015) */
if (code_point == UINT32_MAX)
{
context_p->source_p = source_p;
context_p->token.column = column;
parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
}
character = lexer_hex_to_character (context_p, source_p + 2, 4);
if (length == 0)
{
if (!lit_char_is_identifier_start_character (character))
if (!lit_code_point_is_identifier_start (code_point))
{
parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_START);
}
}
else
{
if (!lit_char_is_identifier_part_character (character))
if (!lit_code_point_is_identifier_part (code_point))
{
parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_PART);
}
}
length += lit_char_get_utf8_length (character);
source_p += 6;
PARSER_PLUS_EQUAL_LC (column, 6);
length += lit_code_point_get_cesu8_length (code_point);
source_p += escape_length;
PARSER_PLUS_EQUAL_LC (column, escape_length);
continue;
}
/* Valid identifiers cannot contain 4 byte long utf-8
* characters, since those characters are represented
* by 2 ecmascript (UTF-16) characters, and those
* characters cannot be literal characters. */
JERRY_ASSERT (source_p[0] < LEXER_UTF8_4BYTE_START);
lit_code_point_t code_point = *source_p;
lit_utf8_size_t utf8_length = 1, decoded_length = 1, char_count = 1;
source_p++;
length++;
column++;
while (source_p < source_end_p
&& IS_UTF8_INTERMEDIATE_OCTET (source_p[0]))
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
{
source_p++;
length++;
}
}
while (source_p < source_end_p
&& (lit_char_is_identifier_part (source_p) || *source_p == LIT_CHAR_BACKSLASH));
utf8_length = lit_read_code_point_from_utf8 (source_p,
(lit_utf8_size_t) (source_end_p - source_p),
&code_point);
decoded_length = utf8_length;
#if ENABLED (JERRY_ES2015)
/* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
&& source_p + 3 < source_end_p)
{
lit_code_point_t low_surrogate;
lit_read_code_point_from_utf8 (source_p + 3,
(lit_utf8_size_t) (source_end_p - (source_p + 3)),
&low_surrogate);
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
{
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
(ecma_char_t) low_surrogate);
utf8_length = 2 * 3;
decoded_length = 2 * 3;
char_count = 2;
}
}
else if (source_p[0] >= LEXER_UTF8_4BYTE_START)
{
decoded_length = 2 * 3;
has_escape = true;
}
#endif /* ENABLED (JERRY_ES2015) */
}
if (length == 0)
{
if (JERRY_UNLIKELY (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN)))
{
if (options & LEXER_PARSE_CHECK_START_AND_RETURN)
{
return lit_code_point_is_identifier_start (code_point);
}
else
{
return lit_code_point_is_identifier_part (code_point);
}
}
if (!lit_code_point_is_identifier_start (code_point))
{
return false;
}
}
else if (!lit_code_point_is_identifier_part (code_point))
{
break;
}
source_p += utf8_length;
length += decoded_length;
PARSER_PLUS_EQUAL_LC (column, char_count);
}
while (source_p < source_end_p);
JERRY_ASSERT (length > 0);
context_p->token.type = LEXER_LITERAL;
context_p->token.ident_is_strict_keyword = false;
context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
context_p->token.lit_location.has_escape = has_escape;
context_p->source_p = ident_start_p;
context_p->token.column = context_p->column;
context_p->token.lit_location.char_p = ident_start_p;
context_p->token.lit_location.char_p = context_p->source_p;
context_p->token.lit_location.length = (prop_length_t) length;
if (length > PARSER_MAXIMUM_IDENT_LENGTH)
if (JERRY_UNLIKELY (length > PARSER_MAXIMUM_IDENT_LENGTH))
{
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_TOO_LONG);
}
/* Check keywords. */
if (check_keywords
if ((options & LEXER_PARSE_CHECK_KEYWORDS)
&& (length >= LEXER_KEYWORD_MIN_LENGTH && length <= LEXER_KEYWORD_MAX_LENGTH))
{
const uint8_t *ident_start_p = context_p->source_p;
uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH];
if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
{
lexer_convert_ident_to_cesu8 (ident_start_p, buffer_p, (prop_length_t) length);
lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length);
ident_start_p = buffer_p;
}
@ -690,6 +857,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
context_p->source_p = source_p;
context_p->column = column;
return true;
} /* lexer_parse_identifier */
/**
@ -840,20 +1008,40 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
{
uint8_t hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
uint32_t escape_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 3 : 5;
lit_code_point_t code_point = UINT32_MAX;
context_p->token.line = line;
context_p->token.column = (parser_line_counter_t) (column - 1);
if (source_p + 1 + hex_part_length > source_end_p)
#if ENABLED (JERRY_ES2015)
if (source_p + 4 <= source_end_p
&& source_p[0] == LIT_CHAR_LOWERCASE_U
&& source_p[1] == LIT_CHAR_LEFT_BRACE)
{
parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
code_point = lexer_hex_in_braces_to_code_point (source_p + 2, source_end_p, &escape_length);
escape_length--;
}
else
{
#endif /* ENABLED (JERRY_ES2015) */
if (source_p + escape_length <= source_end_p)
{
code_point = lexer_hex_to_code_point (source_p + 1, escape_length - 1);
}
#if ENABLED (JERRY_ES2015)
}
#endif /* ENABLED (JERRY_ES2015) */
if (code_point == UINT32_MAX)
{
parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
}
length += lit_char_get_utf8_length (lexer_hex_to_character (context_p,
source_p + 1,
hex_part_length));
source_p += hex_part_length + 1;
PARSER_PLUS_EQUAL_LC (column, hex_part_length + 1u);
length += lit_code_point_get_cesu8_length (code_point);
source_p += escape_length;
PARSER_PLUS_EQUAL_LC (column, escape_length);
continue;
}
}
@ -1120,12 +1308,6 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
}
}
if (source_p < source_end_p
&& (lit_char_is_identifier_start (source_p) || source_p[0] == LIT_CHAR_BACKSLASH))
{
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
}
length = (size_t) (source_p - context_p->source_p);
if (length > PARSER_MAXIMUM_IDENT_LENGTH)
{
@ -1135,6 +1317,11 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
context_p->token.lit_location.length = (prop_length_t) length;
PARSER_PLUS_EQUAL_LC (context_p->column, length);
context_p->source_p = source_p;
if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_START_AND_RETURN))
{
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
}
} /* lexer_parse_number */
/**
@ -1229,10 +1416,8 @@ lexer_next_token (parser_context_t *context_p) /**< context */
return;
}
if (lit_char_is_identifier_start (context_p->source_p)
|| context_p->source_p[0] == LIT_CHAR_BACKSLASH)
if (lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_KEYWORDS))
{
lexer_parse_identifier (context_p, true);
return;
}
@ -1723,8 +1908,8 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */
* Convert an ident with escapes to a utf8 string.
*/
void
lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
uint8_t *destination_p, /**< destination string */
lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
const uint8_t *source_p, /**< source string */
prop_length_t length) /**< length of destination string */
{
const uint8_t *destination_end_p = destination_p + length;
@ -1735,14 +1920,22 @@ lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
{
if (*source_p == LIT_CHAR_BACKSLASH)
{
destination_p += lit_char_to_utf8_bytes (destination_p,
lexer_unchecked_hex_to_character (source_p + 2, 4));
source_p += 6;
source_p += 2;
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
lexer_unchecked_hex_to_character (&source_p));
continue;
}
JERRY_ASSERT (IS_UTF8_INTERMEDIATE_OCTET (*source_p)
|| lit_char_is_identifier_part (source_p));
#if ENABLED (JERRY_ES2015)
if (*source_p >= LEXER_UTF8_4BYTE_START)
{
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
destination_p += 6;
source_p += 4;
continue;
}
#endif /* ENABLED (JERRY_ES2015) */
*destination_p++ = *source_p++;
}
@ -1783,7 +1976,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
if (literal_p->type == LEXER_IDENT_LITERAL)
{
lexer_convert_ident_to_cesu8 (source_p, destination_start_p, literal_p->length);
lexer_convert_ident_to_cesu8 (destination_start_p, source_p, literal_p->length);
}
else
{
@ -1835,7 +2028,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
{
uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
source_p++;
JERRY_ASSERT (source_p < context_p->source_end_p);
@ -1854,7 +2047,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
}
}
destination_p += lit_char_to_utf8_bytes (destination_p, (uint16_t) octal_number);
destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
continue;
}
@ -1878,13 +2071,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
{
int hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
JERRY_ASSERT (source_p + 1 + hex_part_length <= context_p->source_end_p);
destination_p += lit_char_to_utf8_bytes (destination_p,
lexer_unchecked_hex_to_character (source_p + 1,
hex_part_length));
source_p += hex_part_length + 1;
source_p++;
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
lexer_unchecked_hex_to_character (&source_p));
continue;
}
@ -1946,18 +2135,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
/* Processing 4 byte unicode sequence (even if it is
* after a backslash). Always converted to two 3 byte
* long sequence. */
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
uint32_t character = ((((uint32_t) source_p[0]) & 0x7) << 18);
character |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
character |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
character |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK);
JERRY_ASSERT (character >= 0x10000);
character -= 0x10000;
destination_p += lit_char_to_utf8_bytes (destination_p,
(ecma_char_t) (0xd800 | (character >> 10)));
destination_p += lit_char_to_utf8_bytes (destination_p,
(ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
destination_p += 6;
source_p += 4;
continue;
}
@ -2376,15 +2556,14 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
column++;
}
if (source_p < source_end_p
&& lit_char_is_identifier_part (source_p))
context_p->source_p = source_p;
context_p->column = column;
if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_PART_AND_RETURN))
{
parser_raise_error (context_p, PARSER_ERR_UNKNOWN_REGEXP_FLAG);
}
context_p->source_p = source_p;
context_p->column = column;
length = (lit_utf8_size_t) (regex_end_p - regex_start_p);
if (length > PARSER_MAXIMUM_STRING_LENGTH)
{
@ -2473,10 +2652,9 @@ lexer_expect_identifier (parser_context_t *context_p, /**< context */
context_p->token.column = context_p->column;
if (context_p->source_p < context_p->source_end_p
&& (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
&& lexer_parse_identifier (context_p, (literal_type != LEXER_STRING_LITERAL ? LEXER_PARSE_CHECK_KEYWORDS
: LEXER_PARSE_NO_OPTS)))
{
lexer_parse_identifier (context_p, literal_type != LEXER_STRING_LITERAL);
if (context_p->token.type == LEXER_LITERAL)
{
JERRY_ASSERT (context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
@ -2548,10 +2726,8 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
context_p->token.column = context_p->column;
bool create_literal_object = false;
if (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH)
if (lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
{
lexer_parse_identifier (context_p, false);
if (!(ident_opts & (LEXER_OBJ_IDENT_ONLY_IDENTIFIERS | LEXER_OBJ_IDENT_OBJECT_PATTERN))
&& context_p->token.lit_location.length == 3)
{
@ -2687,10 +2863,8 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
context_p->token.column = context_p->column;
if (context_p->source_p < context_p->source_end_p
&& (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
&& lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
{
lexer_parse_identifier (context_p, false);
if ((ident_opts & LEXER_SCAN_IDENT_PROPERTY)
&& context_p->token.lit_location.length == 3)
{
@ -2726,75 +2900,135 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
* Compares two identifiers.
*
* Note:
* Escape sequences are allowed, size must be the same.
* Escape sequences are allowed in the left identifier, but not in the right
*
* @return true if the two identifiers are the same
*/
bool
lexer_compare_identifiers (const uint8_t *left_p, /**< left identifier */
const uint8_t *right_p, /**< right identifier */
size_t size) /**< byte size of the two identifiers */
static bool
lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier */
const uint8_t *right_p, /**< right identifier string */
size_t size) /**< byte size of the two identifiers */
{
uint8_t utf8_buf[3];
size_t utf8_len, offset;
uint8_t utf8_buf[6];
do
{
/* Backslash cannot be part of a multibyte UTF-8 character. */
if (*left_p != LIT_CHAR_BACKSLASH && *right_p != LIT_CHAR_BACKSLASH)
if (*left_p == *right_p)
{
if (*left_p++ != *right_p++)
{
return false;
}
left_p++;
right_p++;
size--;
continue;
}
if (*left_p == LIT_CHAR_BACKSLASH && *right_p == LIT_CHAR_BACKSLASH)
size_t escape_size;
if (*left_p == LIT_CHAR_BACKSLASH)
{
uint16_t left_chr = lexer_unchecked_hex_to_character (left_p + 2, 4);
left_p += 2;
lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p);
if (left_chr != lexer_unchecked_hex_to_character (right_p + 2, 4))
{
return false;
}
left_p += 6;
right_p += 6;
size -= lit_char_get_utf8_length (left_chr);
continue;
escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
}
else if (*left_p >= LEXER_UTF8_4BYTE_START)
{
lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
escape_size = 3 * 2;
left_p += 4;
}
else
{
return false;
}
/* One character is encoded as unicode sequence. */
if (*right_p == LIT_CHAR_BACKSLASH)
{
/* The pointers can be swapped. */
const uint8_t *swap_p = left_p;
left_p = right_p;
right_p = swap_p;
}
utf8_len = lit_char_to_utf8_bytes (utf8_buf, lexer_unchecked_hex_to_character (left_p + 2, 4));
JERRY_ASSERT (utf8_len > 0);
size -= utf8_len;
offset = 0;
size -= escape_size;
uint8_t *utf8_p = utf8_buf;
do
{
if (utf8_buf[offset] != *right_p++)
if (*right_p++ != *utf8_p++)
{
return false;
}
offset++;
}
while (offset < utf8_len);
left_p += 6;
while (--escape_size > 0);
}
while (size > 0);
return true;
} /* lexer_compare_identifier_to_chars */
/**
 * Compares an identifier literal to a plain byte string.
 *
 * Note:
 *   Only the left identifier may contain escape sequences; the right
 *   string is compared byte-by-byte as it is.
 *
 * @return true - if the length of the literal equals to size and the
 *                identifier matches the string
 *         false - otherwise
 */
bool
lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< left literal */
                                    const uint8_t *right_p, /**< right identifier string */
                                    size_t size) /**< byte size of the right identifier */
{
  if (left_p->length == size)
  {
    if (left_p->has_escape)
    {
      /* The escaped form has to be decoded while it is compared. */
      return lexer_compare_identifier_to_chars (left_p->char_p, right_p, size);
    }

    /* Without escapes the source bytes can be compared directly. */
    return memcmp (left_p->char_p, right_p, size) == 0;
  }

  return false;
} /* lexer_compare_identifier_to_string */
/**
 * Compares two identifier literals.
 *
 * Note:
 *   Both identifiers may contain escape sequences. When both of them do,
 *   the left one is converted to cesu8 in a temporary buffer first.
 *
 * @return true - if the two identifiers are the same
 *         false - otherwise
 */
bool
lexer_compare_identifiers (parser_context_t *context_p, /**< context */
                           const lexer_lit_location_t *left_p, /**< left literal */
                           const lexer_lit_location_t *right_p) /**< right literal */
{
  const prop_length_t ident_length = left_p->length;

  if (right_p->length != ident_length)
  {
    return false;
  }

  if (!left_p->has_escape || !right_p->has_escape)
  {
    /* At least one side has no escapes: that side is passed as the plain
     * character string, the other one as the identifier being decoded. */
    const lexer_lit_location_t *decoded_p = left_p->has_escape ? left_p : right_p;
    const lexer_lit_location_t *plain_p = left_p->has_escape ? right_p : left_p;

    return lexer_compare_identifier_to_chars (decoded_p->char_p, plain_p->char_p, ident_length);
  }

  /* Both sides have escapes: convert the left one into a buffer. Short
   * identifiers use the stack buffer, longer ones a parser allocation. */
  uint8_t stack_buf[64];
  uint8_t *converted_p = stack_buf;

  if (ident_length > sizeof (stack_buf))
  {
    converted_p = parser_malloc (context_p, ident_length);
  }

  lexer_convert_ident_to_cesu8 (converted_p, left_p->char_p, ident_length);

  const bool is_same = lexer_compare_identifier_to_chars (right_p->char_p, converted_p, ident_length);

  if (converted_p != stack_buf)
  {
    parser_free (converted_p, ident_length);
  }

  return is_same;
} /* lexer_compare_identifiers */
/**
@ -2818,7 +3052,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
if (left_ident_p->length != right_ident_p->length)
{
return 0;
return false;
}
if (!left_ident_p->has_escape && !right_ident_p->has_escape)
@ -2826,7 +3060,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0;
}
return lexer_compare_identifiers (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length);
return lexer_compare_identifiers (context_p, left_ident_p, right_ident_p);
} /* lexer_current_is_literal */
#if ENABLED (JERRY_ES2015)

View File

@ -637,8 +637,7 @@ bool lexer_check_yield_no_arg (parser_context_t *context_p);
void lexer_parse_string (parser_context_t *context_p);
void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type);
void lexer_scan_identifier (parser_context_t *context_p, uint32_t ident_opts);
ecma_char_t lexer_hex_to_character (parser_context_t *context_p, const uint8_t *source_p, int length);
void lexer_convert_ident_to_cesu8 (const uint8_t *source_p, uint8_t *destination_p, prop_length_t length);
void lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length);
void lexer_expect_object_literal_id (parser_context_t *context_p, uint32_t ident_opts);
void lexer_construct_literal_object (parser_context_t *context_p, const lexer_lit_location_t *literal_p,
uint8_t literal_type);
@ -646,7 +645,9 @@ bool lexer_construct_number_object (parser_context_t *context_p, bool is_expr, b
void lexer_convert_push_number_to_push_literal (parser_context_t *context_p);
uint16_t lexer_construct_function_object (parser_context_t *context_p, uint32_t extra_status_flags);
void lexer_construct_regexp_object (parser_context_t *context_p, bool parse_only);
bool lexer_compare_identifiers (const uint8_t *left_p, const uint8_t *right_p, size_t size);
bool lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, const uint8_t *right_p, size_t size);
bool lexer_compare_identifiers (parser_context_t *context_p, const lexer_lit_location_t *left_p,
const lexer_lit_location_t *right_p);
bool lexer_current_is_literal (parser_context_t *context_p, const lexer_lit_location_t *right_ident_p);
#if ENABLED (JERRY_ES2015)
bool lexer_token_is_identifier (parser_context_t *context_p, const char *identifier_p,

View File

@ -434,8 +434,7 @@ JERRY_STATIC_ASSERT (PARSER_MAXIMUM_IDENT_LENGTH <= UINT8_MAX,
static inline bool JERRY_ATTR_ALWAYS_INLINE
scanner_literal_is_arguments (lexer_lit_location_t *literal_p) /**< literal */
{
return (literal_p->length == 9
&& lexer_compare_identifiers (literal_p->char_p, (const uint8_t *) "arguments", 9));
return lexer_compare_identifier_to_string (literal_p, (const uint8_t *) "arguments", 9);
} /* scanner_literal_is_arguments */
/**
@ -986,7 +985,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
return literal_p;
}
}
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
{
/* The non-escaped version is preferred. */
literal_p->char_p = char_p;
@ -1000,8 +999,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
{
while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
{
if (literal_p->length == length
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
{
return literal_p;
}
@ -1065,10 +1063,11 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
scanner_literal_pool_t *literal_pool_p = scanner_context_p->active_literal_pool_p;
parser_list_iterator_t literal_iterator;
parser_list_iterator_init (&literal_pool_p->literal_pool, &literal_iterator);
lexer_lit_location_t *literal_location_p = &context_p->token.lit_location;
lexer_lit_location_t *literal_p;
const uint8_t *char_p = context_p->token.lit_location.char_p;
prop_length_t length = context_p->token.lit_location.length;
const uint8_t *char_p = literal_location_p->char_p;
prop_length_t length = literal_location_p->length;
if (JERRY_LIKELY (!context_p->token.lit_location.has_escape))
{
@ -1084,7 +1083,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
break;
}
}
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
{
literal_p->length = 0;
break;
@ -1096,8 +1095,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
{
while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
{
if (literal_p->length == length
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
{
literal_p->length = 0;
break;
@ -1118,8 +1116,7 @@ void
scanner_detect_eval_call (parser_context_t *context_p, /**< context */
scanner_context_t *scanner_context_p) /**< scanner context */
{
if (context_p->token.lit_location.length == 4
&& lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4)
if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4)
&& lexer_check_next_character (context_p, LIT_CHAR_LEFT_PAREN))
{
scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
@ -1147,7 +1144,7 @@ scanner_scope_find_let_declaration (parser_context_t *context_p, /**< context */
{
uint8_t *destination_p = (uint8_t *) scanner_malloc (context_p, literal_p->length);
lexer_convert_ident_to_cesu8 (literal_p->char_p, destination_p, literal_p->length);
lexer_convert_ident_to_cesu8 (destination_p, literal_p->char_p, literal_p->length);
name_p = ecma_new_ecma_string_from_utf8 (destination_p, literal_p->length);
scanner_free (destination_p, literal_p->length);
@ -1231,7 +1228,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
return;
}
}
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
{
scanner_raise_redeclaration_error (context_p);
return;
@ -1246,8 +1243,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
if (literal_p->type & SCANNER_LITERAL_IS_LOCAL
&& !(literal_p->type & SCANNER_LITERAL_IS_ARG)
&& (literal_p->type & SCANNER_LITERAL_IS_LOCAL) != SCANNER_LITERAL_IS_LOCAL
&& literal_p->length == length
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
&& lexer_compare_identifiers (context_p, literal_p, var_literal_p))
{
scanner_raise_redeclaration_error (context_p);
return;

View File

@ -376,8 +376,7 @@ scanner_handle_bracket (parser_context_t *context_p, /**< context */
arrow_source_p = NULL;
#endif /* ENABLED (JERRY_ES2015) */
if (context_p->token.lit_location.length == 4
&& lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4))
if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4))
{
scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
}

View File

@ -272,7 +272,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
const bool is_char_class = (re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
|| re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS);
const ecma_char_t prev_char = lit_utf8_peek_prev (parser_ctx_p->input_curr_p);
const ecma_char_t prev_char = lit_cesu8_peek_prev (parser_ctx_p->input_curr_p);
if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
{
lit_utf8_decr (&parser_ctx_p->input_curr_p);
@ -286,7 +286,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
}
lit_code_point_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
lit_code_point_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
if (ch == LIT_CHAR_RIGHT_SQUARE)
{
@ -318,7 +318,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
}
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
if (ch == LIT_CHAR_LOWERCASE_B)
{
@ -376,7 +376,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
parser_ctx_p->input_curr_p += 2;
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
&& is_range == false
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
{
start = code_unit;
continue;
@ -396,7 +396,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
parser_ctx_p->input_curr_p += 4;
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
&& is_range == false
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
{
start = code_unit;
continue;
@ -481,7 +481,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
&& lit_is_code_point_utf16_high_surrogate (ch)
&& parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
{
const ecma_char_t next_ch = lit_utf8_peek_next (parser_ctx_p->input_curr_p);
const ecma_char_t next_ch = lit_cesu8_peek_next (parser_ctx_p->input_curr_p);
if (lit_is_code_point_utf16_low_surrogate (next_ch))
{
ch = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) ch, next_ch);

View File

@ -315,7 +315,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
return ret_value;
}
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
ecma_char_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
switch (ch)
{
@ -348,7 +348,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
}
out_token_p->type = RE_TOK_CHAR;
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
if (ch == LIT_CHAR_LOWERCASE_B)
{

View File

@ -0,0 +1,36 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Evaluates the code and asserts that the evaluation throws a SyntaxError. */
function check_syntax_error (code) {
  var has_thrown = false
  try {
    eval(code)
  } catch (e) {
    has_thrown = true
    assert (e instanceof SyntaxError)
  }
  /* Completing without any exception is a failure as well. */
  assert (has_thrown)
}
eval("\u{000010C80}: break \ud803\udc80")
eval("\\u{10C80}: break \ud803\udc80")
eval("$\u{000010C80}$: break $\ud803\udc80$")
eval("$\\u{10C82}$: break $\ud803\udc82$")
assert("\u{000010C80}".length === 2)
assert("x\u{010C80}y".length === 4)
assert("\u{10C80}" === "\ud803\u{dc80}")
assert("\u{0}\x01" === "\u0000\u0001")
/* Surrogate pairs are not combined if they are passed as \u sequences. */
check_syntax_error("\\u{10C80}: break \\ud803\\udc80");

View File

@ -21,6 +21,39 @@
#include "test-common.h"
/**
 * Convert a zero terminated string of hexadecimal digits to a code point.
 *
 * Note:
 *   The first byte is always processed, so an empty string is rejected.
 *
 * @return the decoded value - if every byte is a hexadecimal digit
 *         UINT32_MAX - otherwise
 */
static lit_code_point_t
lexer_hex_to_character (const uint8_t *source_p) /**< current source position */
{
  lit_code_point_t code_point = 0;

  do
  {
    uint32_t digit = *source_p++;
    uint32_t digit_value;

    if (digit >= LIT_CHAR_0 && digit <= LIT_CHAR_9)
    {
      digit_value = digit - LIT_CHAR_0;
    }
    else
    {
      /* Fold the letter case so a single range check covers a-f and A-F. */
      digit = LEXER_TO_ASCII_LOWERCASE (digit);

      if (digit < LIT_CHAR_LOWERCASE_A || digit > LIT_CHAR_LOWERCASE_F)
      {
        return UINT32_MAX;
      }

      digit_value = digit - (LIT_CHAR_LOWERCASE_A - 10);
    }

    code_point = (code_point << 4) + digit_value;
  }
  while (*source_p);

  return code_point;
} /* lexer_hex_to_character */
int
main (void)
{
@ -29,50 +62,59 @@ main (void)
jmem_init ();
ecma_init ();
const uint8_t _1_byte_long1[] = "\\u007F";
const uint8_t _1_byte_long2[] = "\\u0000";
const uint8_t _1_byte_long3[] = "\\u0065";
const uint8_t _1_byte_long1[] = "007F";
const uint8_t _1_byte_long2[] = "0000";
const uint8_t _1_byte_long3[] = "0065";
const uint8_t _2_byte_long1[] = "\\u008F";
const uint8_t _2_byte_long2[] = "\\u00FF";
const uint8_t _2_byte_long3[] = "\\u07FF";
const uint8_t _2_byte_long1[] = "008F";
const uint8_t _2_byte_long2[] = "00FF";
const uint8_t _2_byte_long3[] = "07FF";
const uint8_t _3_byte_long1[] = "\\u08FF";
const uint8_t _3_byte_long2[] = "\\u0FFF";
const uint8_t _3_byte_long3[] = "\\uFFFF";
const uint8_t _3_byte_long1[] = "08FF";
const uint8_t _3_byte_long2[] = "0FFF";
const uint8_t _3_byte_long3[] = "FFFF";
const uint8_t _6_byte_long1[] = "10000";
const uint8_t _6_byte_long2[] = "10FFFF";
size_t length;
/* Test 1-byte-long unicode sequences. */
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long1 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long1));
TEST_ASSERT (length == 1);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long2 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long2));
TEST_ASSERT (length == 1);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long3 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long3));
TEST_ASSERT (length == 1);
/* Test 2-byte-long unicode sequences. */
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long1 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long1));
TEST_ASSERT (length == 2);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long2 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long2));
TEST_ASSERT (length == 2);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long3 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long3));
TEST_ASSERT (length == 2);
/* Test 3-byte-long unicode sequences. */
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long1 + 2, 4));
TEST_ASSERT (length != 2);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long2 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long1));
TEST_ASSERT (length == 3);
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long3 + 2, 4));
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long2));
TEST_ASSERT (length == 3);
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long3));
TEST_ASSERT (length == 3);
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long1));
TEST_ASSERT (length == 6);
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long2));
TEST_ASSERT (length == 6);
ecma_finalize ();
jmem_finalize ();

View File

@ -131,7 +131,7 @@ main (void)
while (curr_p < end_p)
{
code_units[code_units_count] = lit_utf8_peek_next (curr_p);
code_units[code_units_count] = lit_cesu8_peek_next (curr_p);
saved_positions[code_units_count] = curr_p;
code_units_count++;
calculated_length++;
@ -147,7 +147,7 @@ main (void)
{
ecma_length_t index = (ecma_length_t) rand () % code_units_count;
curr_p = saved_positions[index];
TEST_ASSERT (lit_utf8_peek_next (curr_p) == code_units[index]);
TEST_ASSERT (lit_cesu8_peek_next (curr_p) == code_units[index]);
}
}
@ -156,7 +156,7 @@ main (void)
{
TEST_ASSERT (code_units_count > 0);
calculated_length--;
TEST_ASSERT (code_units[calculated_length] == lit_utf8_peek_prev (curr_p));
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_peek_prev (curr_p));
lit_utf8_decr (&curr_p);
}
@ -164,7 +164,7 @@ main (void)
while (curr_p < end_p)
{
ecma_char_t code_unit = lit_utf8_read_next (&curr_p);
ecma_char_t code_unit = lit_cesu8_read_next (&curr_p);
TEST_ASSERT (code_unit == code_units[calculated_length]);
calculated_length++;
}
@ -175,7 +175,7 @@ main (void)
{
TEST_ASSERT (code_units_count > 0);
calculated_length--;
TEST_ASSERT (code_units[calculated_length] == lit_utf8_read_prev (&curr_p));
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_read_prev (&curr_p));
}
TEST_ASSERT (calculated_length == 0);

View File

@ -0,0 +1,61 @@
/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "jerryscript.h"
#include "test-common.h"
/**
 * Parse a script and report whether parsing produced an error.
 *
 * The script is passed to jerry_parse as a NUL-terminated string (its length
 * is taken with strlen). If the parse result is an error value, the error
 * type is additionally asserted to be JERRY_ERROR_SYNTAX.
 *
 * The script is only read, so string literals can be passed directly.
 *
 * @return true - if jerry_parse returned an error value
 *         false - otherwise
 */
static bool
test_syntax_error (const char *script_p) /**< script */
{
  jerry_value_t parse_result = jerry_parse (NULL,
                                            0,
                                            (const jerry_char_t *) script_p,
                                            strlen (script_p),
                                            JERRY_PARSE_NO_OPTS);

  bool result = false;

  if (jerry_value_is_error (parse_result))
  {
    result = true;
    /* Any failure other than a syntax error is unexpected for these scripts. */
    TEST_ASSERT (jerry_get_error_type (parse_result) == JERRY_ERROR_SYNTAX);
  }

  /* The parse result is released on both the success and the error path. */
  jerry_release_value (parse_result);
  return result;
} /* test_syntax_error */
/**
 * Unit test entry point.
 *
 * Checks how a label named with the code point U+10C80 (written as a 4 byte
 * utf8 sequence) can be referenced from a break statement.
 */
int
main (void)
{
  jerry_init (JERRY_INIT_EMPTY);

  /* Probe: if "\u{61}" itself is a syntax error, all checks below are skipped
   * (presumably the \u{hex} form is not supported by this build). */
  const bool braced_escape_parses = !test_syntax_error ("\\u{61}");

  if (braced_escape_parses)
  {
    /* Label consisting of the single non-basic plane character. */

    TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \\u{10C80}"));

    /* The \u surrogate pairs are ignored. The \u{hex} form must be used. */
    TEST_ASSERT (test_syntax_error ("\xF0\x90\xB2\x80: break \\ud803\\udc80"));

    /* The utf8 code point and the cesu8 surrogate pair must match. */
    TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \xed\xa0\x83\xed\xb2\x80"));

    /* Same three cases with the character placed between '$' characters. */

    TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\u{10C80}$"));
    TEST_ASSERT (test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\ud803\\udc80$"));
    TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\xed\xa0\x83\xed\xb2\x80$"));
  }

  jerry_cleanup ();

  return 0;
} /* main */

View File

@ -284,6 +284,7 @@ def create_binary(job, options):
subprocess.check_output(build_cmd)
ret = 0
except subprocess.CalledProcessError as err:
print(err.output)
ret = err.returncode
BINARY_CACHE[binary_key] = (ret, build_dir_path)