mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Implement \u{hex} support. (#3447)
A large rework, because surrogate pairs must be combined into a single code point. Currently only the range 0x10C80..0x10CF2 is accepted as valid identifier characters outside the Basic Multilingual Plane. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
parent
1db16c3a1c
commit
40d930d62c
@ -1553,31 +1553,59 @@ jerry_append_number_to_buffer (uint8_t *buffer_p, /**< buffer */
|
||||
static bool
|
||||
ecma_string_is_valid_identifier (const ecma_string_t *string_p)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
ECMA_STRING_TO_UTF8_STRING (string_p, str_buffer_p, str_buffer_size);
|
||||
|
||||
if (lit_char_is_identifier_start (str_buffer_p))
|
||||
const uint8_t *str_p = str_buffer_p;
|
||||
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
|
||||
|
||||
while (str_p < str_end_p)
|
||||
{
|
||||
const uint8_t *str_start_p = str_buffer_p;
|
||||
const uint8_t *str_end_p = str_buffer_p + str_buffer_size;
|
||||
lit_code_point_t code_point = *str_p;
|
||||
lit_utf8_size_t utf8_length = 1;
|
||||
|
||||
result = true;
|
||||
|
||||
while (str_start_p < str_end_p)
|
||||
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
|
||||
{
|
||||
if (!lit_char_is_identifier_part (str_start_p))
|
||||
utf8_length = lit_read_code_point_from_utf8 (str_p,
|
||||
(lit_utf8_size_t) (str_end_p - str_p),
|
||||
&code_point);
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
||||
&& str_p + 3 < str_end_p)
|
||||
{
|
||||
lit_code_point_t low_surrogate;
|
||||
lit_read_code_point_from_utf8 (str_p + 3,
|
||||
(lit_utf8_size_t) (str_end_p - (str_p + 3)),
|
||||
&low_surrogate);
|
||||
|
||||
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
|
||||
{
|
||||
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
|
||||
(ecma_char_t) low_surrogate);
|
||||
utf8_length = 2 * 3;
|
||||
}
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
}
|
||||
|
||||
if (str_p == str_buffer_p)
|
||||
{
|
||||
if (!lit_code_point_is_identifier_start (code_point))
|
||||
{
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
lit_utf8_incr (&str_start_p);
|
||||
}
|
||||
else if (!lit_code_point_is_identifier_part (code_point))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
str_p += utf8_length;
|
||||
}
|
||||
|
||||
ECMA_FINALIZE_UTF8_STRING (str_buffer_p, str_buffer_size);
|
||||
|
||||
return result;
|
||||
return str_p == str_end_p;
|
||||
} /* ecma_string_is_valid_identifier */
|
||||
|
||||
#endif /* ENABLED (JERRY_SNAPSHOT_SAVE) */
|
||||
|
||||
@ -461,16 +461,9 @@ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string
|
||||
if ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
/* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
|
||||
uint32_t character = ((((uint32_t) string_p[pos++]) & 0x7) << 18);
|
||||
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
|
||||
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
|
||||
character |= (((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
|
||||
JERRY_ASSERT (character >= 0x10000);
|
||||
character -= 0x10000;
|
||||
|
||||
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xd800 | (character >> 10)));
|
||||
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
|
||||
lit_four_byte_utf8_char_to_cesu8 (data_p, string_p + pos);
|
||||
data_p += 3 * 2;
|
||||
pos += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2683,10 +2676,10 @@ void
|
||||
ecma_stringbuilder_append_char (ecma_stringbuilder_t *builder_p, /**< string builder */
|
||||
const ecma_char_t c) /**< ecma char */
|
||||
{
|
||||
const lit_utf8_size_t size = (lit_utf8_size_t) lit_char_get_utf8_length (c);
|
||||
const lit_utf8_size_t size = (lit_utf8_size_t) lit_code_point_get_cesu8_length (c);
|
||||
lit_utf8_byte_t *dest_p = ecma_stringbuilder_grow (builder_p, size);
|
||||
|
||||
lit_char_to_utf8_bytes (dest_p, c);
|
||||
lit_code_point_to_cesu8_bytes (dest_p, c);
|
||||
} /* ecma_stringbuilder_append_char */
|
||||
|
||||
/**
|
||||
|
||||
@ -61,7 +61,7 @@ ecma_date_parse_date_chars (const lit_utf8_byte_t **str_p, /**< pointer to the c
|
||||
|
||||
while (num_of_chars--)
|
||||
{
|
||||
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_utf8_read_next (str_p)))
|
||||
if (*str_p >= str_end_p || !lit_char_is_decimal_digit (lit_cesu8_read_next (str_p)))
|
||||
{
|
||||
return ecma_number_make_nan ();
|
||||
}
|
||||
|
||||
@ -150,7 +150,7 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
|
||||
int sign = 1;
|
||||
|
||||
/* 4. */
|
||||
ecma_char_t current = lit_utf8_read_next (&string_curr_p);
|
||||
ecma_char_t current = lit_cesu8_read_next (&string_curr_p);
|
||||
if (current == LIT_CHAR_MINUS)
|
||||
{
|
||||
sign = -1;
|
||||
@ -162,7 +162,7 @@ ecma_builtin_global_object_parse_int (const lit_utf8_byte_t *string_buff, /**< r
|
||||
start_p = string_curr_p;
|
||||
if (string_curr_p < string_end_p)
|
||||
{
|
||||
current = lit_utf8_read_next (&string_curr_p);
|
||||
current = lit_cesu8_read_next (&string_curr_p);
|
||||
}
|
||||
}
|
||||
|
||||
@ -970,7 +970,7 @@ ecma_builtin_global_object_escape (lit_utf8_byte_t *input_start_p, /**< routine'
|
||||
|
||||
while (input_curr_p < input_end_p)
|
||||
{
|
||||
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
|
||||
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
|
||||
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
@ -1005,7 +1005,7 @@ ecma_builtin_global_object_escape (lit_utf8_byte_t *input_start_p, /**< routine'
|
||||
|
||||
while (input_curr_p < input_end_p)
|
||||
{
|
||||
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
|
||||
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
|
||||
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
@ -1091,7 +1091,7 @@ ecma_builtin_global_object_unescape (lit_utf8_byte_t *input_start_p, /**< routin
|
||||
while (input_curr_p < input_end_p)
|
||||
{
|
||||
/* 6. */
|
||||
ecma_char_t chr = lit_utf8_read_next (&input_curr_p);
|
||||
ecma_char_t chr = lit_cesu8_read_next (&input_curr_p);
|
||||
|
||||
/* 7-8. */
|
||||
if (status == 0 && chr == LIT_CHAR_PERCENT)
|
||||
|
||||
@ -713,7 +713,7 @@ ecma_builtin_helper_string_find_index (ecma_string_t *original_str_p, /**< index
|
||||
|
||||
/* iterate original string and try to match at each position */
|
||||
bool searching = true;
|
||||
ecma_char_t first_char = lit_utf8_read_next (&search_str_curr_p);
|
||||
ecma_char_t first_char = lit_cesu8_read_next (&search_str_curr_p);
|
||||
while (searching)
|
||||
{
|
||||
/* match as long as possible */
|
||||
@ -722,14 +722,14 @@ ecma_builtin_helper_string_find_index (ecma_string_t *original_str_p, /**< index
|
||||
|
||||
if (match_len < search_len &&
|
||||
index + match_len < original_len &&
|
||||
lit_utf8_read_next (&original_str_curr_p) == first_char)
|
||||
lit_cesu8_read_next (&original_str_curr_p) == first_char)
|
||||
{
|
||||
const lit_utf8_byte_t *nested_search_str_curr_p = search_str_curr_p;
|
||||
match_len++;
|
||||
|
||||
while (match_len < search_len &&
|
||||
index + match_len < original_len &&
|
||||
lit_utf8_read_next (&original_str_curr_p) == lit_utf8_read_next (&nested_search_str_curr_p))
|
||||
lit_cesu8_read_next (&original_str_curr_p) == lit_cesu8_read_next (&nested_search_str_curr_p))
|
||||
{
|
||||
match_len++;
|
||||
}
|
||||
|
||||
@ -1155,7 +1155,7 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
|
||||
|
||||
while (input_str_curr_p < input_str_end_p)
|
||||
{
|
||||
ecma_char_t character = lit_utf8_read_next (&input_str_curr_p);
|
||||
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
|
||||
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||
ecma_length_t character_length;
|
||||
lit_utf8_byte_t utf8_byte_buffer[LIT_CESU8_MAX_BYTES_IN_CODE_POINT];
|
||||
@ -1194,7 +1194,7 @@ ecma_builtin_string_prototype_object_conversion_helper (ecma_string_t *input_str
|
||||
|
||||
while (input_str_curr_p < input_str_end_p)
|
||||
{
|
||||
ecma_char_t character = lit_utf8_read_next (&input_str_curr_p);
|
||||
ecma_char_t character = lit_cesu8_read_next (&input_str_curr_p);
|
||||
ecma_char_t character_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||
ecma_length_t character_length;
|
||||
|
||||
|
||||
@ -220,11 +220,11 @@ ecma_regexp_unicode_advance (const lit_utf8_byte_t **str_p, /**< reference to st
|
||||
JERRY_ASSERT (str_p != NULL);
|
||||
const lit_utf8_byte_t *current_p = *str_p;
|
||||
|
||||
lit_code_point_t ch = lit_utf8_read_next (&current_p);
|
||||
lit_code_point_t ch = lit_cesu8_read_next (&current_p);
|
||||
if (lit_is_code_point_utf16_high_surrogate ((ecma_char_t) ch)
|
||||
&& current_p < end_p)
|
||||
{
|
||||
const ecma_char_t next_ch = lit_utf8_peek_next (current_p);
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (current_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
lit_utf8_incr (&current_p);
|
||||
@ -425,14 +425,14 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
|
||||
const bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||
lit_code_point_t ch1 = re_get_char (&bc_p); /* Already canonicalized. */
|
||||
lit_code_point_t ch2 = lit_utf8_read_next (&str_curr_p);
|
||||
lit_code_point_t ch2 = lit_cesu8_read_next (&str_curr_p);
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (re_ctx_p->flags & RE_FLAG_UNICODE
|
||||
&& lit_is_code_point_utf16_high_surrogate (ch2)
|
||||
&& str_curr_p < re_ctx_p->input_end_p)
|
||||
{
|
||||
const ecma_char_t next_ch = lit_utf8_peek_next (str_curr_p);
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (str_curr_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
lit_utf8_incr (&str_curr_p);
|
||||
@ -460,7 +460,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
return NULL; /* fail */
|
||||
}
|
||||
|
||||
const ecma_char_t ch = lit_utf8_read_next (&str_curr_p);
|
||||
const ecma_char_t ch = lit_cesu8_read_next (&str_curr_p);
|
||||
JERRY_TRACE_MSG ("Period matching '.' to %u: ", (unsigned int) ch);
|
||||
|
||||
if (lit_char_is_line_terminator (ch))
|
||||
@ -474,7 +474,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
&& lit_is_code_point_utf16_high_surrogate (ch)
|
||||
&& str_curr_p < re_ctx_p->input_end_p)
|
||||
{
|
||||
const ecma_char_t next_ch = lit_utf8_peek_next (str_curr_p);
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (str_curr_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
lit_utf8_incr (&str_curr_p);
|
||||
@ -501,7 +501,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
return NULL; /* fail */
|
||||
}
|
||||
|
||||
if (lit_char_is_line_terminator (lit_utf8_peek_prev (str_curr_p)))
|
||||
if (lit_char_is_line_terminator (lit_cesu8_peek_prev (str_curr_p)))
|
||||
{
|
||||
JERRY_TRACE_MSG ("match\n");
|
||||
break; /* tail merge */
|
||||
@ -526,7 +526,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
return NULL; /* fail */
|
||||
}
|
||||
|
||||
if (lit_char_is_line_terminator (lit_utf8_peek_next (str_curr_p)))
|
||||
if (lit_char_is_line_terminator (lit_cesu8_peek_next (str_curr_p)))
|
||||
{
|
||||
JERRY_TRACE_MSG ("match\n");
|
||||
break; /* tail merge */
|
||||
@ -539,10 +539,10 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
|
||||
{
|
||||
const bool is_wordchar_left = ((str_curr_p > re_ctx_p->input_start_p)
|
||||
&& lit_char_is_word_char (lit_utf8_peek_prev (str_curr_p)));
|
||||
&& lit_char_is_word_char (lit_cesu8_peek_prev (str_curr_p)));
|
||||
|
||||
const bool is_wordchar_right = ((str_curr_p < re_ctx_p->input_end_p)
|
||||
&& lit_char_is_word_char (lit_utf8_peek_next (str_curr_p)));
|
||||
&& lit_char_is_word_char (lit_cesu8_peek_next (str_curr_p)));
|
||||
|
||||
if (op == RE_OP_ASSERT_WORD_BOUNDARY)
|
||||
{
|
||||
@ -659,7 +659,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
else
|
||||
{
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
const ecma_char_t curr_ch = (ecma_char_t) ecma_regexp_canonicalize (lit_utf8_read_next (&str_curr_p),
|
||||
const ecma_char_t curr_ch = (ecma_char_t) ecma_regexp_canonicalize (lit_cesu8_read_next (&str_curr_p),
|
||||
is_ignorecase);
|
||||
|
||||
while (range_count-- > 0)
|
||||
@ -1115,7 +1115,7 @@ ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
break;
|
||||
}
|
||||
|
||||
lit_utf8_read_prev (&str_curr_p);
|
||||
lit_cesu8_read_prev (&str_curr_p);
|
||||
iter_count--;
|
||||
}
|
||||
}
|
||||
|
||||
@ -200,73 +200,33 @@ lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
|
||||
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
|
||||
} /* lit_char_is_unicode_non_letter_ident_part */
|
||||
|
||||
/**
|
||||
* Checks whether the next UTF8 character is a valid identifier start.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_start (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
||||
{
|
||||
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return lit_char_is_identifier_start_character (*src_p);
|
||||
}
|
||||
|
||||
/* ECMAScript 2015 specification allows some code points in supplementary plane.
|
||||
* However, we don't permit characters in supplementary characters as start of identifier.
|
||||
*/
|
||||
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return lit_char_is_identifier_start_character (lit_utf8_peek_next (src_p));
|
||||
} /* lit_char_is_identifier_start */
|
||||
|
||||
/**
|
||||
* Checks whether the character is a valid identifier start.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_start_character (uint16_t chr) /**< EcmaScript character */
|
||||
lit_code_point_is_identifier_start (lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
/* Fast path for ASCII-defined letters. */
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| chr == LIT_CHAR_DOLLAR_SIGN
|
||||
|| chr == LIT_CHAR_UNDERSCORE);
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| code_point == LIT_CHAR_DOLLAR_SIGN
|
||||
|| code_point == LIT_CHAR_UNDERSCORE);
|
||||
}
|
||||
|
||||
return lit_char_is_unicode_letter (chr);
|
||||
} /* lit_char_is_identifier_start_character */
|
||||
|
||||
/**
|
||||
* Checks whether the next UTF8 character is a valid identifier part.
|
||||
*
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
||||
{
|
||||
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
return lit_char_is_identifier_part_character (*src_p);
|
||||
/* TODO: detect these ranges correctly. */
|
||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
/* ECMAScript 2015 specification allows some code points in supplementary plane.
|
||||
* However, we don't permit characters in supplementary characters as part of identifier.
|
||||
*/
|
||||
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return lit_char_is_identifier_part_character (lit_utf8_peek_next (src_p));
|
||||
} /* lit_char_is_identifier_part */
|
||||
return lit_char_is_unicode_letter ((ecma_char_t) code_point);
|
||||
} /* lit_code_point_is_identifier_start */
|
||||
|
||||
/**
|
||||
* Checks whether the character is a valid identifier part.
|
||||
@ -274,21 +234,29 @@ lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8
|
||||
* @return true if it is.
|
||||
*/
|
||||
bool
|
||||
lit_char_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
|
||||
lit_code_point_is_identifier_part (lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
/* Fast path for ASCII-defined letters. */
|
||||
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
||||
{
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| (chr >= LIT_CHAR_0 && chr <= LIT_CHAR_9)
|
||||
|| chr == LIT_CHAR_DOLLAR_SIGN
|
||||
|| chr == LIT_CHAR_UNDERSCORE);
|
||||
return ((LEXER_TO_ASCII_LOWERCASE (code_point) >= LIT_CHAR_LOWERCASE_A
|
||||
&& LEXER_TO_ASCII_LOWERCASE (code_point) <= LIT_CHAR_LOWERCASE_Z)
|
||||
|| (code_point >= LIT_CHAR_0 && code_point <= LIT_CHAR_9)
|
||||
|| code_point == LIT_CHAR_DOLLAR_SIGN
|
||||
|| code_point == LIT_CHAR_UNDERSCORE);
|
||||
}
|
||||
|
||||
return (lit_char_is_unicode_letter (chr)
|
||||
|| lit_char_is_unicode_non_letter_ident_part (chr));
|
||||
} /* lit_char_is_identifier_part_character */
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (code_point >= LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* TODO: detect these ranges correctly. */
|
||||
return (code_point >= 0x10C80 && code_point <= 0x10CF2);
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
return (lit_char_is_unicode_letter ((ecma_char_t) code_point)
|
||||
|| lit_char_is_unicode_non_letter_ident_part ((ecma_char_t) code_point));
|
||||
} /* lit_code_point_is_identifier_part */
|
||||
|
||||
/**
|
||||
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
|
||||
@ -356,30 +324,47 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
|
||||
* @return length of the UTF8 representation.
|
||||
*/
|
||||
size_t
|
||||
lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
|
||||
ecma_char_t chr) /**< EcmaScript character */
|
||||
lit_code_point_to_cesu8_bytes (uint8_t *dst_p, /**< destination buffer */
|
||||
lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
||||
if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* 00000000 0xxxxxxx -> 0xxxxxxx */
|
||||
*dst_p = (uint8_t) chr;
|
||||
dst_p[0] = (uint8_t) code_point;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
||||
if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
dst_p[0] = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
|
||||
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((chr >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
*(dst_p++) = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
return 3;
|
||||
} /* lit_char_to_utf8_bytes */
|
||||
if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
|
||||
dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((code_point >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
return 3;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (code_point <= LIT_UNICODE_CODE_POINT_MAX);
|
||||
|
||||
code_point -= LIT_UTF8_4_BYTE_CODE_POINT_MIN;
|
||||
|
||||
dst_p[0] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
|
||||
dst_p[1] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x20 | ((code_point >> 16) & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
dst_p[2] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((code_point >> 10) & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
|
||||
dst_p[3] = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | 0xd);
|
||||
dst_p[4] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | 0x30 | ((code_point >> 6) & LIT_UTF8_LAST_4_BITS_MASK));
|
||||
dst_p[5] = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (code_point & LIT_UTF8_LAST_6_BITS_MASK));
|
||||
|
||||
return 3 * 2;
|
||||
} /* lit_code_point_to_cesu8_bytes */
|
||||
|
||||
/**
|
||||
* Returns the length of the UTF8 representation of a character.
|
||||
@ -387,23 +372,44 @@ lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
|
||||
* @return length of the UTF8 representation.
|
||||
*/
|
||||
size_t
|
||||
lit_char_get_utf8_length (ecma_char_t chr) /**< EcmaScript character */
|
||||
lit_code_point_get_cesu8_length (lit_code_point_t code_point) /**< code point */
|
||||
{
|
||||
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
||||
if (code_point < LIT_UTF8_2_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* 00000000 0xxxxxxx */
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
||||
if (code_point < LIT_UTF8_3_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* 00000yyy yyxxxxxx */
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* zzzzyyyy yyxxxxxx */
|
||||
return 3;
|
||||
} /* lit_char_get_utf8_length */
|
||||
if (code_point < LIT_UTF8_4_BYTE_CODE_POINT_MIN)
|
||||
{
|
||||
/* zzzzyyyy yyxxxxxx */
|
||||
return 3;
|
||||
}
|
||||
|
||||
/* high + low surrogate */
|
||||
return 2 * 3;
|
||||
} /* lit_code_point_get_cesu8_length */
|
||||
|
||||
/**
|
||||
* Convert a four byte long utf8 character to two three byte long cesu8 characters
*
* The four bytes at source_p are decoded into a single code point, which is
* then written to dst_p by lit_code_point_to_cesu8_bytes.
*
* Note: the source bytes are not validated here, and the byte count returned
*       by lit_code_point_to_cesu8_bytes is discarded, so the caller has to
*       advance its own source and destination positions.
|
||||
*/
|
||||
void
|
||||
lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, /**< destination buffer, receives the cesu8 bytes */
|
||||
const uint8_t *source_p) /**< source buffer, four bytes are read from it */
|
||||
{
/* Lead byte: only the bits selected by LIT_UTF8_LAST_3_BITS_MASK carry payload. */
|
||||
lit_code_point_t code_point = ((((uint32_t) source_p[0]) & LIT_UTF8_LAST_3_BITS_MASK) << 18);
/* Each of the three following bytes contributes six payload bits. */
|
||||
code_point |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
|
||||
code_point |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
|
||||
code_point |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
|
|
||||
/* Re-encode the assembled code point into the destination buffer. */
lit_code_point_to_cesu8_bytes (dst_p, code_point);
|
||||
} /* lit_four_byte_utf8_char_to_cesu8 */
|
||||
|
||||
/**
|
||||
* Parse the next number_of_characters hexadecimal character,
|
||||
|
||||
@ -75,10 +75,8 @@ bool lit_char_is_line_terminator (ecma_char_t c);
|
||||
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
|
||||
/* LIT_CHAR_BACKSLASH defined above */
|
||||
|
||||
bool lit_char_is_identifier_start (const uint8_t *src_p);
|
||||
bool lit_char_is_identifier_part (const uint8_t *src_p);
|
||||
bool lit_char_is_identifier_start_character (ecma_char_t chr);
|
||||
bool lit_char_is_identifier_part_character (ecma_char_t chr);
|
||||
bool lit_code_point_is_identifier_start (lit_code_point_t code_point);
|
||||
bool lit_code_point_is_identifier_part (lit_code_point_t code_point);
|
||||
|
||||
/*
|
||||
* Punctuator characters (ECMA-262 v5, 7.7)
|
||||
@ -215,8 +213,9 @@ bool lit_char_is_octal_digit (ecma_char_t c);
|
||||
bool lit_char_is_decimal_digit (ecma_char_t c);
|
||||
bool lit_char_is_hex_digit (ecma_char_t c);
|
||||
uint32_t lit_char_hex_to_int (ecma_char_t c);
|
||||
size_t lit_char_to_utf8_bytes (uint8_t *dst_p, ecma_char_t chr);
|
||||
size_t lit_char_get_utf8_length (ecma_char_t chr);
|
||||
size_t lit_code_point_to_cesu8_bytes (uint8_t *dst_p, lit_code_point_t code_point);
|
||||
size_t lit_code_point_get_cesu8_length (lit_code_point_t code_point);
|
||||
void lit_four_byte_utf8_char_to_cesu8 (uint8_t *dst_p, const uint8_t *source_p);
|
||||
|
||||
/* read a hex encoded code point from a zero terminated buffer */
|
||||
bool lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, lit_utf8_size_t number_of_characters,
|
||||
|
||||
@ -481,7 +481,7 @@ lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer wit
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
lit_cesu8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
ecma_char_t ch;
|
||||
@ -489,7 +489,7 @@ lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
|
||||
*buf_p += lit_read_code_unit_from_utf8 (*buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_read_next */
|
||||
} /* lit_cesu8_read_next */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
@ -497,7 +497,7 @@ lit_utf8_read_next (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
|
||||
* @return previous code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
lit_cesu8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (*buf_p);
|
||||
ecma_char_t ch;
|
||||
@ -506,7 +506,7 @@ lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
|
||||
lit_read_code_unit_from_utf8 (*buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_read_prev */
|
||||
} /* lit_cesu8_read_prev */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
@ -514,15 +514,15 @@ lit_utf8_read_prev (const lit_utf8_byte_t **buf_p) /**< [in,out] buffer with cha
|
||||
* @return next code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
JERRY_ASSERT (buf_p != NULL);
|
||||
ecma_char_t ch;
|
||||
|
||||
lit_read_code_unit_from_utf8 (buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_peek_next */
|
||||
} /* lit_cesu8_peek_next */
|
||||
|
||||
/**
|
||||
* Decodes a unicode code unit from non-empty cesu-8-encoded buffer
|
||||
@ -530,15 +530,15 @@ lit_utf8_peek_next (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with char
|
||||
* @return previous code unit
|
||||
*/
|
||||
ecma_char_t
|
||||
lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p) /**< [in,out] buffer with characters */
|
||||
{
|
||||
JERRY_ASSERT (buf_p);
|
||||
JERRY_ASSERT (buf_p != NULL);
|
||||
ecma_char_t ch;
|
||||
|
||||
lit_read_prev_code_unit_from_utf8 (buf_p, &ch);
|
||||
|
||||
return ch;
|
||||
} /* lit_utf8_peek_prev */
|
||||
} /* lit_cesu8_peek_prev */
|
||||
|
||||
/**
|
||||
* Increase cesu-8 encoded string pointer by one code unit.
|
||||
|
||||
@ -46,7 +46,6 @@
|
||||
#define LIT_UTF8_2_BYTE_MARKER (0xC0)
|
||||
#define LIT_UTF8_3_BYTE_MARKER (0xE0)
|
||||
#define LIT_UTF8_4_BYTE_MARKER (0xF0)
|
||||
#define LIT_UTF8_5_BYTE_MARKER (0xF8)
|
||||
#define LIT_UTF8_EXTRA_BYTE_MARKER (0x80)
|
||||
|
||||
#define LIT_UTF8_1_BYTE_MASK (0x80)
|
||||
@ -82,7 +81,7 @@
|
||||
/**
|
||||
* Byte values >= LIT_UTF8_FIRST_BYTE_MAX are not allowed in internal strings
|
||||
*/
|
||||
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
|
||||
#define LIT_UTF8_FIRST_BYTE_MAX (0xF8)
|
||||
|
||||
/* validation */
|
||||
bool lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size);
|
||||
@ -135,10 +134,10 @@ lit_utf8_size_t lit_read_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p,
|
||||
lit_utf8_size_t lit_read_prev_code_unit_from_utf8 (const lit_utf8_byte_t *buf_p,
|
||||
ecma_char_t *code_point);
|
||||
|
||||
ecma_char_t lit_utf8_read_next (const lit_utf8_byte_t **buf_p);
|
||||
ecma_char_t lit_utf8_read_prev (const lit_utf8_byte_t **buf_p);
|
||||
ecma_char_t lit_utf8_peek_next (const lit_utf8_byte_t *buf_p);
|
||||
ecma_char_t lit_utf8_peek_prev (const lit_utf8_byte_t *buf_p);
|
||||
ecma_char_t lit_cesu8_read_next (const lit_utf8_byte_t **buf_p);
|
||||
ecma_char_t lit_cesu8_read_prev (const lit_utf8_byte_t **buf_p);
|
||||
ecma_char_t lit_cesu8_peek_next (const lit_utf8_byte_t *buf_p);
|
||||
ecma_char_t lit_cesu8_peek_prev (const lit_utf8_byte_t *buf_p);
|
||||
void lit_utf8_incr (const lit_utf8_byte_t **buf_p);
|
||||
void lit_utf8_decr (const lit_utf8_byte_t **buf_p);
|
||||
|
||||
|
||||
@ -53,14 +53,13 @@ align_column_to_tab (parser_line_counter_t column) /**< current column */
|
||||
/**
|
||||
* Parse hexadecimal character sequence
|
||||
*
|
||||
* @return character value
|
||||
* @return character value or UINT32_MAX on error
|
||||
*/
|
||||
ecma_char_t
|
||||
lexer_hex_to_character (parser_context_t *context_p, /**< context */
|
||||
const uint8_t *source_p, /**< current source position */
|
||||
int length) /**< source length */
|
||||
static lit_code_point_t
|
||||
lexer_hex_to_code_point (const uint8_t *source_p, /**< current source position */
|
||||
parser_line_counter_t length) /**< source length */
|
||||
{
|
||||
uint32_t result = 0;
|
||||
lit_code_point_t result = 0;
|
||||
|
||||
do
|
||||
{
|
||||
@ -81,29 +80,94 @@ lexer_hex_to_character (parser_context_t *context_p, /**< context */
|
||||
}
|
||||
else
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
|
||||
return UINT32_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (--length > 0);
|
||||
|
||||
return (ecma_char_t) result;
|
||||
} /* lexer_hex_to_character */
|
||||
return result;
|
||||
} /* lexer_hex_to_code_point */
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
|
||||
/**
|
||||
* Parse hexadecimal character sequence enclosed in braces
|
||||
*
* The source position must point to the first character after the opening
* brace. Hexadecimal digits are accumulated until a closing brace is found.
*
* Parsing fails when a non-hexadecimal character is read (this includes a
* closing brace with no digit before it), when the accumulated value becomes
* greater than LIT_UNICODE_CODE_POINT_MAX, or when the end of the source is
* reached before the closing brace. On failure *length_p is left untouched.
*
|
||||
* @return character value or UINT32_MAX on error
|
||||
*/
|
||||
static lit_code_point_t
|
||||
lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< current source position (after the left brace) */
|
||||
const uint8_t *source_end_p, /**< source end */
|
||||
uint32_t *length_p) /**< [out] length of the sequence, set only on success */
|
||||
{
|
||||
lit_code_point_t result = 0;
|
||||
/* Four is the size of the \u{} sequence without any digits; one is added for each digit below. */
|
||||
uint32_t length = 4;
|
||||
|
|
||||
JERRY_ASSERT (source_p[-1] == LIT_CHAR_LEFT_BRACE);
|
||||
JERRY_ASSERT (source_p < source_end_p);
|
||||
|
|
||||
/* The first character is always consumed as a digit, so an empty brace pair is rejected. */
do
|
||||
{
|
||||
uint32_t byte = *source_p++;
|
||||
|
|
||||
/* Make room for the next hexadecimal digit. */
result <<= 4;
|
||||
|
|
||||
if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
|
||||
{
|
||||
result += byte - LIT_CHAR_0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Fold the character to lowercase so a single range check covers both letter cases. */
byte = LEXER_TO_ASCII_LOWERCASE (byte);
|
||||
if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
result += byte - (LIT_CHAR_LOWERCASE_A - 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Not a hexadecimal digit. */
return UINT32_MAX;
|
||||
}
|
||||
}
|
||||
|
|
||||
/* Reject values above the maximum code point and sequences cut off by the end of the source.
 * Checking the value after every digit keeps result small enough for the next shift. */
if (result >= (LIT_UNICODE_CODE_POINT_MAX + 1) || source_p >= source_end_p)
|
||||
{
|
||||
return UINT32_MAX;
|
||||
}
|
||||
length++;
|
||||
}
|
||||
while (*source_p != LIT_CHAR_RIGHT_BRACE);
|
||||
|
|
||||
*length_p = length;
|
||||
return result;
|
||||
} /* lexer_hex_in_braces_to_code_point */
|
||||
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
/**
|
||||
* Parse hexadecimal character sequence
|
||||
*
|
||||
* @return character value
|
||||
*/
|
||||
static ecma_char_t
|
||||
lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source position */
|
||||
int length) /**< source length */
|
||||
static lit_code_point_t
|
||||
lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */
|
||||
{
|
||||
uint32_t result = 0;
|
||||
lit_code_point_t result = 0;
|
||||
const uint8_t *char_p = *source_p;
|
||||
uint32_t length = (char_p[-1] == LIT_CHAR_LOWERCASE_U) ? 4 : 2;
|
||||
|
||||
do
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (char_p[0] == LIT_CHAR_LEFT_BRACE)
|
||||
{
|
||||
uint32_t byte = *source_p++;
|
||||
length = 0;
|
||||
char_p++;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
while (true)
|
||||
{
|
||||
uint32_t byte = *char_p++;
|
||||
|
||||
result <<= 4;
|
||||
|
||||
@ -118,10 +182,27 @@ lexer_unchecked_hex_to_character (const uint8_t *source_p, /**< current source p
|
||||
|
||||
result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10);
|
||||
}
|
||||
}
|
||||
while (--length > 0);
|
||||
|
||||
return (ecma_char_t) result;
|
||||
JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX);
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (length == 0)
|
||||
{
|
||||
if (*char_p != LIT_CHAR_RIGHT_BRACE)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
*source_p = char_p + 1;
|
||||
return result;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
if (--length == 0)
|
||||
{
|
||||
*source_p = char_p;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} /* lexer_unchecked_hex_to_character */
|
||||
|
||||
/**
|
||||
@ -509,102 +590,188 @@ static const uint8_t keyword_lengths_list[] =
|
||||
#undef LEXER_KEYWORD_LIST_LENGTH
|
||||
|
||||
/**
|
||||
* Parse identifier.
|
||||
* Flags for lexer_parse_identifier.
|
||||
*/
|
||||
static void
|
||||
/* Option bits accepted by lexer_parse_identifier; each non-zero value occupies a distinct bit. */
typedef enum
{
  LEXER_PARSE_NO_OPTS = 0x0, /**< no options */
  LEXER_PARSE_CHECK_KEYWORDS = 0x1, /**< check keywords */
  LEXER_PARSE_CHECK_START_AND_RETURN = 0x2, /**< check identifier start and return */
  LEXER_PARSE_CHECK_PART_AND_RETURN = 0x4, /**< check identifier part and return */
} lexer_parse_options_t;
|
||||
|
||||
/**
|
||||
* Parse identifier.
|
||||
*
|
||||
* @return true, if an identifier is parsed, false otherwise
|
||||
*/
|
||||
static bool
|
||||
lexer_parse_identifier (parser_context_t *context_p, /**< context */
|
||||
bool check_keywords) /**< check keywords */
|
||||
lexer_parse_options_t options) /**< check keywords */
|
||||
{
|
||||
/* Only very few identifiers contain \u escape sequences. */
|
||||
const uint8_t *source_p = context_p->source_p;
|
||||
const uint8_t *ident_start_p = context_p->source_p;
|
||||
/* Note: newline or tab cannot be part of an identifier. */
|
||||
parser_line_counter_t column = context_p->column;
|
||||
const uint8_t *source_end_p = context_p->source_end_p;
|
||||
size_t length = 0;
|
||||
|
||||
context_p->token.type = LEXER_LITERAL;
|
||||
context_p->token.ident_is_strict_keyword = false;
|
||||
context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
|
||||
context_p->token.lit_location.has_escape = false;
|
||||
uint8_t has_escape = false;
|
||||
|
||||
do
|
||||
{
|
||||
if (*source_p == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
uint16_t character;
|
||||
/* After a backslash an identifier must start. */
|
||||
lit_code_point_t code_point = UINT32_MAX;
|
||||
uint32_t escape_length = 6;
|
||||
|
||||
context_p->token.lit_location.has_escape = true;
|
||||
context_p->source_p = source_p;
|
||||
context_p->token.column = column;
|
||||
|
||||
if ((source_p + 6 > source_end_p) || (source_p[1] != LIT_CHAR_LOWERCASE_U))
|
||||
if (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
has_escape = true;
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (source_p + 5 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
|
||||
{
|
||||
if (source_p[2] == LIT_CHAR_LEFT_BRACE)
|
||||
{
|
||||
code_point = lexer_hex_in_braces_to_code_point (source_p + 3, source_end_p, &escape_length);
|
||||
}
|
||||
else if (source_p + 6 <= source_end_p)
|
||||
{
|
||||
code_point = lexer_hex_to_code_point (source_p + 2, 4);
|
||||
}
|
||||
}
|
||||
#else /* !ENABLED (JERRY_ES2015) */
|
||||
if (source_p + 6 <= source_end_p && source_p[1] == LIT_CHAR_LOWERCASE_U)
|
||||
{
|
||||
code_point = lexer_hex_to_code_point (source_p + 2, 4);
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
if (code_point == UINT32_MAX)
|
||||
{
|
||||
context_p->source_p = source_p;
|
||||
context_p->token.column = column;
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
|
||||
}
|
||||
|
||||
character = lexer_hex_to_character (context_p, source_p + 2, 4);
|
||||
|
||||
if (length == 0)
|
||||
{
|
||||
if (!lit_char_is_identifier_start_character (character))
|
||||
if (!lit_code_point_is_identifier_start (code_point))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_START);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!lit_char_is_identifier_part_character (character))
|
||||
if (!lit_code_point_is_identifier_part (code_point))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_IDENTIFIER_PART);
|
||||
}
|
||||
}
|
||||
|
||||
length += lit_char_get_utf8_length (character);
|
||||
source_p += 6;
|
||||
PARSER_PLUS_EQUAL_LC (column, 6);
|
||||
length += lit_code_point_get_cesu8_length (code_point);
|
||||
source_p += escape_length;
|
||||
PARSER_PLUS_EQUAL_LC (column, escape_length);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Valid identifiers cannot contain 4 byte long utf-8
|
||||
* characters, since those characters are represented
|
||||
* by 2 ecmascript (UTF-16) characters, and those
|
||||
* characters cannot be literal characters. */
|
||||
JERRY_ASSERT (source_p[0] < LEXER_UTF8_4BYTE_START);
|
||||
lit_code_point_t code_point = *source_p;
|
||||
lit_utf8_size_t utf8_length = 1, decoded_length = 1, char_count = 1;
|
||||
|
||||
source_p++;
|
||||
length++;
|
||||
column++;
|
||||
|
||||
while (source_p < source_end_p
|
||||
&& IS_UTF8_INTERMEDIATE_OCTET (source_p[0]))
|
||||
if (JERRY_UNLIKELY (code_point >= LIT_UTF8_2_BYTE_MARKER))
|
||||
{
|
||||
source_p++;
|
||||
length++;
|
||||
}
|
||||
}
|
||||
while (source_p < source_end_p
|
||||
&& (lit_char_is_identifier_part (source_p) || *source_p == LIT_CHAR_BACKSLASH));
|
||||
utf8_length = lit_read_code_point_from_utf8 (source_p,
|
||||
(lit_utf8_size_t) (source_end_p - source_p),
|
||||
&code_point);
|
||||
decoded_length = utf8_length;
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
/* Only ES2015 supports code points outside of the basic plane which can be part of an identifier. */
|
||||
if ((code_point >= LIT_UTF16_HIGH_SURROGATE_MIN && code_point <= LIT_UTF16_HIGH_SURROGATE_MAX)
|
||||
&& source_p + 3 < source_end_p)
|
||||
{
|
||||
lit_code_point_t low_surrogate;
|
||||
lit_read_code_point_from_utf8 (source_p + 3,
|
||||
(lit_utf8_size_t) (source_end_p - (source_p + 3)),
|
||||
&low_surrogate);
|
||||
|
||||
if (low_surrogate >= LIT_UTF16_LOW_SURROGATE_MIN && low_surrogate <= LIT_UTF16_LOW_SURROGATE_MAX)
|
||||
{
|
||||
code_point = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) code_point,
|
||||
(ecma_char_t) low_surrogate);
|
||||
utf8_length = 2 * 3;
|
||||
decoded_length = 2 * 3;
|
||||
char_count = 2;
|
||||
}
|
||||
}
|
||||
else if (source_p[0] >= LEXER_UTF8_4BYTE_START)
|
||||
{
|
||||
decoded_length = 2 * 3;
|
||||
has_escape = true;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
}
|
||||
|
||||
if (length == 0)
|
||||
{
|
||||
if (JERRY_UNLIKELY (options & (LEXER_PARSE_CHECK_START_AND_RETURN | LEXER_PARSE_CHECK_PART_AND_RETURN)))
|
||||
{
|
||||
if (options & LEXER_PARSE_CHECK_START_AND_RETURN)
|
||||
{
|
||||
return lit_code_point_is_identifier_start (code_point);
|
||||
}
|
||||
else
|
||||
{
|
||||
return lit_code_point_is_identifier_part (code_point);
|
||||
}
|
||||
}
|
||||
|
||||
if (!lit_code_point_is_identifier_start (code_point))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (!lit_code_point_is_identifier_part (code_point))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
source_p += utf8_length;
|
||||
length += decoded_length;
|
||||
PARSER_PLUS_EQUAL_LC (column, char_count);
|
||||
}
|
||||
while (source_p < source_end_p);
|
||||
|
||||
JERRY_ASSERT (length > 0);
|
||||
|
||||
context_p->token.type = LEXER_LITERAL;
|
||||
context_p->token.ident_is_strict_keyword = false;
|
||||
context_p->token.lit_location.type = LEXER_IDENT_LITERAL;
|
||||
context_p->token.lit_location.has_escape = has_escape;
|
||||
|
||||
context_p->source_p = ident_start_p;
|
||||
context_p->token.column = context_p->column;
|
||||
context_p->token.lit_location.char_p = ident_start_p;
|
||||
context_p->token.lit_location.char_p = context_p->source_p;
|
||||
context_p->token.lit_location.length = (prop_length_t) length;
|
||||
|
||||
if (length > PARSER_MAXIMUM_IDENT_LENGTH)
|
||||
if (JERRY_UNLIKELY (length > PARSER_MAXIMUM_IDENT_LENGTH))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_TOO_LONG);
|
||||
}
|
||||
|
||||
/* Check keywords. */
|
||||
if (check_keywords
|
||||
if ((options & LEXER_PARSE_CHECK_KEYWORDS)
|
||||
&& (length >= LEXER_KEYWORD_MIN_LENGTH && length <= LEXER_KEYWORD_MAX_LENGTH))
|
||||
{
|
||||
const uint8_t *ident_start_p = context_p->source_p;
|
||||
uint8_t buffer_p[LEXER_KEYWORD_MAX_LENGTH];
|
||||
|
||||
if (JERRY_UNLIKELY (context_p->token.lit_location.has_escape))
|
||||
{
|
||||
lexer_convert_ident_to_cesu8 (ident_start_p, buffer_p, (prop_length_t) length);
|
||||
lexer_convert_ident_to_cesu8 (buffer_p, ident_start_p, (prop_length_t) length);
|
||||
ident_start_p = buffer_p;
|
||||
}
|
||||
|
||||
@ -690,6 +857,7 @@ lexer_parse_identifier (parser_context_t *context_p, /**< context */
|
||||
|
||||
context_p->source_p = source_p;
|
||||
context_p->column = column;
|
||||
return true;
|
||||
} /* lexer_parse_identifier */
|
||||
|
||||
/**
|
||||
@ -840,20 +1008,40 @@ lexer_parse_string (parser_context_t *context_p) /**< context */
|
||||
|
||||
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
|
||||
{
|
||||
uint8_t hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
|
||||
uint32_t escape_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 3 : 5;
|
||||
lit_code_point_t code_point = UINT32_MAX;
|
||||
|
||||
context_p->token.line = line;
|
||||
context_p->token.column = (parser_line_counter_t) (column - 1);
|
||||
if (source_p + 1 + hex_part_length > source_end_p)
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (source_p + 4 <= source_end_p
|
||||
&& source_p[0] == LIT_CHAR_LOWERCASE_U
|
||||
&& source_p[1] == LIT_CHAR_LEFT_BRACE)
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_ESCAPE_SEQUENCE);
|
||||
code_point = lexer_hex_in_braces_to_code_point (source_p + 2, source_end_p, &escape_length);
|
||||
escape_length--;
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
if (source_p + escape_length <= source_end_p)
|
||||
{
|
||||
code_point = lexer_hex_to_code_point (source_p + 1, escape_length - 1);
|
||||
}
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
if (code_point == UINT32_MAX)
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
|
||||
}
|
||||
|
||||
length += lit_char_get_utf8_length (lexer_hex_to_character (context_p,
|
||||
source_p + 1,
|
||||
hex_part_length));
|
||||
source_p += hex_part_length + 1;
|
||||
PARSER_PLUS_EQUAL_LC (column, hex_part_length + 1u);
|
||||
length += lit_code_point_get_cesu8_length (code_point);
|
||||
|
||||
source_p += escape_length;
|
||||
PARSER_PLUS_EQUAL_LC (column, escape_length);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -1120,12 +1308,6 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
|
||||
}
|
||||
}
|
||||
|
||||
if (source_p < source_end_p
|
||||
&& (lit_char_is_identifier_start (source_p) || source_p[0] == LIT_CHAR_BACKSLASH))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
|
||||
}
|
||||
|
||||
length = (size_t) (source_p - context_p->source_p);
|
||||
if (length > PARSER_MAXIMUM_IDENT_LENGTH)
|
||||
{
|
||||
@ -1135,6 +1317,11 @@ lexer_parse_number (parser_context_t *context_p) /**< context */
|
||||
context_p->token.lit_location.length = (prop_length_t) length;
|
||||
PARSER_PLUS_EQUAL_LC (context_p->column, length);
|
||||
context_p->source_p = source_p;
|
||||
|
||||
if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_START_AND_RETURN))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_IDENTIFIER_AFTER_NUMBER);
|
||||
}
|
||||
} /* lexer_parse_number */
|
||||
|
||||
/**
|
||||
@ -1229,10 +1416,8 @@ lexer_next_token (parser_context_t *context_p) /**< context */
|
||||
return;
|
||||
}
|
||||
|
||||
if (lit_char_is_identifier_start (context_p->source_p)
|
||||
|| context_p->source_p[0] == LIT_CHAR_BACKSLASH)
|
||||
if (lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_KEYWORDS))
|
||||
{
|
||||
lexer_parse_identifier (context_p, true);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1723,8 +1908,8 @@ lexer_process_char_literal (parser_context_t *context_p, /**< context */
|
||||
* Convert an ident with escapes to a utf8 string.
|
||||
*/
|
||||
void
|
||||
lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
|
||||
uint8_t *destination_p, /**< destination string */
|
||||
lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
|
||||
const uint8_t *source_p, /**< source string */
|
||||
prop_length_t length) /**< length of destination string */
|
||||
{
|
||||
const uint8_t *destination_end_p = destination_p + length;
|
||||
@ -1735,14 +1920,22 @@ lexer_convert_ident_to_cesu8 (const uint8_t *source_p, /**< source string */
|
||||
{
|
||||
if (*source_p == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
destination_p += lit_char_to_utf8_bytes (destination_p,
|
||||
lexer_unchecked_hex_to_character (source_p + 2, 4));
|
||||
source_p += 6;
|
||||
source_p += 2;
|
||||
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
|
||||
lexer_unchecked_hex_to_character (&source_p));
|
||||
continue;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (IS_UTF8_INTERMEDIATE_OCTET (*source_p)
|
||||
|| lit_char_is_identifier_part (source_p));
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
if (*source_p >= LEXER_UTF8_4BYTE_START)
|
||||
{
|
||||
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
|
||||
|
||||
destination_p += 6;
|
||||
source_p += 4;
|
||||
continue;
|
||||
}
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
*destination_p++ = *source_p++;
|
||||
}
|
||||
@ -1783,7 +1976,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
|
||||
|
||||
if (literal_p->type == LEXER_IDENT_LITERAL)
|
||||
{
|
||||
lexer_convert_ident_to_cesu8 (source_p, destination_start_p, literal_p->length);
|
||||
lexer_convert_ident_to_cesu8 (destination_start_p, source_p, literal_p->length);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1835,7 +2028,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
|
||||
|
||||
if (*source_p >= LIT_CHAR_0 && *source_p <= LIT_CHAR_3)
|
||||
{
|
||||
uint32_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
|
||||
lit_code_point_t octal_number = (uint32_t) (*source_p - LIT_CHAR_0);
|
||||
|
||||
source_p++;
|
||||
JERRY_ASSERT (source_p < context_p->source_end_p);
|
||||
@ -1854,7 +2047,7 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
|
||||
}
|
||||
}
|
||||
|
||||
destination_p += lit_char_to_utf8_bytes (destination_p, (uint16_t) octal_number);
|
||||
destination_p += lit_code_point_to_cesu8_bytes (destination_p, octal_number);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1878,13 +2071,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
|
||||
|
||||
if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
|
||||
{
|
||||
int hex_part_length = (*source_p == LIT_CHAR_LOWERCASE_X) ? 2 : 4;
|
||||
JERRY_ASSERT (source_p + 1 + hex_part_length <= context_p->source_end_p);
|
||||
|
||||
destination_p += lit_char_to_utf8_bytes (destination_p,
|
||||
lexer_unchecked_hex_to_character (source_p + 1,
|
||||
hex_part_length));
|
||||
source_p += hex_part_length + 1;
|
||||
source_p++;
|
||||
destination_p += lit_code_point_to_cesu8_bytes (destination_p,
|
||||
lexer_unchecked_hex_to_character (&source_p));
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1946,18 +2135,9 @@ lexer_construct_literal_object (parser_context_t *context_p, /**< context */
|
||||
/* Processing 4 byte unicode sequence (even if it is
|
||||
* after a backslash). Always converted to two 3 byte
|
||||
* long sequence. */
|
||||
lit_four_byte_utf8_char_to_cesu8 (destination_p, source_p);
|
||||
|
||||
uint32_t character = ((((uint32_t) source_p[0]) & 0x7) << 18);
|
||||
character |= ((((uint32_t) source_p[1]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
|
||||
character |= ((((uint32_t) source_p[2]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
|
||||
character |= (((uint32_t) source_p[3]) & LIT_UTF8_LAST_6_BITS_MASK);
|
||||
|
||||
JERRY_ASSERT (character >= 0x10000);
|
||||
character -= 0x10000;
|
||||
destination_p += lit_char_to_utf8_bytes (destination_p,
|
||||
(ecma_char_t) (0xd800 | (character >> 10)));
|
||||
destination_p += lit_char_to_utf8_bytes (destination_p,
|
||||
(ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
|
||||
destination_p += 6;
|
||||
source_p += 4;
|
||||
continue;
|
||||
}
|
||||
@ -2376,15 +2556,14 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
|
||||
column++;
|
||||
}
|
||||
|
||||
if (source_p < source_end_p
|
||||
&& lit_char_is_identifier_part (source_p))
|
||||
context_p->source_p = source_p;
|
||||
context_p->column = column;
|
||||
|
||||
if (source_p < source_end_p && lexer_parse_identifier (context_p, LEXER_PARSE_CHECK_PART_AND_RETURN))
|
||||
{
|
||||
parser_raise_error (context_p, PARSER_ERR_UNKNOWN_REGEXP_FLAG);
|
||||
}
|
||||
|
||||
context_p->source_p = source_p;
|
||||
context_p->column = column;
|
||||
|
||||
length = (lit_utf8_size_t) (regex_end_p - regex_start_p);
|
||||
if (length > PARSER_MAXIMUM_STRING_LENGTH)
|
||||
{
|
||||
@ -2473,10 +2652,9 @@ lexer_expect_identifier (parser_context_t *context_p, /**< context */
|
||||
context_p->token.column = context_p->column;
|
||||
|
||||
if (context_p->source_p < context_p->source_end_p
|
||||
&& (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
|
||||
&& lexer_parse_identifier (context_p, (literal_type != LEXER_STRING_LITERAL ? LEXER_PARSE_CHECK_KEYWORDS
|
||||
: LEXER_PARSE_NO_OPTS)))
|
||||
{
|
||||
lexer_parse_identifier (context_p, literal_type != LEXER_STRING_LITERAL);
|
||||
|
||||
if (context_p->token.type == LEXER_LITERAL)
|
||||
{
|
||||
JERRY_ASSERT (context_p->token.lit_location.type == LEXER_IDENT_LITERAL);
|
||||
@ -2548,10 +2726,8 @@ lexer_expect_object_literal_id (parser_context_t *context_p, /**< context */
|
||||
context_p->token.column = context_p->column;
|
||||
bool create_literal_object = false;
|
||||
|
||||
if (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH)
|
||||
if (lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
|
||||
{
|
||||
lexer_parse_identifier (context_p, false);
|
||||
|
||||
if (!(ident_opts & (LEXER_OBJ_IDENT_ONLY_IDENTIFIERS | LEXER_OBJ_IDENT_OBJECT_PATTERN))
|
||||
&& context_p->token.lit_location.length == 3)
|
||||
{
|
||||
@ -2687,10 +2863,8 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
|
||||
context_p->token.column = context_p->column;
|
||||
|
||||
if (context_p->source_p < context_p->source_end_p
|
||||
&& (lit_char_is_identifier_start (context_p->source_p) || context_p->source_p[0] == LIT_CHAR_BACKSLASH))
|
||||
&& lexer_parse_identifier (context_p, LEXER_PARSE_NO_OPTS))
|
||||
{
|
||||
lexer_parse_identifier (context_p, false);
|
||||
|
||||
if ((ident_opts & LEXER_SCAN_IDENT_PROPERTY)
|
||||
&& context_p->token.lit_location.length == 3)
|
||||
{
|
||||
@ -2726,75 +2900,135 @@ lexer_scan_identifier (parser_context_t *context_p, /**< context */
|
||||
* Compares two identifiers.
|
||||
*
|
||||
* Note:
|
||||
* Escape sequences are allowed, size must be the same.
|
||||
* Escape sequences are allowed in the left identifier, but not in the right
|
||||
*
|
||||
* @return true if the two identifiers are the same
|
||||
*/
|
||||
bool
|
||||
lexer_compare_identifiers (const uint8_t *left_p, /**< left identifier */
|
||||
const uint8_t *right_p, /**< right identifier */
|
||||
size_t size) /**< byte size of the two identifiers */
|
||||
static bool
|
||||
lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier */
|
||||
const uint8_t *right_p, /**< right identifier string */
|
||||
size_t size) /**< byte size of the two identifiers */
|
||||
{
|
||||
uint8_t utf8_buf[3];
|
||||
size_t utf8_len, offset;
|
||||
uint8_t utf8_buf[6];
|
||||
|
||||
do
|
||||
{
|
||||
/* Backslash cannot be part of a multibyte UTF-8 character. */
|
||||
if (*left_p != LIT_CHAR_BACKSLASH && *right_p != LIT_CHAR_BACKSLASH)
|
||||
if (*left_p == *right_p)
|
||||
{
|
||||
if (*left_p++ != *right_p++)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
left_p++;
|
||||
right_p++;
|
||||
size--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*left_p == LIT_CHAR_BACKSLASH && *right_p == LIT_CHAR_BACKSLASH)
|
||||
size_t escape_size;
|
||||
|
||||
if (*left_p == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
uint16_t left_chr = lexer_unchecked_hex_to_character (left_p + 2, 4);
|
||||
left_p += 2;
|
||||
lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p);
|
||||
|
||||
if (left_chr != lexer_unchecked_hex_to_character (right_p + 2, 4))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
left_p += 6;
|
||||
right_p += 6;
|
||||
size -= lit_char_get_utf8_length (left_chr);
|
||||
continue;
|
||||
escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
|
||||
}
|
||||
else if (*left_p >= LEXER_UTF8_4BYTE_START)
|
||||
{
|
||||
lit_four_byte_utf8_char_to_cesu8 (utf8_buf, left_p);
|
||||
escape_size = 3 * 2;
|
||||
left_p += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* One character is encoded as unicode sequence. */
|
||||
if (*right_p == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
/* The pointers can be swapped. */
|
||||
const uint8_t *swap_p = left_p;
|
||||
left_p = right_p;
|
||||
right_p = swap_p;
|
||||
}
|
||||
|
||||
utf8_len = lit_char_to_utf8_bytes (utf8_buf, lexer_unchecked_hex_to_character (left_p + 2, 4));
|
||||
JERRY_ASSERT (utf8_len > 0);
|
||||
size -= utf8_len;
|
||||
offset = 0;
|
||||
size -= escape_size;
|
||||
|
||||
uint8_t *utf8_p = utf8_buf;
|
||||
do
|
||||
{
|
||||
if (utf8_buf[offset] != *right_p++)
|
||||
if (*right_p++ != *utf8_p++)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
while (offset < utf8_len);
|
||||
|
||||
left_p += 6;
|
||||
while (--escape_size > 0);
|
||||
}
|
||||
while (size > 0);
|
||||
|
||||
return true;
|
||||
} /* lexer_compare_identifier_to_chars */
|
||||
|
||||
/**
|
||||
* Compares an identifier to a string.
|
||||
*
|
||||
* Note:
|
||||
* Escape sequences are allowed in the left identifier, but not in the right
|
||||
*
|
||||
* @return true if the identifier equals to string
|
||||
*/
|
||||
bool
|
||||
lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, /**< left literal */
|
||||
const uint8_t *right_p, /**< right identifier string */
|
||||
size_t size) /**< byte size of the right identifier */
|
||||
{
|
||||
if (left_p->length != size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!left_p->has_escape)
|
||||
{
|
||||
return memcmp (left_p->char_p, right_p, size) == 0;
|
||||
}
|
||||
|
||||
return lexer_compare_identifier_to_chars (left_p->char_p, right_p, size);
|
||||
} /* lexer_compare_identifier_to_string */
|
||||
|
||||
/**
|
||||
* Compares two identifiers.
|
||||
*
|
||||
* Note:
|
||||
* Escape sequences are allowed in both identifiers
|
||||
*
|
||||
* @return true if the two identifiers are the same
|
||||
*/
|
||||
bool
|
||||
lexer_compare_identifiers (parser_context_t *context_p, /**< context */
|
||||
const lexer_lit_location_t *left_p, /**< left literal */
|
||||
const lexer_lit_location_t *right_p) /**< right literal */
|
||||
{
|
||||
prop_length_t length = left_p->length;
|
||||
|
||||
if (length != right_p->length)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!left_p->has_escape)
|
||||
{
|
||||
return lexer_compare_identifier_to_chars (right_p->char_p, left_p->char_p, length);
|
||||
}
|
||||
|
||||
if (!right_p->has_escape)
|
||||
{
|
||||
return lexer_compare_identifier_to_chars (left_p->char_p, right_p->char_p, length);
|
||||
}
|
||||
|
||||
uint8_t buf_p[64];
|
||||
|
||||
if (length <= 64)
|
||||
{
|
||||
lexer_convert_ident_to_cesu8 (buf_p, left_p->char_p, length);
|
||||
return lexer_compare_identifier_to_chars (right_p->char_p, buf_p, length);
|
||||
}
|
||||
|
||||
uint8_t *dynamic_buf_p = parser_malloc (context_p, length);
|
||||
|
||||
lexer_convert_ident_to_cesu8 (dynamic_buf_p, left_p->char_p, length);
|
||||
bool result = lexer_compare_identifier_to_chars (right_p->char_p, dynamic_buf_p, length);
|
||||
parser_free (dynamic_buf_p, length);
|
||||
|
||||
return result;
|
||||
} /* lexer_compare_identifiers */
|
||||
|
||||
/**
|
||||
@ -2818,7 +3052,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
|
||||
|
||||
if (left_ident_p->length != right_ident_p->length)
|
||||
{
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!left_ident_p->has_escape && !right_ident_p->has_escape)
|
||||
@ -2826,7 +3060,7 @@ lexer_current_is_literal (parser_context_t *context_p, /**< context */
|
||||
return memcmp (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length) == 0;
|
||||
}
|
||||
|
||||
return lexer_compare_identifiers (left_ident_p->char_p, right_ident_p->char_p, left_ident_p->length);
|
||||
return lexer_compare_identifiers (context_p, left_ident_p, right_ident_p);
|
||||
} /* lexer_current_is_literal */
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
|
||||
@ -637,8 +637,7 @@ bool lexer_check_yield_no_arg (parser_context_t *context_p);
|
||||
void lexer_parse_string (parser_context_t *context_p);
|
||||
void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type);
|
||||
void lexer_scan_identifier (parser_context_t *context_p, uint32_t ident_opts);
|
||||
ecma_char_t lexer_hex_to_character (parser_context_t *context_p, const uint8_t *source_p, int length);
|
||||
void lexer_convert_ident_to_cesu8 (const uint8_t *source_p, uint8_t *destination_p, prop_length_t length);
|
||||
void lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length);
|
||||
void lexer_expect_object_literal_id (parser_context_t *context_p, uint32_t ident_opts);
|
||||
void lexer_construct_literal_object (parser_context_t *context_p, const lexer_lit_location_t *literal_p,
|
||||
uint8_t literal_type);
|
||||
@ -646,7 +645,9 @@ bool lexer_construct_number_object (parser_context_t *context_p, bool is_expr, b
|
||||
void lexer_convert_push_number_to_push_literal (parser_context_t *context_p);
|
||||
uint16_t lexer_construct_function_object (parser_context_t *context_p, uint32_t extra_status_flags);
|
||||
void lexer_construct_regexp_object (parser_context_t *context_p, bool parse_only);
|
||||
bool lexer_compare_identifiers (const uint8_t *left_p, const uint8_t *right_p, size_t size);
|
||||
bool lexer_compare_identifier_to_string (const lexer_lit_location_t *left_p, const uint8_t *right_p, size_t size);
|
||||
bool lexer_compare_identifiers (parser_context_t *context_p, const lexer_lit_location_t *left_p,
|
||||
const lexer_lit_location_t *right_p);
|
||||
bool lexer_current_is_literal (parser_context_t *context_p, const lexer_lit_location_t *right_ident_p);
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
bool lexer_token_is_identifier (parser_context_t *context_p, const char *identifier_p,
|
||||
|
||||
@ -434,8 +434,7 @@ JERRY_STATIC_ASSERT (PARSER_MAXIMUM_IDENT_LENGTH <= UINT8_MAX,
|
||||
static inline bool JERRY_ATTR_ALWAYS_INLINE
|
||||
scanner_literal_is_arguments (lexer_lit_location_t *literal_p) /**< literal */
|
||||
{
|
||||
return (literal_p->length == 9
|
||||
&& lexer_compare_identifiers (literal_p->char_p, (const uint8_t *) "arguments", 9));
|
||||
return lexer_compare_identifier_to_string (literal_p, (const uint8_t *) "arguments", 9);
|
||||
} /* scanner_literal_is_arguments */
|
||||
|
||||
/**
|
||||
@ -986,7 +985,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
|
||||
return literal_p;
|
||||
}
|
||||
}
|
||||
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
|
||||
{
|
||||
/* The non-escaped version is preferred. */
|
||||
literal_p->char_p = char_p;
|
||||
@ -1000,8 +999,7 @@ scanner_add_custom_literal (parser_context_t *context_p, /**< context */
|
||||
{
|
||||
while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
|
||||
{
|
||||
if (literal_p->length == length
|
||||
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
|
||||
{
|
||||
return literal_p;
|
||||
}
|
||||
@ -1065,10 +1063,11 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
|
||||
scanner_literal_pool_t *literal_pool_p = scanner_context_p->active_literal_pool_p;
|
||||
parser_list_iterator_t literal_iterator;
|
||||
parser_list_iterator_init (&literal_pool_p->literal_pool, &literal_iterator);
|
||||
lexer_lit_location_t *literal_location_p = &context_p->token.lit_location;
|
||||
lexer_lit_location_t *literal_p;
|
||||
|
||||
const uint8_t *char_p = context_p->token.lit_location.char_p;
|
||||
prop_length_t length = context_p->token.lit_location.length;
|
||||
const uint8_t *char_p = literal_location_p->char_p;
|
||||
prop_length_t length = literal_location_p->length;
|
||||
|
||||
if (JERRY_LIKELY (!context_p->token.lit_location.has_escape))
|
||||
{
|
||||
@ -1084,7 +1083,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
|
||||
{
|
||||
literal_p->length = 0;
|
||||
break;
|
||||
@ -1096,8 +1095,7 @@ scanner_append_argument (parser_context_t *context_p, /**< context */
|
||||
{
|
||||
while ((literal_p = (lexer_lit_location_t *) parser_list_iterator_next (&literal_iterator)) != NULL)
|
||||
{
|
||||
if (literal_p->length == length
|
||||
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
if (lexer_compare_identifiers (context_p, literal_p, literal_location_p))
|
||||
{
|
||||
literal_p->length = 0;
|
||||
break;
|
||||
@ -1118,8 +1116,7 @@ void
|
||||
scanner_detect_eval_call (parser_context_t *context_p, /**< context */
|
||||
scanner_context_t *scanner_context_p) /**< scanner context */
|
||||
{
|
||||
if (context_p->token.lit_location.length == 4
|
||||
&& lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4)
|
||||
if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4)
|
||||
&& lexer_check_next_character (context_p, LIT_CHAR_LEFT_PAREN))
|
||||
{
|
||||
scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
|
||||
@ -1147,7 +1144,7 @@ scanner_scope_find_let_declaration (parser_context_t *context_p, /**< context */
|
||||
{
|
||||
uint8_t *destination_p = (uint8_t *) scanner_malloc (context_p, literal_p->length);
|
||||
|
||||
lexer_convert_ident_to_cesu8 (literal_p->char_p, destination_p, literal_p->length);
|
||||
lexer_convert_ident_to_cesu8 (destination_p, literal_p->char_p, literal_p->length);
|
||||
|
||||
name_p = ecma_new_ecma_string_from_utf8 (destination_p, literal_p->length);
|
||||
scanner_free (destination_p, literal_p->length);
|
||||
@ -1231,7 +1228,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
else if (lexer_compare_identifier_to_string (literal_p, char_p, length))
|
||||
{
|
||||
scanner_raise_redeclaration_error (context_p);
|
||||
return;
|
||||
@ -1246,8 +1243,7 @@ scanner_detect_invalid_var (parser_context_t *context_p, /**< context */
|
||||
if (literal_p->type & SCANNER_LITERAL_IS_LOCAL
|
||||
&& !(literal_p->type & SCANNER_LITERAL_IS_ARG)
|
||||
&& (literal_p->type & SCANNER_LITERAL_IS_LOCAL) != SCANNER_LITERAL_IS_LOCAL
|
||||
&& literal_p->length == length
|
||||
&& lexer_compare_identifiers (literal_p->char_p, char_p, length))
|
||||
&& lexer_compare_identifiers (context_p, literal_p, var_literal_p))
|
||||
{
|
||||
scanner_raise_redeclaration_error (context_p);
|
||||
return;
|
||||
|
||||
@ -376,8 +376,7 @@ scanner_handle_bracket (parser_context_t *context_p, /**< context */
|
||||
arrow_source_p = NULL;
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
if (context_p->token.lit_location.length == 4
|
||||
&& lexer_compare_identifiers (context_p->token.lit_location.char_p, (const uint8_t *) "eval", 4))
|
||||
if (lexer_compare_identifier_to_string (&context_p->token.lit_location, (const uint8_t *) "eval", 4))
|
||||
{
|
||||
scanner_context_p->active_literal_pool_p->status_flags |= SCANNER_LITERAL_POOL_NO_REG;
|
||||
}
|
||||
|
||||
@ -272,7 +272,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
const bool is_char_class = (re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
|
||||
|| re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS);
|
||||
|
||||
const ecma_char_t prev_char = lit_utf8_peek_prev (parser_ctx_p->input_curr_p);
|
||||
const ecma_char_t prev_char = lit_cesu8_peek_prev (parser_ctx_p->input_curr_p);
|
||||
if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
@ -286,7 +286,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
|
||||
}
|
||||
|
||||
lit_code_point_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
lit_code_point_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
@ -318,7 +318,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
|
||||
}
|
||||
|
||||
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_LOWERCASE_B)
|
||||
{
|
||||
@ -376,7 +376,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
parser_ctx_p->input_curr_p += 2;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
@ -396,7 +396,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
parser_ctx_p->input_curr_p += 4;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
&& lit_cesu8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
@ -481,7 +481,7 @@ re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< number of classes */
|
||||
&& lit_is_code_point_utf16_high_surrogate (ch)
|
||||
&& parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
const ecma_char_t next_ch = lit_utf8_peek_next (parser_ctx_p->input_curr_p);
|
||||
const ecma_char_t next_ch = lit_cesu8_peek_next (parser_ctx_p->input_curr_p);
|
||||
if (lit_is_code_point_utf16_low_surrogate (next_ch))
|
||||
{
|
||||
ch = lit_convert_surrogate_pair_to_code_point ((ecma_char_t) ch, next_ch);
|
||||
|
||||
@ -315,7 +315,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
ecma_char_t ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
switch (ch)
|
||||
{
|
||||
@ -348,7 +348,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
}
|
||||
|
||||
out_token_p->type = RE_TOK_CHAR;
|
||||
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
ch = lit_cesu8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_LOWERCASE_B)
|
||||
{
|
||||
|
||||
36
tests/jerry/es2015/identifier-escape.js
Normal file
36
tests/jerry/es2015/identifier-escape.js
Normal file
@ -0,0 +1,36 @@
|
||||
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/* Evaluate 'code' and require that the evaluation fails with a SyntaxError. */
function check_syntax_error (code) {
  try {
    eval (code)
    /* Reaching this point means the code was accepted, which is a failure. */
    assert (false)
  } catch (error) {
    assert (error instanceof SyntaxError)
  }
}
|
||||
|
||||
eval("\u{000010C80}: break \ud803\udc80")
|
||||
eval("\\u{10C80}: break \ud803\udc80")
|
||||
eval("$\u{000010C80}$: break $\ud803\udc80$")
|
||||
eval("$\\u{10C82}$: break $\ud803\udc82$")
|
||||
|
||||
assert("\u{000010C80}".length === 2)
|
||||
assert("x\u{010C80}y".length === 4)
|
||||
assert("\u{10C80}" === "\ud803\u{dc80}")
|
||||
assert("\u{0}\x01" === "\u0000\u0001")
|
||||
|
||||
/* Surrogate pairs are not combined if they are passed as \u sequences. */
|
||||
check_syntax_error("\\u{10C80}: break \\ud803\\udc80");
|
||||
@ -21,6 +21,39 @@
|
||||
|
||||
#include "test-common.h"
|
||||
|
||||
static lit_code_point_t
|
||||
lexer_hex_to_character (const uint8_t *source_p) /**< current source position */
|
||||
{
|
||||
lit_code_point_t result = 0;
|
||||
|
||||
do
|
||||
{
|
||||
uint32_t byte = *source_p++;
|
||||
|
||||
result <<= 4;
|
||||
|
||||
if (byte >= LIT_CHAR_0 && byte <= LIT_CHAR_9)
|
||||
{
|
||||
result += byte - LIT_CHAR_0;
|
||||
}
|
||||
else
|
||||
{
|
||||
byte = LEXER_TO_ASCII_LOWERCASE (byte);
|
||||
if (byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
result += byte - (LIT_CHAR_LOWERCASE_A - 10);
|
||||
}
|
||||
else
|
||||
{
|
||||
return UINT32_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (*source_p);
|
||||
|
||||
return result;
|
||||
} /* lexer_hex_to_character */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
@ -29,50 +62,59 @@ main (void)
|
||||
jmem_init ();
|
||||
ecma_init ();
|
||||
|
||||
const uint8_t _1_byte_long1[] = "\\u007F";
|
||||
const uint8_t _1_byte_long2[] = "\\u0000";
|
||||
const uint8_t _1_byte_long3[] = "\\u0065";
|
||||
const uint8_t _1_byte_long1[] = "007F";
|
||||
const uint8_t _1_byte_long2[] = "0000";
|
||||
const uint8_t _1_byte_long3[] = "0065";
|
||||
|
||||
const uint8_t _2_byte_long1[] = "\\u008F";
|
||||
const uint8_t _2_byte_long2[] = "\\u00FF";
|
||||
const uint8_t _2_byte_long3[] = "\\u07FF";
|
||||
const uint8_t _2_byte_long1[] = "008F";
|
||||
const uint8_t _2_byte_long2[] = "00FF";
|
||||
const uint8_t _2_byte_long3[] = "07FF";
|
||||
|
||||
const uint8_t _3_byte_long1[] = "\\u08FF";
|
||||
const uint8_t _3_byte_long2[] = "\\u0FFF";
|
||||
const uint8_t _3_byte_long3[] = "\\uFFFF";
|
||||
const uint8_t _3_byte_long1[] = "08FF";
|
||||
const uint8_t _3_byte_long2[] = "0FFF";
|
||||
const uint8_t _3_byte_long3[] = "FFFF";
|
||||
|
||||
const uint8_t _6_byte_long1[] = "10000";
|
||||
const uint8_t _6_byte_long2[] = "10FFFF";
|
||||
|
||||
size_t length;
|
||||
|
||||
/* Test 1-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long1 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long1));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long2));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _1_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_1_byte_long3));
|
||||
TEST_ASSERT (length == 1);
|
||||
|
||||
/* Test 2-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long1 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long1));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long2));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _2_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_2_byte_long3));
|
||||
TEST_ASSERT (length == 2);
|
||||
|
||||
/* Test 3-byte-long unicode sequences. */
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long1 + 2, 4));
|
||||
TEST_ASSERT (length != 2);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long2 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long1));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_char_get_utf8_length (lexer_hex_to_character (0, _3_byte_long3 + 2, 4));
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long2));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_3_byte_long3));
|
||||
TEST_ASSERT (length == 3);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long1));
|
||||
TEST_ASSERT (length == 6);
|
||||
|
||||
length = lit_code_point_get_cesu8_length (lexer_hex_to_character (_6_byte_long2));
|
||||
TEST_ASSERT (length == 6);
|
||||
|
||||
ecma_finalize ();
|
||||
jmem_finalize ();
|
||||
|
||||
|
||||
@ -131,7 +131,7 @@ main (void)
|
||||
|
||||
while (curr_p < end_p)
|
||||
{
|
||||
code_units[code_units_count] = lit_utf8_peek_next (curr_p);
|
||||
code_units[code_units_count] = lit_cesu8_peek_next (curr_p);
|
||||
saved_positions[code_units_count] = curr_p;
|
||||
code_units_count++;
|
||||
calculated_length++;
|
||||
@ -147,7 +147,7 @@ main (void)
|
||||
{
|
||||
ecma_length_t index = (ecma_length_t) rand () % code_units_count;
|
||||
curr_p = saved_positions[index];
|
||||
TEST_ASSERT (lit_utf8_peek_next (curr_p) == code_units[index]);
|
||||
TEST_ASSERT (lit_cesu8_peek_next (curr_p) == code_units[index]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -156,7 +156,7 @@ main (void)
|
||||
{
|
||||
TEST_ASSERT (code_units_count > 0);
|
||||
calculated_length--;
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_utf8_peek_prev (curr_p));
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_peek_prev (curr_p));
|
||||
lit_utf8_decr (&curr_p);
|
||||
}
|
||||
|
||||
@ -164,7 +164,7 @@ main (void)
|
||||
|
||||
while (curr_p < end_p)
|
||||
{
|
||||
ecma_char_t code_unit = lit_utf8_read_next (&curr_p);
|
||||
ecma_char_t code_unit = lit_cesu8_read_next (&curr_p);
|
||||
TEST_ASSERT (code_unit == code_units[calculated_length]);
|
||||
calculated_length++;
|
||||
}
|
||||
@ -175,7 +175,7 @@ main (void)
|
||||
{
|
||||
TEST_ASSERT (code_units_count > 0);
|
||||
calculated_length--;
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_utf8_read_prev (&curr_p));
|
||||
TEST_ASSERT (code_units[calculated_length] == lit_cesu8_read_prev (&curr_p));
|
||||
}
|
||||
|
||||
TEST_ASSERT (calculated_length == 0);
|
||||
|
||||
61
tests/unit-core/test-unicode.c
Normal file
61
tests/unit-core/test-unicode.c
Normal file
@ -0,0 +1,61 @@
|
||||
/* Copyright JS Foundation and other contributors, http://js.foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "jerryscript.h"
|
||||
#include "test-common.h"
|
||||
|
||||
static bool
|
||||
test_syntax_error (char *script_p) /**< script */
|
||||
{
|
||||
jerry_value_t parse_result = jerry_parse (NULL,
|
||||
0,
|
||||
(const jerry_char_t *) script_p,
|
||||
strlen (script_p),
|
||||
JERRY_PARSE_NO_OPTS);
|
||||
|
||||
bool result = false;
|
||||
|
||||
if (jerry_value_is_error (parse_result))
|
||||
{
|
||||
result = true;
|
||||
TEST_ASSERT (jerry_get_error_type (parse_result) == JERRY_ERROR_SYNTAX);
|
||||
}
|
||||
|
||||
jerry_release_value (parse_result);
|
||||
return result;
|
||||
} /* test_syntax_error */
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
jerry_init (JERRY_INIT_EMPTY);
|
||||
|
||||
if (!test_syntax_error ("\\u{61}"))
|
||||
{
|
||||
TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \\u{10C80}"));
|
||||
/* The \u surrogate pairs are ignored. The \u{hex} form must be used. */
|
||||
TEST_ASSERT (test_syntax_error ("\xF0\x90\xB2\x80: break \\ud803\\udc80"));
|
||||
/* The utf8 code point and the cesu8 surrogate pair must match. */
|
||||
TEST_ASSERT (!test_syntax_error ("\xF0\x90\xB2\x80: break \xed\xa0\x83\xed\xb2\x80"));
|
||||
|
||||
TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\u{10C80}$"));
|
||||
TEST_ASSERT (test_syntax_error ("$\xF0\x90\xB2\x80$: break $\\ud803\\udc80$"));
|
||||
TEST_ASSERT (!test_syntax_error ("$\xF0\x90\xB2\x80$: break $\xed\xa0\x83\xed\xb2\x80$"));
|
||||
}
|
||||
|
||||
jerry_cleanup ();
|
||||
|
||||
return 0;
|
||||
} /* main */
|
||||
@ -284,6 +284,7 @@ def create_binary(job, options):
|
||||
subprocess.check_output(build_cmd)
|
||||
ret = 0
|
||||
except subprocess.CalledProcessError as err:
|
||||
print(err.output)
|
||||
ret = err.returncode
|
||||
|
||||
BINARY_CACHE[binary_key] = (ret, build_dir_path)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user