mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Support unicode in lexer_parse_identifier_or_keyword (renamed from parse_name).
JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com
This commit is contained in:
parent
6432e4d7cf
commit
baf3748d1b
@ -28,6 +28,11 @@ static token saved_token, prev_token, sent_token, empty_token;
|
|||||||
static bool allow_dump_lines = false, strict_mode;
|
static bool allow_dump_lines = false, strict_mode;
|
||||||
static size_t buffer_size = 0;
|
static size_t buffer_size = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME:
|
||||||
|
* jerry_api_char_t should not be used outside of API implementation
|
||||||
|
*/
|
||||||
|
|
||||||
/* Represents the contents of a script. */
|
/* Represents the contents of a script. */
|
||||||
static const jerry_api_char_t *buffer_start = NULL;
|
static const jerry_api_char_t *buffer_start = NULL;
|
||||||
static const jerry_api_char_t *token_start;
|
static const jerry_api_char_t *token_start;
|
||||||
@ -157,6 +162,34 @@ lexer_create_token_for_charset (token_type tt, /**< token type */
|
|||||||
return create_token_from_lit (tt, lit);
|
return create_token_from_lit (tt, lit);
|
||||||
} /* lexer_create_token_for_charset */
|
} /* lexer_create_token_for_charset */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the character falls into IdentifierStart group (ECMA-262 v5, 7.6)
|
||||||
|
*
|
||||||
|
* @return true / false
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lexer_is_char_can_be_identifier_start (ecma_char_t c) /**< a character */
|
||||||
|
{
|
||||||
|
return (lit_char_is_unicode_letter (c)
|
||||||
|
|| c == LIT_CHAR_DOLLAR_SIGN
|
||||||
|
|| c == LIT_CHAR_UNDERSCORE
|
||||||
|
|| c == LIT_CHAR_BACKSLASH);
|
||||||
|
} /* lexer_is_char_can_be_identifier_start */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the character falls into IdentifierPart group (ECMA-262 v5, 7.6)
|
||||||
|
*
|
||||||
|
* @return true / false
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
lexer_is_char_can_be_identifier_part (ecma_char_t c) /**< a character */
|
||||||
|
{
|
||||||
|
return (lexer_is_char_can_be_identifier_start (c)
|
||||||
|
|| lit_char_is_unicode_combining_mark (c)
|
||||||
|
|| lit_char_is_unicode_digit (c)
|
||||||
|
|| lit_char_is_unicode_connector_punctuation (c));
|
||||||
|
} /* lexer_is_char_can_be_identifier_part */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4)
|
* Try to decode specified character as SingleEscapeCharacter (ECMA-262, v5, 7.8.4)
|
||||||
*
|
*
|
||||||
@ -652,28 +685,29 @@ consume_char (void)
|
|||||||
* TOK_BOOL - for BooleanLiteral
|
* TOK_BOOL - for BooleanLiteral
|
||||||
*/
|
*/
|
||||||
static token
|
static token
|
||||||
parse_name (void)
|
lexer_parse_identifier_or_keyword (void)
|
||||||
{
|
{
|
||||||
ecma_char_t c = (ecma_char_t) LA (0);
|
ecma_char_t c = LA (0);
|
||||||
|
|
||||||
JERRY_ASSERT (isalpha (c) || c == '$' || c == '_' || c == '\\');
|
JERRY_ASSERT (lexer_is_char_can_be_identifier_start (c));
|
||||||
|
|
||||||
new_token ();
|
new_token ();
|
||||||
|
|
||||||
|
bool is_correct_identifier_name = true;
|
||||||
bool is_escape_sequence_occured = false;
|
bool is_escape_sequence_occured = false;
|
||||||
bool is_all_chars_were_lowercase_ascii = true;
|
bool is_all_chars_were_lowercase_ascii = true;
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
c = (ecma_char_t) LA (0);
|
c = LA (0);
|
||||||
|
|
||||||
if (c == '\\')
|
if (c == LIT_CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
consume_char ();
|
consume_char ();
|
||||||
|
|
||||||
is_escape_sequence_occured = true;
|
is_escape_sequence_occured = true;
|
||||||
|
|
||||||
bool is_unicode_escape_sequence = (LA (0) == 'u');
|
bool is_unicode_escape_sequence = (LA (0) == LIT_CHAR_LOWERCASE_U);
|
||||||
consume_char ();
|
consume_char ();
|
||||||
|
|
||||||
if (is_unicode_escape_sequence)
|
if (is_unicode_escape_sequence)
|
||||||
@ -684,36 +718,35 @@ parse_name (void)
|
|||||||
true,
|
true,
|
||||||
&c))
|
&c))
|
||||||
{
|
{
|
||||||
PARSE_ERROR ("Malformed escape sequence", token_start - buffer_start);
|
is_correct_identifier_name = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* c now contains character, encoded in the UnicodeEscapeSequence */
|
/* c now contains character, encoded in the UnicodeEscapeSequence */
|
||||||
if (!isalpha (c)
|
|
||||||
&& !isdigit (c)
|
// Check character, converted from UnicodeEscapeSequence
|
||||||
&& c != '$'
|
if (!lexer_is_char_can_be_identifier_part (c))
|
||||||
&& c != '_')
|
|
||||||
{
|
{
|
||||||
PARSE_ERROR ("Invalid character in identifier", token_start - buffer_start);
|
is_correct_identifier_name = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PARSE_ERROR ("Only unicode escape sequences are allowed in identifiers",
|
is_correct_identifier_name = false;
|
||||||
token_start - buffer_start);
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!isalpha (c)
|
else if (!lexer_is_char_can_be_identifier_part (c))
|
||||||
&& !isdigit (c)
|
|
||||||
&& c != '$'
|
|
||||||
&& c != '_')
|
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!islower (c))
|
if (!(c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN
|
||||||
|
&& c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END))
|
||||||
{
|
{
|
||||||
is_all_chars_were_lowercase_ascii = false;
|
is_all_chars_were_lowercase_ascii = false;
|
||||||
}
|
}
|
||||||
@ -722,6 +755,11 @@ parse_name (void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_correct_identifier_name)
|
||||||
|
{
|
||||||
|
PARSE_ERROR ("Illegal identifier name", lit_utf8_iterator_get_offset (&src_iter));
|
||||||
|
}
|
||||||
|
|
||||||
const lit_utf8_size_t charset_size = (lit_utf8_size_t) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
|
const lit_utf8_size_t charset_size = (lit_utf8_size_t) (lit_utf8_iterator_get_ptr (&src_iter) - token_start);
|
||||||
|
|
||||||
token ret = empty_token;
|
token ret = empty_token;
|
||||||
@ -754,7 +792,7 @@ parse_name (void)
|
|||||||
token_start = NULL;
|
token_start = NULL;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
} /* parse_name */
|
} /* lexer_parse_identifier_or_keyword */
|
||||||
|
|
||||||
/* In this function we cannot use strtol function
|
/* In this function we cannot use strtol function
|
||||||
since there is no octal literals in ECMAscript. */
|
since there is no octal literals in ECMAscript. */
|
||||||
@ -1199,9 +1237,10 @@ lexer_next_token_private (void)
|
|||||||
|
|
||||||
JERRY_ASSERT (token_start == NULL);
|
JERRY_ASSERT (token_start == NULL);
|
||||||
|
|
||||||
if (isalpha (c) || c == '$' || c == '_' || c == '\\')
|
/* ECMA-262 v5, 7.6, Identifier */
|
||||||
|
if (lexer_is_char_can_be_identifier_start (c))
|
||||||
{
|
{
|
||||||
return parse_name ();
|
return lexer_parse_identifier_or_keyword ();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isdigit (c) || (c == '.' && isdigit (LA (1))))
|
if (isdigit (c) || (c == '.' && isdigit (LA (1))))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user