Add preprocessor definitions for various character codes referenced in ECMA-262, and helpers for checking whether a character falls into an ECMA-defined character category; remove the lit_char_is_carriage_return and lit_char_is_new_line helpers.

JerryScript-DCO-1.0-Signed-off-by: Ruben Ayrapetyan r.ayrapetyan@samsung.com
This commit is contained in:
Ruben Ayrapetyan 2015-07-01 21:57:15 +03:00
parent 7d53133fcb
commit 2a78d24fce
3 changed files with 419 additions and 48 deletions

View File

@ -16,44 +16,242 @@
#include "lit-char-helpers.h"
/**
* Check if specified character is the newline character
* Check if specified character is one of the Format-Control characters
*
* @return true - if the character is "<LF>" character according to ECMA-262 v5, Table 3,
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 1,
* false - otherwise.
*/
bool
lit_char_is_new_line (ecma_char_t c) /**< code unit */
lit_char_is_format_control (ecma_char_t c) /**< code unit */
{
return (c == '\x0A');
} /* lit_char_is_new_line */
return (c == LIT_CHAR_ZWNJ
|| c == LIT_CHAR_ZWJ
|| c == LIT_CHAR_BOM);
} /* lit_char_is_format_control */
/**
* Check if specified character the carriage return character
* Check if specified character is the Space Separator character
*
* @return true - if the character is "<CR>" character according to ECMA-262 v5, Table 3,
* See also:
* ECMA-262 v5, Table 2
*
* @return true - if the character falls into "Space, Separator" ("Zs") character category,
* false - otherwise.
*/
bool
lit_char_is_carriage_return (ecma_char_t c) /**< code unit */
lit_char_is_space_separator (ecma_char_t c) /**< code unit */
{
return (c == '\x0D');
} /* lit_char_is_carriage_return */
/* Zs */
#define LIT_UNICODE_RANGE_ZS(range_begin, range_end) \
if (c >= (range_begin) && c <= (range_end)) \
{ \
return true; \
}
#include "lit-unicode-ranges.inc.h"
return false;
} /* lit_char_is_space_separator */
/**
* Check if specified character is one of LineTerminator (ECMA-262 v5, Table 3) characters
* Check if specified character is one of the Whitespace characters
*
* @return true - if the character is one of LineTerminator characters,
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
* false - otherwise.
*/
bool
lit_char_is_white_space (ecma_char_t c) /**< code unit */
{
  /* Control characters that are compared against individually */
  if (c == LIT_CHAR_TAB || c == LIT_CHAR_VTAB || c == LIT_CHAR_FF)
  {
    return true;
  }

  /* Space, no-break space and byte order mark are compared against individually as well */
  if (c == LIT_CHAR_SP || c == LIT_CHAR_NBSP || c == LIT_CHAR_BOM)
  {
    return true;
  }

  /* Any remaining code unit is whitespace only if the space separator check accepts it */
  return lit_char_is_space_separator (c);
} /* lit_char_is_white_space */
/**
* Check if specified character is one of LineTerminator characters
*
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3,
* false - otherwise.
*/
bool
lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
{
/* FIXME: Handle <LS> and <PS> (ECMA-262 v5, 7.3, Table 3) when Unicode would be supported */
return (lit_char_is_carriage_return (c)
|| lit_char_is_new_line (c));
return (c == LIT_CHAR_LF
|| c == LIT_CHAR_CR
|| c == LIT_CHAR_LS
|| c == LIT_CHAR_PS);
} /* lit_char_is_line_terminator */
/**
 * Tell whether a code unit is a unicode letter (ECMA-262 v5, 7.6)
 *
 * Note:
 *      A code unit is accepted when it belongs to one of these categories:
 *       - Lu (uppercase letter);
 *       - Ll (lowercase letter);
 *       - Lt (titlecase letter);
 *       - Lm (modifier letter);
 *       - Lo (other letter);
 *       - Nl (letter number).
 *
 * @return true - if the code unit falls into one of the categories listed above,
 *         false - otherwise.
 */
bool
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
{
  /* ASCII letters are answered directly, before the range list is consulted */
  if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
  {
    return true;
  }

  if (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
  {
    return true;
  }

  /* Range test shared by all six categories: return 'true' once the code unit is inside the range */
#define LIT_CHAR_RETURN_TRUE_IF_IN_RANGE(range_first, range_last) \
  if ((range_first) <= c && c <= (range_last)) \
  { \
    return true; \
  }

  /* Lu, Ll, Lt, Lm, Lo and Nl entries of the included range list all use the same test */
#define LIT_UNICODE_RANGE_LU(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_LL(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_LT(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_LM(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_LO(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_NL(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)

#include "lit-unicode-ranges.inc.h"

#undef LIT_CHAR_RETURN_TRUE_IF_IN_RANGE

  /* No range of the six categories contains the code unit */
  return false;
} /* lit_char_is_unicode_letter */
/**
 * Tell whether a code unit is a unicode combining mark (ECMA-262 v5, 7.6)
 *
 * Note:
 *      A code unit is accepted when it belongs to one of these categories:
 *       - Mn (non-spacing mark);
 *       - Mc (combining spacing mark).
 *
 * @return true - if the code unit falls into one of the categories listed above,
 *         false - otherwise.
 */
bool
lit_char_is_unicode_combining_mark (ecma_char_t c) /**< code unit */
{
  /* Range test shared by both categories: return 'true' once the code unit is inside the range */
#define LIT_CHAR_RETURN_TRUE_IF_IN_RANGE(range_first, range_last) \
  if ((range_first) <= c && c <= (range_last)) \
  { \
    return true; \
  }

  /* Mn and Mc entries of the included range list use the same test */
#define LIT_UNICODE_RANGE_MN(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)
#define LIT_UNICODE_RANGE_MC(range_first, range_last) LIT_CHAR_RETURN_TRUE_IF_IN_RANGE (range_first, range_last)

#include "lit-unicode-ranges.inc.h"

#undef LIT_CHAR_RETURN_TRUE_IF_IN_RANGE

  /* Neither an Mn nor an Mc range contains the code unit */
  return false;
} /* lit_char_is_unicode_combining_mark */
/**
 * Tell whether a code unit is a unicode digit (ECMA-262 v5, 7.6)
 *
 * Note:
 *      The only category consulted is Nd (decimal number).
 *
 * @return true - if the code unit falls into the Nd category,
 *         false - otherwise.
 */
bool
lit_char_is_unicode_digit (ecma_char_t c) /**< code unit */
{
  /* Callback for the Nd entries of the range list included below */
#define LIT_UNICODE_RANGE_ND(first_code_unit, last_code_unit) \
  if (!(c < (first_code_unit) || c > (last_code_unit))) \
  { \
    return true; \
  }

#include "lit-unicode-ranges.inc.h"

  /* No Nd range contains the code unit */
  return false;
} /* lit_char_is_unicode_digit */
/**
 * Tell whether a code unit is a unicode connector punctuation (ECMA-262 v5, 7.6)
 *
 * Note:
 *      The only category consulted is Pc (connector punctuation).
 *
 * @return true - if the code unit falls into the Pc category,
 *         false - otherwise.
 */
bool
lit_char_is_unicode_connector_punctuation (ecma_char_t c) /**< code unit */
{
  /* Callback for the Pc entries of the range list included below */
#define LIT_UNICODE_RANGE_PC(first_code_unit, last_code_unit) \
  if (!(c < (first_code_unit) || c > (last_code_unit))) \
  { \
    return true; \
  }

#include "lit-unicode-ranges.inc.h"

  /* No Pc range contains the code unit */
  return false;
} /* lit_char_is_unicode_connector_punctuation */
/**
* Check if specified character is a word character (part of IsWordChar abstract operation)
*
@ -65,10 +263,10 @@ lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
bool
lit_char_is_word_char (ecma_char_t c) /**< code unit */
{
if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
|| c == '_')
if ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|| c == LIT_CHAR_UNDERSCORE)
{
return true;
}
@ -85,30 +283,18 @@ uint32_t
lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
* one of [0-9A-Fa-f] characters */
{
switch (c)
if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
{
case '0': return 0x0;
case '1': return 0x1;
case '2': return 0x2;
case '3': return 0x3;
case '4': return 0x4;
case '5': return 0x5;
case '6': return 0x6;
case '7': return 0x7;
case '8': return 0x8;
case '9': return 0x9;
case 'a':
case 'A': return 0xA;
case 'b':
case 'B': return 0xB;
case 'c':
case 'C': return 0xC;
case 'd':
case 'D': return 0xD;
case 'e':
case 'E': return 0xE;
case 'f':
case 'F': return 0xF;
default: JERRY_UNREACHABLE ();
return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
}
else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
{
return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10);
}
else
{
JERRY_ASSERT (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END);
return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10);
}
} /* lit_char_hex_to_int */

View File

@ -18,10 +18,195 @@
#include "lit-globals.h"
extern bool lit_char_is_new_line (ecma_char_t);
extern bool lit_char_is_carriage_return (ecma_char_t);
/*
* Format control characters (ECMA-262 v5, Table 1)
*
* Each constant is a single code unit value, already converted to ecma_char_t.
*/
#define LIT_CHAR_ZWNJ ((ecma_char_t) 0x200C) /* zero width non-joiner */
#define LIT_CHAR_ZWJ ((ecma_char_t) 0x200D) /* zero width joiner */
#define LIT_CHAR_BOM ((ecma_char_t) 0xFEFF) /* byte order mark */
/* Returns true if the code unit equals LIT_CHAR_ZWNJ, LIT_CHAR_ZWJ or LIT_CHAR_BOM */
extern bool lit_char_is_format_control (ecma_char_t);
/*
* Whitespace characters (ECMA-262 v5, Table 2)
*
* Each constant is a single code unit value, already converted to ecma_char_t.
*/
#define LIT_CHAR_TAB ((ecma_char_t) 0x0009) /* tab */
#define LIT_CHAR_VTAB ((ecma_char_t) 0x000B) /* vertical tab */
#define LIT_CHAR_FF ((ecma_char_t) 0x000C) /* form feed */
#define LIT_CHAR_SP ((ecma_char_t) 0x0020) /* space */
#define LIT_CHAR_NBSP ((ecma_char_t) 0x00A0) /* no-break space */
/* LIT_CHAR_BOM is defined above */
/* Returns true if the code unit lies in one of the "Space, Separator" (Zs) ranges of lit-unicode-ranges.inc.h */
extern bool lit_char_is_space_separator (ecma_char_t);
/*
* Returns true for LIT_CHAR_TAB, LIT_CHAR_VTAB, LIT_CHAR_FF, LIT_CHAR_SP, LIT_CHAR_NBSP, LIT_CHAR_BOM
* and for every code unit accepted by lit_char_is_space_separator
*/
extern bool lit_char_is_white_space (ecma_char_t);
/*
* Line terminator characters (ECMA-262 v5, Table 3)
*
* Each constant is a single code unit value, already converted to ecma_char_t.
*/
#define LIT_CHAR_LF ((ecma_char_t) 0x000A) /* line feed */
#define LIT_CHAR_CR ((ecma_char_t) 0x000D) /* carriage return */
#define LIT_CHAR_LS ((ecma_char_t) 0x2028) /* line separator */
#define LIT_CHAR_PS ((ecma_char_t) 0x2029) /* paragraph separator */
/* Returns true if the code unit equals LIT_CHAR_LF, LIT_CHAR_CR, LIT_CHAR_LS or LIT_CHAR_PS */
extern bool lit_char_is_line_terminator (ecma_char_t);
/*
* String Single Character Escape Sequences (ECMA-262 v5, Table 4)
*
* Only the constants that no earlier group provides are defined here;
* the remaining ones are cross-referenced in the comments below.
*/
#define LIT_CHAR_BS ((ecma_char_t) 0x0008) /* backspace */
/* LIT_CHAR_TAB is defined above */
/* LIT_CHAR_LF is defined above */
/* LIT_CHAR_VTAB is defined above */
/* LIT_CHAR_FF is defined above */
/* LIT_CHAR_CR is defined above */
#define LIT_CHAR_DOUBLE_QUOTE ((ecma_char_t) '"') /* double quote */
#define LIT_CHAR_SINGLE_QUOTE ((ecma_char_t) '\'') /* single quote */
#define LIT_CHAR_BACKSLASH ((ecma_char_t) '\\') /* reverse solidus (backslash) */
/*
* Comment characters (ECMA-262 v5, 7.4)
*
* The two characters the comment delimiters are built from.
*/
#define LIT_CHAR_SLASH ((ecma_char_t) '/') /* solidus */
#define LIT_CHAR_ASTERISK ((ecma_char_t) '*') /* asterisk */
/*
* Identifier name characters (ECMA-262 v5, 7.6)
*/
#define LIT_CHAR_DOLLAR_SIGN ((ecma_char_t) '$') /* dollar sign */
#define LIT_CHAR_UNDERSCORE ((ecma_char_t) '_') /* low line (underscore) */
/* LIT_CHAR_BACKSLASH is defined above */
/* Returns true for ASCII letters and for code units in the Lu, Ll, Lt, Lm, Lo or Nl categories */
extern bool lit_char_is_unicode_letter (ecma_char_t);
/* Returns true for code units in the Mn or Mc categories */
extern bool lit_char_is_unicode_combining_mark (ecma_char_t);
/* Returns true for code units in the Nd category */
extern bool lit_char_is_unicode_digit (ecma_char_t);
/* Returns true for code units in the Pc category */
extern bool lit_char_is_unicode_connector_punctuation (ecma_char_t);
/*
* Punctuator characters (ECMA-262 v5, 7.7)
*
* Single characters only; each constant is the character literal converted to ecma_char_t.
*/
#define LIT_CHAR_LEFT_BRACE ((ecma_char_t) '{') /* left curly bracket */
#define LIT_CHAR_RIGHT_BRACE ((ecma_char_t) '}') /* right curly bracket */
#define LIT_CHAR_LEFT_PAREN ((ecma_char_t) '(') /* left parenthesis */
#define LIT_CHAR_RIGHT_PAREN ((ecma_char_t) ')') /* right parenthesis */
#define LIT_CHAR_LEFT_SQUARE ((ecma_char_t) '[') /* left square bracket */
#define LIT_CHAR_RIGHT_SQUARE ((ecma_char_t) ']') /* right square bracket */
#define LIT_CHAR_DOT ((ecma_char_t) '.') /* dot */
#define LIT_CHAR_SEMICOLON ((ecma_char_t) ';') /* semicolon */
#define LIT_CHAR_COMMA ((ecma_char_t) ',') /* comma */
#define LIT_CHAR_LESS_THAN ((ecma_char_t) '<') /* less-than sign */
#define LIT_CHAR_GREATER_THAN ((ecma_char_t) '>') /* greater-than sign */
#define LIT_CHAR_EQUALS ((ecma_char_t) '=') /* equals sign */
#define LIT_CHAR_PLUS ((ecma_char_t) '+') /* plus sign */
#define LIT_CHAR_MINUS ((ecma_char_t) '-') /* hyphen-minus */
/* LIT_CHAR_ASTERISK is defined above */
#define LIT_CHAR_PERCENT ((ecma_char_t) '%') /* percent sign */
#define LIT_CHAR_AMPERSAND ((ecma_char_t) '&') /* ampersand */
#define LIT_CHAR_VLINE ((ecma_char_t) '|') /* vertical line */
#define LIT_CHAR_CIRCUMFLEX ((ecma_char_t) '^') /* circumflex accent */
#define LIT_CHAR_EXCLAMATION ((ecma_char_t) '!') /* exclamation mark */
#define LIT_CHAR_TILDE ((ecma_char_t) '~') /* tilde */
#define LIT_CHAR_QUESTION ((ecma_char_t) '?') /* question mark */
#define LIT_CHAR_COLON ((ecma_char_t) ':') /* colon */
/**
* Uppercase ASCII letters
*
* The first and last constants of this group serve as the bounds of the
* uppercase range macros defined further below.
*/
#define LIT_CHAR_UPPERCASE_A ((ecma_char_t) 'A')
#define LIT_CHAR_UPPERCASE_B ((ecma_char_t) 'B')
#define LIT_CHAR_UPPERCASE_C ((ecma_char_t) 'C')
#define LIT_CHAR_UPPERCASE_D ((ecma_char_t) 'D')
#define LIT_CHAR_UPPERCASE_E ((ecma_char_t) 'E')
#define LIT_CHAR_UPPERCASE_F ((ecma_char_t) 'F')
#define LIT_CHAR_UPPERCASE_G ((ecma_char_t) 'G')
#define LIT_CHAR_UPPERCASE_H ((ecma_char_t) 'H')
#define LIT_CHAR_UPPERCASE_I ((ecma_char_t) 'I')
#define LIT_CHAR_UPPERCASE_J ((ecma_char_t) 'J')
#define LIT_CHAR_UPPERCASE_K ((ecma_char_t) 'K')
#define LIT_CHAR_UPPERCASE_L ((ecma_char_t) 'L')
#define LIT_CHAR_UPPERCASE_M ((ecma_char_t) 'M')
#define LIT_CHAR_UPPERCASE_N ((ecma_char_t) 'N')
#define LIT_CHAR_UPPERCASE_O ((ecma_char_t) 'O')
#define LIT_CHAR_UPPERCASE_P ((ecma_char_t) 'P')
#define LIT_CHAR_UPPERCASE_Q ((ecma_char_t) 'Q')
#define LIT_CHAR_UPPERCASE_R ((ecma_char_t) 'R')
#define LIT_CHAR_UPPERCASE_S ((ecma_char_t) 'S')
#define LIT_CHAR_UPPERCASE_T ((ecma_char_t) 'T')
#define LIT_CHAR_UPPERCASE_U ((ecma_char_t) 'U')
#define LIT_CHAR_UPPERCASE_V ((ecma_char_t) 'V')
#define LIT_CHAR_UPPERCASE_W ((ecma_char_t) 'W')
#define LIT_CHAR_UPPERCASE_X ((ecma_char_t) 'X')
#define LIT_CHAR_UPPERCASE_Y ((ecma_char_t) 'Y')
#define LIT_CHAR_UPPERCASE_Z ((ecma_char_t) 'Z')
/**
* Lowercase ASCII letters
*
* The first and last constants of this group serve as the bounds of the
* lowercase range macros defined further below.
*/
#define LIT_CHAR_LOWERCASE_A ((ecma_char_t) 'a')
#define LIT_CHAR_LOWERCASE_B ((ecma_char_t) 'b')
#define LIT_CHAR_LOWERCASE_C ((ecma_char_t) 'c')
#define LIT_CHAR_LOWERCASE_D ((ecma_char_t) 'd')
#define LIT_CHAR_LOWERCASE_E ((ecma_char_t) 'e')
#define LIT_CHAR_LOWERCASE_F ((ecma_char_t) 'f')
#define LIT_CHAR_LOWERCASE_G ((ecma_char_t) 'g')
#define LIT_CHAR_LOWERCASE_H ((ecma_char_t) 'h')
#define LIT_CHAR_LOWERCASE_I ((ecma_char_t) 'i')
#define LIT_CHAR_LOWERCASE_J ((ecma_char_t) 'j')
#define LIT_CHAR_LOWERCASE_K ((ecma_char_t) 'k')
#define LIT_CHAR_LOWERCASE_L ((ecma_char_t) 'l')
#define LIT_CHAR_LOWERCASE_M ((ecma_char_t) 'm')
#define LIT_CHAR_LOWERCASE_N ((ecma_char_t) 'n')
#define LIT_CHAR_LOWERCASE_O ((ecma_char_t) 'o')
#define LIT_CHAR_LOWERCASE_P ((ecma_char_t) 'p')
#define LIT_CHAR_LOWERCASE_Q ((ecma_char_t) 'q')
#define LIT_CHAR_LOWERCASE_R ((ecma_char_t) 'r')
#define LIT_CHAR_LOWERCASE_S ((ecma_char_t) 's')
#define LIT_CHAR_LOWERCASE_T ((ecma_char_t) 't')
#define LIT_CHAR_LOWERCASE_U ((ecma_char_t) 'u')
#define LIT_CHAR_LOWERCASE_V ((ecma_char_t) 'v')
#define LIT_CHAR_LOWERCASE_W ((ecma_char_t) 'w')
#define LIT_CHAR_LOWERCASE_X ((ecma_char_t) 'x')
#define LIT_CHAR_LOWERCASE_Y ((ecma_char_t) 'y')
#define LIT_CHAR_LOWERCASE_Z ((ecma_char_t) 'z')
/**
* ASCII decimal digits
*
* The first and last constants of this group serve as the bounds of the
* decimal digits range macros defined further below.
*/
#define LIT_CHAR_0 ((ecma_char_t) '0')
#define LIT_CHAR_1 ((ecma_char_t) '1')
#define LIT_CHAR_2 ((ecma_char_t) '2')
#define LIT_CHAR_3 ((ecma_char_t) '3')
#define LIT_CHAR_4 ((ecma_char_t) '4')
#define LIT_CHAR_5 ((ecma_char_t) '5')
#define LIT_CHAR_6 ((ecma_char_t) '6')
#define LIT_CHAR_7 ((ecma_char_t) '7')
#define LIT_CHAR_8 ((ecma_char_t) '8')
#define LIT_CHAR_9 ((ecma_char_t) '9')
/**
* ASCII character ranges
*
* Both bounds are inclusive: the helpers test a code unit with
* "c >= ..._BEGIN && c <= ..._END".
*
* lit_char_hex_to_int also subtracts a ..._BEGIN bound from the code unit, so the
* ranges are meaningful only while the characters between BEGIN and END have
* consecutive code values (which holds for ASCII).
*/
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_END LIT_CHAR_UPPERCASE_Z
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_END LIT_CHAR_LOWERCASE_Z
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN LIT_CHAR_UPPERCASE_A /* uppercase letters for
* hexadecimal digits range */
#define LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END LIT_CHAR_UPPERCASE_F
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN LIT_CHAR_LOWERCASE_A /* lowercase letters for
* hexadecimal digits range */
#define LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END LIT_CHAR_LOWERCASE_F
#define LIT_CHAR_ASCII_DIGITS_BEGIN LIT_CHAR_0 /* decimal digits range */
#define LIT_CHAR_ASCII_DIGITS_END LIT_CHAR_9
/*
* Part of IsWordChar abstract operation (ECMA-262 v5, 15.10.2.6, step 3)
*
* Returns true for ASCII letters, ASCII decimal digits and LIT_CHAR_UNDERSCORE.
*/
extern bool lit_char_is_word_char (ecma_char_t);
/*
* Convert a code unit that is one of [0-9A-Fa-f] to the value of that hexadecimal digit (0..15).
*
* Any other code unit is outside the contract: the implementation only asserts on it.
*/
extern uint32_t lit_char_hex_to_int (ecma_char_t);
#endif /* LIT_CHAR_HELPERS_H */

View File

@ -934,11 +934,11 @@ parse_string (void)
{
consume_char ();
if (lit_char_is_carriage_return (nc))
if (nc == LIT_CHAR_CR)
{
nc = (ecma_char_t) LA (0);
if (lit_char_is_new_line (nc))
if (nc == LIT_CHAR_LF)
{
consume_char ();
}