Use code unit instead of code point

JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó 2016-03-11 09:44:23 +01:00
parent 005f73a6f0
commit e1f20ad474
7 changed files with 70 additions and 72 deletions

View File

@ -1,4 +1,4 @@
/* Copyright 2014-2015 Samsung Electronics Co., Ltd.
/* Copyright 2014-2016 Samsung Electronics Co., Ltd.
* Copyright 2015-2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -96,25 +96,25 @@ ecma_builtin_global_object_print (ecma_value_t this_arg __attr_unused___, /**< t
while (utf8_str_curr_p < utf8_str_end_p)
{
ecma_char_t code_point = lit_utf8_read_next (&utf8_str_curr_p);
ecma_char_t code_unit = lit_utf8_read_next (&utf8_str_curr_p);
if (code_point == LIT_CHAR_NULL)
if (code_unit == LIT_CHAR_NULL)
{
printf ("\\u0000");
}
else if (code_point <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
else if (code_unit <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
{
printf ("%c", (char) code_point);
printf ("%c", (char) code_unit);
}
else
{
JERRY_STATIC_ASSERT (sizeof (code_point) == 2,
JERRY_STATIC_ASSERT (sizeof (code_unit) == 2,
size_of_code_point_must_be_equal_to_2_bytes);
uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
uint32_t byte_high = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
JERRY_BITSINBYTE,
JERRY_BITSINBYTE);
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_point,
uint32_t byte_low = (uint32_t) JRT_EXTRACT_BIT_FIELD (ecma_char_t, code_unit,
0,
JERRY_BITSINBYTE);
@ -801,9 +801,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
continue;
}
lit_code_point_t decoded_byte;
ecma_char_t decoded_byte;
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
{
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
break;
@ -857,9 +857,9 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
continue;
}
lit_code_point_t decoded_byte;
ecma_char_t decoded_byte;
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &decoded_byte))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &decoded_byte))
{
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG (""));
break;
@ -916,16 +916,16 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___,
}
else
{
lit_code_point_t cp;
ecma_char_t chr;
if (!lit_read_code_point_from_hex (input_char_p + 1, 2, &cp)
|| ((cp & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
if (!lit_read_code_unit_from_hex (input_char_p + 1, 2, &chr)
|| ((chr & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER))
{
is_valid = false;
break;
}
octets[i] = (lit_utf8_byte_t) cp;
octets[i] = (lit_utf8_byte_t) chr;
input_char_p += URI_ENCODED_BYTE_SIZE;
}
}

View File

@ -178,15 +178,15 @@ ecma_builtin_json_parse_string (ecma_json_token_t *token_p) /**< token argument
}
case LIT_CHAR_LOWERCASE_U:
{
lit_code_point_t code_point;
ecma_char_t code_unit;
if (!(lit_read_code_point_from_hex (current_p + 1, 4, &code_point)))
if (!(lit_read_code_unit_from_hex (current_p + 1, 4, &code_unit)))
{
return;
}
current_p += 5;
write_p += lit_code_point_to_cesu8 (code_point, write_p);
write_p += lit_code_unit_to_utf8 (code_unit, write_p);
continue;
}
default:

View File

@ -1,4 +1,5 @@
/* Copyright 2015 Samsung Electronics Co., Ltd.
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -283,38 +284,38 @@ lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
/**
* Parse the next number_of_characters hexadecimal characters,
* and construct a code point from them. The buffer must
* and construct a code unit from them. The buffer must
* be zero terminated.
*
* @return true if decoding was successful, false otherwise
*/
bool
lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
lit_code_point_t *out_code_point_p) /**< [out] decoded result */
lit_read_code_unit_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with characters */
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
ecma_char_ptr_t out_code_unit_p) /**< [out] decoded result */
{
lit_code_point_t code_point = 0;
ecma_char_t code_unit = LIT_CHAR_NULL;
JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);
for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
{
code_point <<= 4;
code_unit = (ecma_char_t) (code_unit << 4u);
if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
{
code_point |= (uint32_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
}
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
{
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
}
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
{
code_point |= (uint32_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
}
else
{
@ -324,9 +325,9 @@ lit_read_code_point_from_hex (lit_utf8_byte_t *buf_p, /**< buffer with character
buf_p++;
}
*out_code_point_p = code_point;
*out_code_unit_p = code_unit;
return true;
} /* lit_read_code_point_from_hex */
} /* lit_read_code_unit_from_hex */
/**
* Check if specified character is a word character (part of IsWordChar abstract operation)

View File

@ -1,4 +1,5 @@
/* Copyright 2015-2016 Samsung Electronics Co., Ltd.
* Copyright 2016 University of Szeged.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,6 +19,8 @@
#include "lit-globals.h"
#define LIT_CHAR_UNDEF ((ecma_char_t) 0xFFFF) /* undefined character */
/*
* Format control characters (ECMA-262 v5, Table 1)
*/
@ -213,7 +216,7 @@ extern bool lit_char_is_hex_digit (ecma_char_t);
extern uint32_t lit_char_hex_to_int (ecma_char_t);
/* read a hex encoded code unit from a zero terminated buffer */
bool lit_read_code_point_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, lit_code_point_t *);
bool lit_read_code_unit_from_hex (lit_utf8_byte_t *, lit_utf8_size_t, ecma_char_ptr_t);
/**
* Null character

View File

@ -41,12 +41,12 @@
*/
static void
re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */
uint32_t start, /**< character class range from */
uint32_t end) /**< character class range to */
ecma_char_t start, /**< character class range from */
ecma_char_t end) /**< character class range to */
{
re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p;
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) start);
re_append_char (ctx_p->bytecode_ctx_p, (ecma_char_t) end);
re_append_char (ctx_p->bytecode_ctx_p, start);
re_append_char (ctx_p->bytecode_ctx_p, end);
ctx_p->parser_ctx_p->num_of_classes++;
} /* re_append_char_class */

View File

@ -315,7 +315,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
{
re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type;
out_token_p->qmax = out_token_p->qmin = 1;
uint32_t start = RE_CHAR_UNDEF;
ecma_char_t start = LIT_CHAR_UNDEF;
bool is_range = false;
parser_ctx_p->num_of_classes = 0;
@ -332,11 +332,11 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
}
uint32_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
if (ch == LIT_CHAR_RIGHT_SQUARE)
{
if (start != RE_CHAR_UNDEF)
if (start != LIT_CHAR_UNDEF)
{
append_char_class (re_ctx_p, start, start);
}
@ -349,7 +349,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '-'"));
}
if (start != RE_CHAR_UNDEF
if (start != LIT_CHAR_UNDEF
&& !is_range
&& *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE)
{
@ -412,40 +412,40 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
}
else if (ch == LIT_CHAR_LOWERCASE_X)
{
lit_code_point_t code_point;
ecma_char_t code_unit;
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
}
parser_ctx_p->input_curr_p += 2;
append_char_class (re_ctx_p, code_point, code_point);
append_char_class (re_ctx_p, code_unit, code_unit);
}
else if (ch == LIT_CHAR_LOWERCASE_U)
{
lit_code_point_t code_point;
ecma_char_t code_unit;
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
}
parser_ctx_p->input_curr_p += 4;
append_char_class (re_ctx_p, code_point, code_point);
append_char_class (re_ctx_p, code_unit, code_unit);
}
else if (ch == LIT_CHAR_LOWERCASE_D)
{
/* See ECMA-262 v5, 15.10.2.12 */
append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch == LIT_CHAR_UPPERCASE_D)
{
/* See ECMA-262 v5, 15.10.2.12 */
append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1);
append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch == LIT_CHAR_LOWERCASE_S)
{
@ -461,7 +461,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */
append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */
append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch == LIT_CHAR_UPPERCASE_S)
{
@ -478,7 +478,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1);
append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch == LIT_CHAR_LOWERCASE_W)
{
@ -487,7 +487,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z);
append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE);
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch == LIT_CHAR_UPPERCASE_W)
{
@ -497,20 +497,19 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1);
append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1);
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
ch = RE_CHAR_UNDEF;
ch = LIT_CHAR_UNDEF;
}
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
&& lit_char_is_octal_digit ((ecma_char_t) ch)
else if (lit_char_is_octal_digit ((ecma_char_t) ch)
&& ch != LIT_CHAR_0)
{
parser_ctx_p->input_curr_p--;
ch = re_parse_octal (parser_ctx_p);
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
}
} /* ch == LIT_CHAR_BACKSLASH */
if (ch == RE_CHAR_UNDEF)
if (ch == LIT_CHAR_UNDEF)
{
if (start != RE_CHAR_UNDEF)
if (start != LIT_CHAR_UNDEF)
{
if (is_range)
{
@ -519,13 +518,13 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
else
{
append_char_class (re_ctx_p, start, start);
start = RE_CHAR_UNDEF;
start = LIT_CHAR_UNDEF;
}
}
}
else
{
if (start != RE_CHAR_UNDEF)
if (start != LIT_CHAR_UNDEF)
{
if (is_range)
{
@ -536,7 +535,7 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
else
{
append_char_class (re_ctx_p, start, ch);
start = RE_CHAR_UNDEF;
start = LIT_CHAR_UNDEF;
is_range = false;
}
}
@ -667,28 +666,28 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
else if (ch == LIT_CHAR_LOWERCASE_X
&& re_hex_lookup (parser_ctx_p, 2))
{
lit_code_point_t code_point;
ecma_char_t code_unit;
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 2, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
}
parser_ctx_p->input_curr_p += 2;
out_token_p->value = code_point;
out_token_p->value = code_unit;
}
else if (ch == LIT_CHAR_LOWERCASE_U
&& re_hex_lookup (parser_ctx_p, 4))
{
lit_code_point_t code_point;
ecma_char_t code_unit;
if (!lit_read_code_point_from_hex (parser_ctx_p->input_curr_p, 4, &code_point))
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
{
return ecma_raise_syntax_error (ECMA_ERR_MSG ("decode error"));
}
parser_ctx_p->input_curr_p += 4;
out_token_p->value = code_point;
out_token_p->value = code_unit;
}
else if (ch == LIT_CHAR_LOWERCASE_D)
{

View File

@ -75,11 +75,6 @@ typedef enum
*/
#define RE_MAX_RE_DECESC_DIGITS 9
/**
* Undefined character (out of the range of the codeunit)
*/
#define RE_CHAR_UNDEF 0xFFFFFFFF
/**
* RegExp token type
*/
@ -104,7 +99,7 @@ typedef struct
uint32_t num_of_classes; /**< number of character classes */
} re_parser_ctx_t;
typedef void (*re_char_class_callback) (void *re_ctx_p, uint32_t start, uint32_t end);
typedef void (*re_char_class_callback) (void *re_ctx_p, ecma_char_t start, ecma_char_t end);
ecma_value_t
re_parse_char_class (re_parser_ctx_t *, re_char_class_callback, void *, re_token_t *);