mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
There are some leftover global functions in the code that are not referenced at all anymore. These functions are removed by this patch. There are also some global functions that are only used in their own modules. These functions are made static by this patch. JerryScript-DCO-1.0-Signed-off-by: Akos Kiss akiss@inf.u-szeged.hu
769 lines
23 KiB
C
769 lines
23 KiB
C
/* Copyright JS Foundation and other contributors, http://js.foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "lit-char-helpers.h"
|
|
#include "lit/lit-unicode-ranges.inc.h"
|
|
#include "lit-strings.h"
|
|
|
|
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
|
#include "lit-unicode-conversions.inc.h"
|
|
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
|
|
|
/**
 * Number of elements of a statically sized array.
 *
 * Note:
 *      the argument must be a real array (not a pointer), since the value
 *      is computed from the size of the whole object divided by the size
 *      of its first element.
 */
#define NUM_OF_ELEMENTS(array) (sizeof (array) / sizeof ((array)[0]))
|
|
|
|
/**
|
|
* Binary search algorithm that searches the a
|
|
* character in the given char array.
|
|
*
|
|
* @return true - if the character is in the given array
|
|
* false - otherwise
|
|
*/
|
|
static bool
|
|
search_char_in_char_array (ecma_char_t c, /**< code unit */
|
|
const ecma_char_t *array, /**< array */
|
|
int size_of_array) /**< length of the array */
|
|
{
|
|
int bottom = 0;
|
|
int top = size_of_array - 1;
|
|
|
|
while (bottom <= top)
|
|
{
|
|
int middle = (bottom + top) / 2;
|
|
ecma_char_t current = array[middle];
|
|
|
|
if (current == c)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if (c < current)
|
|
{
|
|
top = middle - 1;
|
|
}
|
|
else
|
|
{
|
|
bottom = middle + 1;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
} /* search_char_in_char_array */
|
|
|
|
/**
|
|
* Binary search algorithm that searches a character in the given intervals.
|
|
* Intervals specifed by two arrays. The first one contains the starting points
|
|
* of the intervals, the second one contains the length of them.
|
|
*
|
|
* @return true - if the the character is included (inclusively) in one of the intervals in the given array
|
|
* false - otherwise
|
|
*/
|
|
static bool
|
|
search_char_in_interval_array (ecma_char_t c, /**< code unit */
|
|
const ecma_char_t *array_sp, /**< array of interval starting points */
|
|
const uint8_t *lengths, /**< array of interval lengths */
|
|
int size_of_array) /**< length of the array */
|
|
{
|
|
int bottom = 0;
|
|
int top = size_of_array - 1;
|
|
|
|
while (bottom <= top)
|
|
{
|
|
int middle = (bottom + top) / 2;
|
|
ecma_char_t current_sp = array_sp[middle];
|
|
|
|
if (current_sp <= c && c <= current_sp + lengths[middle])
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if (c > current_sp)
|
|
{
|
|
bottom = middle + 1;
|
|
}
|
|
else
|
|
{
|
|
top = middle - 1;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
} /* search_char_in_interval_array */
|
|
|
|
/**
|
|
* Check if specified character is one of the Whitespace characters including those
|
|
* that fall into "Space, Separator" ("Zs") Unicode character category.
|
|
*
|
|
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 2,
|
|
* false - otherwise
|
|
*/
|
|
bool
|
|
lit_char_is_white_space (ecma_char_t c) /**< code unit */
|
|
{
|
|
if (c <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
|
{
|
|
return (c == LIT_CHAR_TAB
|
|
|| c == LIT_CHAR_VTAB
|
|
|| c == LIT_CHAR_FF
|
|
|| c == LIT_CHAR_SP);
|
|
}
|
|
else
|
|
{
|
|
return (c == LIT_CHAR_NBSP
|
|
|| c == LIT_CHAR_BOM
|
|
|| (c >= lit_unicode_separator_char_interval_sps[0]
|
|
&& c <= lit_unicode_separator_char_interval_sps[0] + lit_unicode_separator_char_interval_lengths[0])
|
|
|| search_char_in_char_array (c,
|
|
lit_unicode_separator_chars,
|
|
NUM_OF_ELEMENTS (lit_unicode_separator_chars)));
|
|
}
|
|
} /* lit_char_is_white_space */
|
|
|
|
/**
|
|
* Check if specified character is one of LineTerminator characters
|
|
*
|
|
* @return true - if the character is one of characters, listed in ECMA-262 v5, Table 3,
|
|
* false - otherwise
|
|
*/
|
|
bool
|
|
lit_char_is_line_terminator (ecma_char_t c) /**< code unit */
|
|
{
|
|
return (c == LIT_CHAR_LF
|
|
|| c == LIT_CHAR_CR
|
|
|| c == LIT_CHAR_LS
|
|
|| c == LIT_CHAR_PS);
|
|
} /* lit_char_is_line_terminator */
|
|
|
|
/**
|
|
* Check if specified character is a unicode letter
|
|
*
|
|
* Note:
|
|
* Unicode letter is a character, included into one of the following categories:
|
|
* - Uppercase letter (Lu);
|
|
* - Lowercase letter (Ll);
|
|
* - Titlecase letter (Lt);
|
|
* - Modifier letter (Lm);
|
|
* - Other letter (Lo);
|
|
* - Letter number (Nl).
|
|
*
|
|
* See also:
|
|
* ECMA-262 v5, 7.6
|
|
*
|
|
* @return true - if specified character falls into one of the listed categories,
|
|
* false - otherwise
|
|
*/
|
|
static bool
|
|
lit_char_is_unicode_letter (ecma_char_t c) /**< code unit */
|
|
{
|
|
return (search_char_in_interval_array (c,
|
|
lit_unicode_letter_interval_sps,
|
|
lit_unicode_letter_interval_lengths,
|
|
NUM_OF_ELEMENTS (lit_unicode_letter_interval_sps))
|
|
|| search_char_in_char_array (c, lit_unicode_letter_chars, NUM_OF_ELEMENTS (lit_unicode_letter_chars)));
|
|
} /* lit_char_is_unicode_letter */
|
|
|
|
/**
|
|
* Check if specified character is a non-letter character and can be used as a
|
|
* non-first character of an identifier.
|
|
* These characters coverd by the following unicode categories:
|
|
* - digit (Nd)
|
|
* - punctuation mark (Mn, Mc)
|
|
* - connector punctuation (Pc)
|
|
*
|
|
* See also:
|
|
* ECMA-262 v5, 7.6
|
|
*
|
|
* @return true - if specified character falls into one of the listed categories,
|
|
* false - otherwise
|
|
*/
|
|
static bool
|
|
lit_char_is_unicode_non_letter_ident_part (ecma_char_t c) /**< code unit */
|
|
{
|
|
return (search_char_in_interval_array (c,
|
|
lit_unicode_non_letter_ident_part_interval_sps,
|
|
lit_unicode_non_letter_ident_part_interval_lengths,
|
|
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_interval_sps))
|
|
|| search_char_in_char_array (c,
|
|
lit_unicode_non_letter_ident_part_chars,
|
|
NUM_OF_ELEMENTS (lit_unicode_non_letter_ident_part_chars)));
|
|
} /* lit_char_is_unicode_non_letter_ident_part */
|
|
|
|
/**
|
|
* Checks whether the next UTF8 character is a valid identifier start.
|
|
*
|
|
* @return true if it is.
|
|
*/
|
|
bool
|
|
lit_char_is_identifier_start (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
|
{
|
|
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
|
{
|
|
return lit_char_is_identifier_start_character (*src_p);
|
|
}
|
|
|
|
/* ECMAScript 2015 specification allows some code points in supplementary plane.
|
|
* However, we don't permit characters in supplementary characters as start of identifier.
|
|
*/
|
|
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return lit_char_is_identifier_start_character (lit_utf8_peek_next (src_p));
|
|
} /* lit_char_is_identifier_start */
|
|
|
|
/**
|
|
* Checks whether the character is a valid identifier start.
|
|
*
|
|
* @return true if it is.
|
|
*/
|
|
bool
|
|
lit_char_is_identifier_start_character (uint16_t chr) /**< EcmaScript character */
|
|
{
|
|
/* Fast path for ASCII-defined letters. */
|
|
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
|
{
|
|
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
|
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
|
|| chr == LIT_CHAR_DOLLAR_SIGN
|
|
|| chr == LIT_CHAR_UNDERSCORE);
|
|
}
|
|
|
|
return lit_char_is_unicode_letter (chr);
|
|
} /* lit_char_is_identifier_start_character */
|
|
|
|
/**
|
|
* Checks whether the next UTF8 character is a valid identifier part.
|
|
*
|
|
* @return true if it is.
|
|
*/
|
|
bool
|
|
lit_char_is_identifier_part (const uint8_t *src_p) /**< pointer to a vaild UTF8 character */
|
|
{
|
|
if (*src_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
|
{
|
|
return lit_char_is_identifier_part_character (*src_p);
|
|
}
|
|
|
|
/* ECMAScript 2015 specification allows some code points in supplementary plane.
|
|
* However, we don't permit characters in supplementary characters as part of identifier.
|
|
*/
|
|
if ((*src_p & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return lit_char_is_identifier_part_character (lit_utf8_peek_next (src_p));
|
|
} /* lit_char_is_identifier_part */
|
|
|
|
/**
|
|
* Checks whether the character is a valid identifier part.
|
|
*
|
|
* @return true if it is.
|
|
*/
|
|
bool
|
|
lit_char_is_identifier_part_character (uint16_t chr) /**< EcmaScript character */
|
|
{
|
|
/* Fast path for ASCII-defined letters. */
|
|
if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX)
|
|
{
|
|
return ((LEXER_TO_ASCII_LOWERCASE (chr) >= LIT_CHAR_LOWERCASE_A
|
|
&& LEXER_TO_ASCII_LOWERCASE (chr) <= LIT_CHAR_LOWERCASE_Z)
|
|
|| (chr >= LIT_CHAR_0 && chr <= LIT_CHAR_9)
|
|
|| chr == LIT_CHAR_DOLLAR_SIGN
|
|
|| chr == LIT_CHAR_UNDERSCORE);
|
|
}
|
|
|
|
return (lit_char_is_unicode_letter (chr)
|
|
|| lit_char_is_unicode_non_letter_ident_part (chr));
|
|
} /* lit_char_is_identifier_part_character */
|
|
|
|
/**
|
|
* Check if specified character is one of OctalDigit characters (ECMA-262 v5, B.1.2)
|
|
*
|
|
* @return true / false
|
|
*/
|
|
bool
|
|
lit_char_is_octal_digit (ecma_char_t c) /**< code unit */
|
|
{
|
|
return (c >= LIT_CHAR_ASCII_OCTAL_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_OCTAL_DIGITS_END);
|
|
} /* lit_char_is_octal_digit */
|
|
|
|
/**
|
|
* Check if specified character is one of DecimalDigit characters (ECMA-262 v5, 7.8.3)
|
|
*
|
|
* @return true / false
|
|
*/
|
|
bool
|
|
lit_char_is_decimal_digit (ecma_char_t c) /**< code unit */
|
|
{
|
|
return (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END);
|
|
} /* lit_char_is_decimal_digit */
|
|
|
|
/**
|
|
* Check if specified character is one of HexDigit characters (ECMA-262 v5, 7.8.3)
|
|
*
|
|
* @return true / false
|
|
*/
|
|
bool
|
|
lit_char_is_hex_digit (ecma_char_t c) /**< code unit */
|
|
{
|
|
return ((c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
|
|| (LEXER_TO_ASCII_LOWERCASE (c) >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
|
|
&& LEXER_TO_ASCII_LOWERCASE (c) <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END));
|
|
} /* lit_char_is_hex_digit */
|
|
|
|
/**
|
|
* Convert a HexDigit character to its numeric value, as defined in ECMA-262 v5, 7.8.3
|
|
*
|
|
* @return digit value, corresponding to the hex char
|
|
*/
|
|
uint32_t
|
|
lit_char_hex_to_int (ecma_char_t c) /**< code unit, corresponding to
|
|
* one of HexDigit characters */
|
|
{
|
|
JERRY_ASSERT (lit_char_is_hex_digit (c));
|
|
|
|
if (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
|
{
|
|
return (uint32_t) (c - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
|
}
|
|
else if (c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
|
{
|
|
return (uint32_t) (c - LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN + 10);
|
|
}
|
|
else
|
|
{
|
|
return (uint32_t) (c - LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN + 10);
|
|
}
|
|
} /* lit_char_hex_to_int */
|
|
|
|
/**
|
|
* Converts a character to UTF8 bytes.
|
|
*
|
|
* @return length of the UTF8 representation.
|
|
*/
|
|
size_t
|
|
lit_char_to_utf8_bytes (uint8_t *dst_p, /**< destination buffer */
|
|
ecma_char_t chr) /**< EcmaScript character */
|
|
{
|
|
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
|
{
|
|
/* 00000000 0xxxxxxx -> 0xxxxxxx */
|
|
*dst_p = (uint8_t) chr;
|
|
return 1;
|
|
}
|
|
|
|
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
|
{
|
|
/* 00000yyy yyxxxxxx -> 110yyyyy 10xxxxxx */
|
|
*(dst_p++) = (uint8_t) (LIT_UTF8_2_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_5_BITS_MASK));
|
|
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
|
return 2;
|
|
}
|
|
|
|
/* zzzzyyyy yyxxxxxx -> 1110zzzz 10yyyyyy 10xxxxxx */
|
|
*(dst_p++) = (uint8_t) (LIT_UTF8_3_BYTE_MARKER | ((chr >> 12) & LIT_UTF8_LAST_4_BITS_MASK));
|
|
*(dst_p++) = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | ((chr >> 6) & LIT_UTF8_LAST_6_BITS_MASK));
|
|
*dst_p = (uint8_t) (LIT_UTF8_EXTRA_BYTE_MARKER | (chr & LIT_UTF8_LAST_6_BITS_MASK));
|
|
return 3;
|
|
} /* lit_char_to_utf8_bytes */
|
|
|
|
/**
|
|
* Returns the length of the UTF8 representation of a character.
|
|
*
|
|
* @return length of the UTF8 representation.
|
|
*/
|
|
size_t
|
|
lit_char_get_utf8_length (ecma_char_t chr) /**< EcmaScript character */
|
|
{
|
|
if (!(chr & ~LIT_UTF8_1_BYTE_CODE_POINT_MAX))
|
|
{
|
|
/* 00000000 0xxxxxxx */
|
|
return 1;
|
|
}
|
|
|
|
if (!(chr & ~LIT_UTF8_2_BYTE_CODE_POINT_MAX))
|
|
{
|
|
/* 00000yyy yyxxxxxx */
|
|
return 2;
|
|
}
|
|
|
|
/* zzzzyyyy yyxxxxxx */
|
|
return 3;
|
|
} /* lit_char_get_utf8_length */
|
|
|
|
/**
|
|
* Parse the next number_of_characters hexadecimal character,
|
|
* and construct a code unit from them. The buffer must
|
|
* be zero terminated.
|
|
*
|
|
* @return true if decoding was successful, false otherwise
|
|
*/
|
|
bool
|
|
lit_read_code_unit_from_hex (const lit_utf8_byte_t *buf_p, /**< buffer with characters */
|
|
lit_utf8_size_t number_of_characters, /**< number of characters to be read */
|
|
ecma_char_ptr_t out_code_unit_p) /**< [out] decoded result */
|
|
{
|
|
ecma_char_t code_unit = LIT_CHAR_NULL;
|
|
|
|
JERRY_ASSERT (number_of_characters >= 2 && number_of_characters <= 4);
|
|
|
|
for (lit_utf8_size_t i = 0; i < number_of_characters; i++)
|
|
{
|
|
code_unit = (ecma_char_t) (code_unit << 4u);
|
|
|
|
if (*buf_p >= LIT_CHAR_ASCII_DIGITS_BEGIN
|
|
&& *buf_p <= LIT_CHAR_ASCII_DIGITS_END)
|
|
{
|
|
code_unit |= (ecma_char_t) (*buf_p - LIT_CHAR_ASCII_DIGITS_BEGIN);
|
|
}
|
|
else if (*buf_p >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN
|
|
&& *buf_p <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_END)
|
|
{
|
|
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_LOWERCASE_LETTERS_HEX_BEGIN - 10));
|
|
}
|
|
else if (*buf_p >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN
|
|
&& *buf_p <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_END)
|
|
{
|
|
code_unit |= (ecma_char_t) (*buf_p - (LIT_CHAR_ASCII_UPPERCASE_LETTERS_HEX_BEGIN - 10));
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
|
|
buf_p++;
|
|
}
|
|
|
|
*out_code_unit_p = code_unit;
|
|
return true;
|
|
} /* lit_read_code_unit_from_hex */
|
|
|
|
/**
|
|
* Check if specified character is a word character (part of IsWordChar abstract operation)
|
|
*
|
|
* See also: ECMA-262 v5, 15.10.2.6 (IsWordChar)
|
|
*
|
|
* @return true - if the character is a word character
|
|
* false - otherwise
|
|
*/
|
|
bool
|
|
lit_char_is_word_char (ecma_char_t c) /**< code unit */
|
|
{
|
|
return ((c >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
|
|| (c >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && c <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
|
|| (c >= LIT_CHAR_ASCII_DIGITS_BEGIN && c <= LIT_CHAR_ASCII_DIGITS_END)
|
|
|| c == LIT_CHAR_UNDERSCORE);
|
|
} /* lit_char_is_word_char */
|
|
|
|
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
|
|
|
/**
|
|
* Check if the specified character is in one of those tables which contain bidirectional conversions.
|
|
*
|
|
* @return the mapped character sequence of an ecma character, if it's in the table.
|
|
* 0 - otherwise.
|
|
*/
|
|
static ecma_length_t
|
|
search_in_bidirectional_conversion_tables (ecma_char_t character, /**< code unit */
|
|
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
|
bool is_lowercase) /**< is lowercase conversion */
|
|
{
|
|
/* 1, Check if the specified character is part of the lit_character_case_ranges table. */
|
|
int number_of_case_ranges = NUM_OF_ELEMENTS (lit_character_case_ranges);
|
|
int conv_counter = 0;
|
|
|
|
for (int i = 0; i < number_of_case_ranges; i++)
|
|
{
|
|
if (i % 2 == 0 && i > 0)
|
|
{
|
|
conv_counter++;
|
|
}
|
|
|
|
int range_length = lit_character_case_range_lengths[conv_counter];
|
|
ecma_char_t start_point = lit_character_case_ranges[i];
|
|
|
|
if (start_point > character || character >= start_point + range_length)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
int char_dist = character - start_point;
|
|
|
|
if (i % 2 == 0)
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (lit_character_case_ranges[i + 1] + char_dist) : character;
|
|
}
|
|
else
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (lit_character_case_ranges[i - 1] + char_dist);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/* 2, Check if the specified character is part of the character_pair_ranges table. */
|
|
int bottom = 0;
|
|
int top = NUM_OF_ELEMENTS (lit_character_pair_ranges) - 1;
|
|
|
|
while (bottom <= top)
|
|
{
|
|
int middle = (bottom + top) / 2;
|
|
ecma_char_t current_sp = lit_character_pair_ranges[middle];
|
|
|
|
if (current_sp <= character && character < current_sp + lit_character_pair_range_lengths[middle])
|
|
{
|
|
int char_dist = character - current_sp;
|
|
|
|
if ((character - current_sp) % 2 == 0)
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? (ecma_char_t) (current_sp + char_dist + 1) : character;
|
|
}
|
|
else
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? character : (ecma_char_t) (current_sp + char_dist - 1);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
if (character > current_sp)
|
|
{
|
|
bottom = middle + 1;
|
|
}
|
|
else
|
|
{
|
|
top = middle - 1;
|
|
}
|
|
}
|
|
|
|
/* 3, Check if the specified character is part of the character_pairs table. */
|
|
int number_of_character_pairs = NUM_OF_ELEMENTS (lit_character_pairs);
|
|
|
|
for (int i = 0; i < number_of_character_pairs; i++)
|
|
{
|
|
if (character != lit_character_pairs[i])
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (i % 2 == 0)
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? lit_character_pairs[i + 1] : character;
|
|
}
|
|
else
|
|
{
|
|
output_buffer_p[0] = is_lowercase ? character : lit_character_pairs[i - 1];
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
} /* search_in_bidirectional_conversion_tables */
|
|
|
|
/**
|
|
* Check if the specified character is in the given conversion table.
|
|
*
|
|
* @return the mapped character sequence of an ecma character, if it's in the table.
|
|
* 0 - otherwise.
|
|
*/
|
|
static ecma_length_t
|
|
search_in_conversion_table (ecma_char_t character, /**< code unit */
|
|
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
|
const ecma_char_t *array, /**< array */
|
|
const uint8_t *counters) /**< case_values counter */
|
|
{
|
|
int end_point = 0;
|
|
|
|
for (int i = 0; i < 3; i++)
|
|
{
|
|
int start_point = end_point;
|
|
int size_of_case_value = i + 1;
|
|
end_point += counters[i] * (size_of_case_value + 1);
|
|
|
|
int bottom = start_point;
|
|
int top = end_point - size_of_case_value;
|
|
|
|
while (bottom <= top)
|
|
{
|
|
int middle = (bottom + top) / 2;
|
|
|
|
middle -= ((middle - bottom) % (size_of_case_value + 1));
|
|
|
|
ecma_char_t current = array[middle];
|
|
|
|
if (current == character)
|
|
{
|
|
ecma_length_t char_sequence = 1;
|
|
|
|
switch (size_of_case_value)
|
|
{
|
|
case 3:
|
|
{
|
|
output_buffer_p[2] = array[middle + 3];
|
|
char_sequence++;
|
|
/* FALLTHRU */
|
|
}
|
|
case 2:
|
|
{
|
|
output_buffer_p[1] = array[middle + 2];
|
|
char_sequence++;
|
|
/* FALLTHRU */
|
|
}
|
|
default:
|
|
{
|
|
output_buffer_p[0] = array[middle + 1];
|
|
return char_sequence;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (character < current)
|
|
{
|
|
top = middle - (size_of_case_value + 1);
|
|
}
|
|
else
|
|
{
|
|
bottom = middle + (size_of_case_value + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
} /* search_in_conversion_table */
|
|
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
|
|
|
/**
|
|
* Returns the lowercase character sequence of an ecma character.
|
|
*
|
|
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
|
*
|
|
* @return the length of the lowercase character sequence
|
|
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
|
*/
|
|
ecma_length_t
|
|
lit_char_to_lower_case (ecma_char_t character, /**< input character value */
|
|
ecma_char_t *output_buffer_p, /**< [out] buffer for the result characters */
|
|
ecma_length_t buffer_size) /**< buffer size */
|
|
{
|
|
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
|
|
if (character >= LIT_CHAR_UPPERCASE_A && character <= LIT_CHAR_UPPERCASE_Z)
|
|
{
|
|
output_buffer_p[0] = (ecma_char_t) (character + (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
|
return 1;
|
|
}
|
|
|
|
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
|
|
|
ecma_length_t lowercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, true);
|
|
|
|
if (lowercase_sequence != 0)
|
|
{
|
|
return lowercase_sequence;
|
|
}
|
|
|
|
int num_of_lowercase_ranges = NUM_OF_ELEMENTS (lit_lower_case_ranges);
|
|
|
|
for (int i = 0, j = 0; i < num_of_lowercase_ranges; i += 2, j++)
|
|
{
|
|
int range_length = lit_lower_case_range_lengths[j] - 1;
|
|
ecma_char_t start_point = lit_lower_case_ranges[i];
|
|
|
|
if (start_point <= character && character <= start_point + range_length)
|
|
{
|
|
output_buffer_p[0] = (ecma_char_t) (lit_lower_case_ranges[i + 1] + (character - start_point));
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
lowercase_sequence = search_in_conversion_table (character,
|
|
output_buffer_p,
|
|
lit_lower_case_conversions,
|
|
lit_lower_case_conversion_counters);
|
|
|
|
if (lowercase_sequence != 0)
|
|
{
|
|
return lowercase_sequence;
|
|
}
|
|
|
|
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
|
|
|
output_buffer_p[0] = character;
|
|
return 1;
|
|
} /* lit_char_to_lower_case */
|
|
|
|
/**
|
|
* Returns the uppercase character sequence of an ecma character.
|
|
*
|
|
* Note: output_buffer_p must be able to hold at least LIT_MAXIMUM_OTHER_CASE_LENGTH characters.
|
|
*
|
|
* @return the length of the uppercase character sequence
|
|
* which is always between 1 and LIT_MAXIMUM_OTHER_CASE_LENGTH.
|
|
*/
|
|
ecma_length_t
|
|
lit_char_to_upper_case (ecma_char_t character, /**< input character value */
|
|
ecma_char_t *output_buffer_p, /**< buffer for the result characters */
|
|
ecma_length_t buffer_size) /**< buffer size */
|
|
{
|
|
JERRY_ASSERT (buffer_size >= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
|
|
if (character >= LIT_CHAR_LOWERCASE_A && character <= LIT_CHAR_LOWERCASE_Z)
|
|
{
|
|
output_buffer_p[0] = (ecma_char_t) (character - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
|
return 1;
|
|
}
|
|
|
|
#ifndef CONFIG_DISABLE_UNICODE_CASE_CONVERSION
|
|
|
|
ecma_length_t uppercase_sequence = search_in_bidirectional_conversion_tables (character, output_buffer_p, false);
|
|
|
|
if (uppercase_sequence != 0)
|
|
{
|
|
return uppercase_sequence;
|
|
}
|
|
|
|
int num_of_upper_case_special_ranges = NUM_OF_ELEMENTS (lit_upper_case_special_ranges);
|
|
|
|
for (int i = 0, j = 0; i < num_of_upper_case_special_ranges; i += 3, j++)
|
|
{
|
|
int range_length = lit_upper_case_special_range_lengths[j];
|
|
ecma_char_t start_point = lit_upper_case_special_ranges[i];
|
|
|
|
if (start_point <= character && character <= start_point + range_length)
|
|
{
|
|
output_buffer_p[0] = (ecma_char_t) (lit_upper_case_special_ranges[i + 1] + (character - start_point));
|
|
output_buffer_p[1] = (ecma_char_t) (lit_upper_case_special_ranges[i + 2]);
|
|
return 2;
|
|
}
|
|
}
|
|
|
|
uppercase_sequence = search_in_conversion_table (character,
|
|
output_buffer_p,
|
|
lit_upper_case_conversions,
|
|
lit_upper_case_conversion_counters);
|
|
|
|
if (uppercase_sequence != 0)
|
|
{
|
|
return uppercase_sequence;
|
|
}
|
|
|
|
#endif /* !CONFIG_DISABLE_UNICODE_CASE_CONVERSION */
|
|
|
|
output_buffer_p[0] = character;
|
|
return 1;
|
|
} /* lit_char_to_upper_case */
|