Add API functions to create string from a valid UTF-8 string. (#1430)

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka 2016-11-16 10:34:12 +01:00 committed by GitHub
parent 405092e700
commit ffaca583f3
4 changed files with 151 additions and 0 deletions

View File

@ -215,6 +215,120 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
return string_desc_p;
} /* ecma_new_ecma_string_from_utf8 */
/**
* Allocate a new ecma-string and initialize it from the utf8 string argument.
* All 4-bytes long unicode sequences are converted into two 3-bytes long sequences.
*
* @return pointer to ecma-string descriptor
*/
ecma_string_t *
ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p, /**< utf-8 string */
lit_utf8_size_t string_size) /**< utf-8 string size */
{
JERRY_ASSERT (string_p != NULL || string_size == 0);
ecma_string_t *string_desc_p = NULL;
ecma_length_t string_length = 0;
lit_utf8_size_t converted_string_size = 0;
lit_utf8_size_t pos = 0;
/* Calculate the required length and size information of the converted cesu-8 encoded string */
while (pos < string_size)
{
if ((string_p[pos] & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
{
pos++;
}
else if ((string_p[pos] & LIT_UTF8_2_BYTE_MASK) == LIT_UTF8_2_BYTE_MARKER)
{
pos += 2;
}
else if ((string_p[pos] & LIT_UTF8_3_BYTE_MASK) == LIT_UTF8_3_BYTE_MARKER)
{
pos += 3;
}
else
{
JERRY_ASSERT ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER);
pos += 4;
converted_string_size += 2;
}
string_length++;
}
JERRY_ASSERT (pos == string_size);
if (converted_string_size == 0)
{
return ecma_new_ecma_string_from_utf8 (string_p, string_size);
}
else
{
converted_string_size += string_size;
JERRY_ASSERT (lit_is_utf8_string_valid (string_p, string_size));
lit_utf8_byte_t *data_p;
if (likely (string_size <= UINT16_MAX))
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + converted_string_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) converted_string_size;
string_desc_p->u.utf8_string.length = (uint16_t) string_length;
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
}
else
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + converted_string_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.long_utf8_string_size = converted_string_size;
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
long_string_desc_p->long_utf8_string_length = string_length;
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
}
pos = 0;
while (pos < string_size)
{
if ((string_p[pos] & LIT_UTF8_4_BYTE_MASK) == LIT_UTF8_4_BYTE_MARKER)
{
/* Processing 4 byte unicode sequence. Always converted to two 3 byte long sequence. */
uint32_t character = ((((uint32_t) string_p[pos++]) & 0x7) << 18);
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 12);
character |= ((((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK) << 6);
character |= (((uint32_t) string_p[pos++]) & LIT_UTF8_LAST_6_BITS_MASK);
JERRY_ASSERT (character >= 0x10000);
character -= 0x10000;
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xd800 | (character >> 10)));
data_p += lit_char_to_utf8_bytes (data_p, (ecma_char_t) (0xdc00 | (character & LIT_UTF16_LAST_10_BITS_MASK)));
}
else
{
*data_p++ = string_p[pos++];
}
}
JERRY_ASSERT (pos == string_size);
string_desc_p->hash = lit_utf8_string_calc_hash (data_p, converted_string_size);
}
return string_desc_p;
} /* ecma_new_ecma_string_from_utf8_converted_to_cesu8 */
/**
* Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit
*

View File

@ -164,6 +164,7 @@ extern void ecma_free_value_if_not_object (ecma_value_t);
/* ecma-helpers-string.c */
extern ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *, lit_utf8_size_t);
extern ecma_string_t *ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *, lit_utf8_size_t);
extern ecma_string_t *ecma_new_ecma_string_from_code_unit (ecma_char_t);
extern ecma_string_t *ecma_new_ecma_string_from_uint32 (uint32_t);
extern ecma_string_t *ecma_new_ecma_string_from_number (ecma_number_t);

View File

@ -248,6 +248,8 @@ jerry_value_t jerry_create_number_infinity (bool);
jerry_value_t jerry_create_number_nan (void);
jerry_value_t jerry_create_null (void);
jerry_value_t jerry_create_object (void);
jerry_value_t jerry_create_string_from_utf8 (const jerry_char_t *);
jerry_value_t jerry_create_string_sz_from_utf8 (const jerry_char_t *, jerry_size_t);
jerry_value_t jerry_create_string (const jerry_char_t *);
jerry_value_t jerry_create_string_sz (const jerry_char_t *, jerry_size_t);
jerry_value_t jerry_create_undefined (void);

View File

@ -913,6 +913,40 @@ jerry_create_object (void)
return ecma_make_object_value (ecma_op_create_object_object_noarg ());
} /* jerry_create_object */
/**
* Create string from a valid UTF8 string
*
* Note:
* returned value must be freed with jerry_release_value when it is no longer needed.
*
* @return value of the created string
*/
jerry_value_t
jerry_create_string_from_utf8 (const jerry_char_t *str_p) /**< pointer to string */
{
return jerry_create_string_sz_from_utf8 (str_p, lit_zt_utf8_string_size ((lit_utf8_byte_t *) str_p));
} /* jerry_create_string_from_utf8 */
/**
* Create string from a valid UTF8 string
*
* Note:
* returned value must be freed with jerry_release_value when it is no longer needed.
*
* @return value of the created string
*/
jerry_value_t
jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, /**< pointer to string */
jerry_size_t str_size) /**< string size */
{
jerry_assert_api_available ();
ecma_string_t *ecma_str_p = ecma_new_ecma_string_from_utf8_converted_to_cesu8 ((lit_utf8_byte_t *) str_p,
(lit_utf8_size_t) str_size);
return ecma_make_string_value (ecma_str_p);
} /* jerry_create_string_sz_from_utf8 */
/**
* Create string from a valid CESU8 string
*