mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Add long string support.
JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
parent
efdf91a0d6
commit
cfcb4c707c
@ -85,11 +85,6 @@
|
||||
|
||||
#define CONFIG_ECMA_REFERENCE_COUNTER_LIMIT ((1u << CONFIG_ECMA_REFERENCE_COUNTER_WIDTH) - 1u)
|
||||
|
||||
/**
|
||||
* Maximum length of strings' concatenation
|
||||
*/
|
||||
#define CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH (1048576)
|
||||
|
||||
/**
|
||||
* Use 32-bit/64-bit float for ecma-numbers
|
||||
*/
|
||||
|
||||
@ -809,7 +809,10 @@ typedef struct
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string */
|
||||
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
|
||||
* maximum size is 2^16. */
|
||||
ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
|
||||
* maximum size is 2^32. */
|
||||
ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represented Number
|
||||
stored locally in the string's descriptor */
|
||||
ECMA_STRING_CONTAINER_MAGIC_STRING, /**< the ecma-string is equal to one of ECMA magic strings */
|
||||
@ -872,31 +875,32 @@ typedef struct ecma_string_t
|
||||
union
|
||||
{
|
||||
/**
|
||||
* Actual data of an utf-8 string type
|
||||
*/
|
||||
* Actual data of an utf-8 string type
|
||||
*/
|
||||
struct
|
||||
{
|
||||
uint16_t size; /**< Size of this utf-8 string in bytes */
|
||||
uint16_t length; /**< Length of this utf-8 string in characters */
|
||||
uint16_t size; /**< size of this utf-8 string in bytes */
|
||||
uint16_t length; /**< length of this utf-8 string in characters */
|
||||
} utf8_string;
|
||||
|
||||
/** UInt32-represented number placed locally in the descriptor */
|
||||
uint32_t uint32_number;
|
||||
|
||||
/** Identifier of magic string */
|
||||
lit_magic_string_id_t magic_string_id;
|
||||
|
||||
/** Identifier of external magic string */
|
||||
lit_magic_string_ex_id_t magic_string_ex_id;
|
||||
|
||||
/** Literal number */
|
||||
ecma_value_t lit_number;
|
||||
|
||||
/** For zeroing and comparison in some cases */
|
||||
uint32_t common_field;
|
||||
lit_utf8_size_t long_utf8_string_size; /**< size of this long utf-8 string in bytes */
|
||||
uint32_t uint32_number; /**< uint32-represented number placed locally in the descriptor */
|
||||
lit_magic_string_id_t magic_string_id; /**< identifier of a magic string */
|
||||
lit_magic_string_ex_id_t magic_string_ex_id; /**< identifier of an external magic string */
|
||||
ecma_value_t lit_number; /**< literal number (note: not a regular string type) */
|
||||
uint32_t common_field; /**< for zeroing and comparison in some cases */
|
||||
} u;
|
||||
} ecma_string_t;
|
||||
|
||||
/**
|
||||
* Long ECMA string-value descriptor.
*
* Used with the ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING container:
* the size in bytes is kept in header.u.long_utf8_string_size, the
* length in characters is kept in this extended descriptor, and the
* string data is placed right after the descriptor.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
ecma_string_t header; /**< string header (container type, hash, size in bytes) */
|
||||
lit_utf8_size_t long_utf8_string_length; /**< length of this long utf-8 string in characters */
|
||||
} ecma_long_string_t;
|
||||
|
||||
/**
|
||||
* Compiled byte code data.
|
||||
*/
|
||||
|
||||
@ -31,17 +31,6 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Maximum length of strings' concatenation
|
||||
*/
|
||||
#define ECMA_STRING_MAX_CONCATENATION_LENGTH (CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH)
|
||||
|
||||
/**
|
||||
* The length should be representable with int32_t.
|
||||
*/
|
||||
JERRY_STATIC_ASSERT (ECMA_STRING_MAX_CONCATENATION_LENGTH <= INT32_MAX,
|
||||
ECMA_STRING_MAX_CONCATENATION_LENGTH_should_be_representable_with_int32_t);
|
||||
|
||||
/**
|
||||
* The ecma string ref counter should start after the container field.
|
||||
*/
|
||||
@ -124,17 +113,37 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
|
||||
return ecma_get_magic_string_ex (magic_string_ex_id);
|
||||
}
|
||||
|
||||
JERRY_ASSERT (string_size > 0 && string_size <= UINT16_MAX);
|
||||
JERRY_ASSERT (string_size > 0);
|
||||
|
||||
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
|
||||
ecma_string_t *string_desc_p;
|
||||
lit_utf8_byte_t *data_p;
|
||||
|
||||
if (likely (string_size <= UINT16_MAX))
|
||||
{
|
||||
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
|
||||
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
|
||||
|
||||
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + string_size);
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.long_utf8_string_size = string_size;
|
||||
|
||||
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
|
||||
long_string_desc_p->long_utf8_string_length = lit_utf8_string_length (string_p, string_size);
|
||||
|
||||
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
|
||||
}
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size);
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
|
||||
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
|
||||
|
||||
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
|
||||
memcpy (data_p, string_p, string_size);
|
||||
return string_desc_p;
|
||||
} /* ecma_new_ecma_string_from_utf8 */
|
||||
@ -348,6 +357,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
|
||||
utf8_string1_length = string1_p->u.utf8_string.length;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string1_p;
|
||||
|
||||
utf8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
|
||||
utf8_string1_size = string1_p->u.long_utf8_string_size;
|
||||
utf8_string1_length = long_string_desc_p->long_utf8_string_length;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
|
||||
@ -384,6 +402,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
|
||||
utf8_string2_length = string2_p->u.utf8_string.length;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p;
|
||||
|
||||
utf8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
|
||||
utf8_string2_size = string2_p->u.long_utf8_string_size;
|
||||
utf8_string2_length = long_string_desc_p->long_utf8_string_length;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
|
||||
@ -418,20 +445,44 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
|
||||
|
||||
lit_utf8_size_t new_size = utf8_string1_size + utf8_string2_size;
|
||||
|
||||
JERRY_ASSERT (new_size <= UINT16_MAX);
|
||||
/* It is impossible to allocate this large string. */
|
||||
if (new_size < (utf8_string1_size | utf8_string2_size))
|
||||
{
|
||||
jerry_fatal (ERR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
|
||||
ecma_string_t *string_desc_p;
|
||||
lit_utf8_byte_t *data_p;
|
||||
|
||||
if (likely (new_size <= UINT16_MAX))
|
||||
{
|
||||
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
|
||||
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
|
||||
|
||||
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + new_size);
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.long_utf8_string_size = new_size;
|
||||
|
||||
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
|
||||
long_string_desc_p->long_utf8_string_length = utf8_string1_length + utf8_string2_length;
|
||||
|
||||
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
|
||||
}
|
||||
|
||||
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
|
||||
string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, utf8_string2_p, utf8_string2_size);
|
||||
string_desc_p->u.common_field = 0;
|
||||
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
|
||||
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
|
||||
|
||||
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
|
||||
memcpy (data_p, utf8_string1_p, utf8_string1_size);
|
||||
memcpy (data_p + utf8_string1_size, utf8_string2_p, utf8_string2_size);
|
||||
|
||||
return string_desc_p;
|
||||
} /* ecma_concat_ecma_strings */
|
||||
|
||||
@ -480,6 +531,13 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */
|
||||
jmem_heap_free_block (string_p, string_p->u.utf8_string.size + sizeof (ecma_string_t));
|
||||
return;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
JERRY_ASSERT (string_p->u.long_utf8_string_size > UINT16_MAX);
|
||||
|
||||
jmem_heap_free_block (string_p, string_p->u.long_utf8_string_size + sizeof (ecma_long_string_t));
|
||||
return;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
case ECMA_STRING_CONTAINER_MAGIC_STRING:
|
||||
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
|
||||
@ -518,6 +576,7 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */
|
||||
}
|
||||
|
||||
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
case ECMA_STRING_CONTAINER_MAGIC_STRING:
|
||||
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
|
||||
{
|
||||
@ -562,7 +621,8 @@ ecma_string_get_array_index (const ecma_string_t *str_p, /**< ecma-string */
|
||||
*out_index_p = index;
|
||||
return index != UINT32_MAX;
|
||||
}
|
||||
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING)
|
||||
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING
|
||||
|| type == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -660,6 +720,12 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
|
||||
memcpy (buffer_p, string_desc_p + 1, size);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
size = string_desc_p->u.long_utf8_string_size;
|
||||
memcpy (buffer_p, ((ecma_long_string_t *) string_desc_p) + 1, size);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
const uint32_t uint32_number = string_desc_p->u.uint32_number;
|
||||
@ -775,6 +841,14 @@ ecma_string_raw_chars (const ecma_string_t *string_p, /**< ecma-string */
|
||||
result_p = (const lit_utf8_byte_t *) (string_p + 1);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
size = string_p->u.long_utf8_string_size;
|
||||
ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p;
|
||||
length = long_string_p->long_utf8_string_length;
|
||||
result_p = (const lit_utf8_byte_t *) (long_string_p + 1);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
size = (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
|
||||
@ -890,7 +964,8 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
|
||||
}
|
||||
default:
|
||||
{
|
||||
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING);
|
||||
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
|
||||
|| ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -910,6 +985,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
|
||||
utf8_string1_size = string1_p->u.utf8_string.size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
|
||||
utf8_string1_size = string1_p->u.long_utf8_string_size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
|
||||
@ -942,6 +1023,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
|
||||
utf8_string2_size = string2_p->u.utf8_string.size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
|
||||
utf8_string2_size = string2_p->u.long_utf8_string_size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
|
||||
@ -986,6 +1073,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
|
||||
{
|
||||
JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
|
||||
|
||||
/* Fast paths first. */
|
||||
if (string1_p == string2_p)
|
||||
{
|
||||
return true;
|
||||
@ -998,7 +1086,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
|
||||
|
||||
ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
|
||||
|
||||
if (string1_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
|
||||
if (string1_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
|
||||
&& string1_container == ECMA_STRING_GET_CONTAINER (string2_p))
|
||||
{
|
||||
return string1_p->u.common_field == string2_p->u.common_field;
|
||||
@ -1041,6 +1129,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
|
||||
utf8_string1_size = string1_p->u.utf8_string.size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
|
||||
utf8_string1_size = string1_p->u.long_utf8_string_size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
|
||||
@ -1073,6 +1167,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
|
||||
utf8_string2_size = string2_p->u.utf8_string.size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
|
||||
utf8_string2_size = string2_p->u.long_utf8_string_size;
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
|
||||
@ -1117,6 +1217,10 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
|
||||
{
|
||||
return (ecma_length_t) (string_p->u.utf8_string.length);
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
return (ecma_length_t) (((ecma_long_string_t *) string_p)->long_utf8_string_length);
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
return ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
|
||||
@ -1152,6 +1256,10 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
|
||||
{
|
||||
return (lit_utf8_size_t) string_p->u.utf8_string.size;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
return (lit_utf8_size_t) string_p->u.long_utf8_string_size;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
return (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
|
||||
|
||||
@ -164,7 +164,7 @@ ecma_lcache_lookup (ecma_object_t *object_p, /**< object */
|
||||
JERRY_ASSERT ((prop_name_p->hash & ECMA_LCACHE_HASH_MASK) == (entry_prop_name_p->hash & ECMA_LCACHE_HASH_MASK));
|
||||
|
||||
if (prop_name_p == entry_prop_name_p
|
||||
|| (prop_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
|
||||
|| (prop_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
|
||||
&& prop_container == ECMA_STRING_GET_CONTAINER (entry_prop_name_p)
|
||||
&& prop_name_p->u.common_field == entry_prop_name_p->u.common_field))
|
||||
{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user