Add long string support.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg 2016-08-23 03:53:13 -07:00
parent efdf91a0d6
commit cfcb4c707c
4 changed files with 162 additions and 55 deletions

View File

@ -85,11 +85,6 @@
#define CONFIG_ECMA_REFERENCE_COUNTER_LIMIT ((1u << CONFIG_ECMA_REFERENCE_COUNTER_WIDTH) - 1u)
/**
* Maximum length of strings' concatenation
*/
#define CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH (1048576)
/**
* Use 32-bit/64-bit float for ecma-numbers
*/

View File

@ -809,7 +809,10 @@ typedef struct
*/
typedef enum
{
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string */
ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
* maximum size is 2^16. */
ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string
* maximum size is 2^32. */
ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represented Number
stored locally in the string's descriptor */
ECMA_STRING_CONTAINER_MAGIC_STRING, /**< the ecma-string is equal to one of ECMA magic strings */
@ -872,31 +875,32 @@ typedef struct ecma_string_t
union
{
/**
* Actual data of an utf-8 string type
*/
* Actual data of an utf-8 string type
*/
struct
{
uint16_t size; /**< Size of this utf-8 string in bytes */
uint16_t length; /**< Length of this utf-8 string in characters */
uint16_t size; /**< size of this utf-8 string in bytes */
uint16_t length; /**< length of this utf-8 string in characters */
} utf8_string;
/** UInt32-represented number placed locally in the descriptor */
uint32_t uint32_number;
/** Identifier of magic string */
lit_magic_string_id_t magic_string_id;
/** Identifier of external magic string */
lit_magic_string_ex_id_t magic_string_ex_id;
/** Literal number */
ecma_value_t lit_number;
/** For zeroing and comparison in some cases */
uint32_t common_field;
lit_utf8_size_t long_utf8_string_size; /**< size of this long utf-8 string in bytes */
uint32_t uint32_number; /**< uint32-represented number placed locally in the descriptor */
lit_magic_string_id_t magic_string_id; /**< identifier of a magic string */
lit_magic_string_ex_id_t magic_string_ex_id; /**< identifier of an external magic string */
ecma_value_t lit_number; /**< literal number (note: not a regular string type) */
uint32_t common_field; /**< for zeroing and comparison in some cases */
} u;
} ecma_string_t;
/**
* Long ECMA string-value descriptor
*
* Used for strings with the ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING container type.
* The size in bytes is kept in header.u.long_utf8_string_size, the length in characters
* is kept in long_utf8_string_length, and the utf-8 (cesu8) bytes of the string are
* stored directly after this descriptor.
*/
typedef struct
{
ecma_string_t header; /**< string header */
lit_utf8_size_t long_utf8_string_length; /**< length of this long utf-8 string in characters */
} ecma_long_string_t;
/**
* Compiled byte code data.
*/

View File

@ -31,17 +31,6 @@
* @{
*/
/**
* Maximum length of strings' concatenation
*/
#define ECMA_STRING_MAX_CONCATENATION_LENGTH (CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH)
/**
* The length should be representable with int32_t.
*/
JERRY_STATIC_ASSERT (ECMA_STRING_MAX_CONCATENATION_LENGTH <= INT32_MAX,
ECMA_STRING_MAX_CONCATENATION_LENGTH_should_be_representable_with_int32_t);
/**
* The ecma string ref counter should start after the container field.
*/
@ -124,17 +113,37 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
return ecma_get_magic_string_ex (magic_string_ex_id);
}
JERRY_ASSERT (string_size > 0 && string_size <= UINT16_MAX);
JERRY_ASSERT (string_size > 0);
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
ecma_string_t *string_desc_p;
lit_utf8_byte_t *data_p;
if (likely (string_size <= UINT16_MAX))
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
}
else
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + string_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.long_utf8_string_size = string_size;
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
long_string_desc_p->long_utf8_string_length = lit_utf8_string_length (string_p, string_size);
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
}
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size);
string_desc_p->u.common_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) string_size;
string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size);
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
memcpy (data_p, string_p, string_size);
return string_desc_p;
} /* ecma_new_ecma_string_from_utf8 */
@ -348,6 +357,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
utf8_string1_length = string1_p->u.utf8_string.length;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string1_p;
utf8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
utf8_string1_size = string1_p->u.long_utf8_string_size;
utf8_string1_length = long_string_desc_p->long_utf8_string_length;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@ -384,6 +402,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
utf8_string2_length = string2_p->u.utf8_string.length;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p;
utf8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
utf8_string2_size = string2_p->u.long_utf8_string_size;
utf8_string2_length = long_string_desc_p->long_utf8_string_length;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@ -418,20 +445,44 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */
lit_utf8_size_t new_size = utf8_string1_size + utf8_string2_size;
JERRY_ASSERT (new_size <= UINT16_MAX);
/* It is impossible to allocate this large string. */
if (new_size < (utf8_string1_size | utf8_string2_size))
{
jerry_fatal (ERR_OUT_OF_MEMORY);
}
ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
ecma_string_t *string_desc_p;
lit_utf8_byte_t *data_p;
if (likely (new_size <= UINT16_MAX))
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
}
else
{
string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + new_size);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->u.common_field = 0;
string_desc_p->u.long_utf8_string_size = new_size;
ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p;
long_string_desc_p->long_utf8_string_length = utf8_string1_length + utf8_string2_length;
data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1);
}
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, utf8_string2_p, utf8_string2_size);
string_desc_p->u.common_field = 0;
string_desc_p->u.utf8_string.size = (uint16_t) new_size;
string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length);
lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1);
memcpy (data_p, utf8_string1_p, utf8_string1_size);
memcpy (data_p + utf8_string1_size, utf8_string2_p, utf8_string2_size);
return string_desc_p;
} /* ecma_concat_ecma_strings */
@ -480,6 +531,13 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */
jmem_heap_free_block (string_p, string_p->u.utf8_string.size + sizeof (ecma_string_t));
return;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
JERRY_ASSERT (string_p->u.long_utf8_string_size > UINT16_MAX);
jmem_heap_free_block (string_p, string_p->u.long_utf8_string_size + sizeof (ecma_long_string_t));
return;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
case ECMA_STRING_CONTAINER_MAGIC_STRING:
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
@ -518,6 +576,7 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */
}
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
case ECMA_STRING_CONTAINER_MAGIC_STRING:
case ECMA_STRING_CONTAINER_MAGIC_STRING_EX:
{
@ -562,7 +621,8 @@ ecma_string_get_array_index (const ecma_string_t *str_p, /**< ecma-string */
*out_index_p = index;
return index != UINT32_MAX;
}
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING)
else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING
|| type == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING)
{
return false;
}
@ -660,6 +720,12 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
memcpy (buffer_p, string_desc_p + 1, size);
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
size = string_desc_p->u.long_utf8_string_size;
memcpy (buffer_p, ((ecma_long_string_t *) string_desc_p) + 1, size);
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
const uint32_t uint32_number = string_desc_p->u.uint32_number;
@ -775,6 +841,14 @@ ecma_string_raw_chars (const ecma_string_t *string_p, /**< ecma-string */
result_p = (const lit_utf8_byte_t *) (string_p + 1);
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
size = string_p->u.long_utf8_string_size;
ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p;
length = long_string_p->long_utf8_string_length;
result_p = (const lit_utf8_byte_t *) (long_string_p + 1);
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
size = (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
@ -890,7 +964,8 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
}
default:
{
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING);
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
|| ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING);
break;
}
}
@ -910,6 +985,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
utf8_string1_size = string1_p->u.utf8_string.size;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
utf8_string1_size = string1_p->u.long_utf8_string_size;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@ -942,6 +1023,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri
utf8_string2_size = string2_p->u.utf8_string.size;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
utf8_string2_size = string2_p->u.long_utf8_string_size;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@ -986,6 +1073,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
{
JERRY_ASSERT (string1_p != NULL && string2_p != NULL);
/* Fast paths first. */
if (string1_p == string2_p)
{
return true;
@ -998,7 +1086,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */
ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p);
if (string1_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
if (string1_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
&& string1_container == ECMA_STRING_GET_CONTAINER (string2_p))
{
return string1_p->u.common_field == string2_p->u.common_field;
@ -1041,6 +1129,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
utf8_string1_size = string1_p->u.utf8_string.size;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1);
utf8_string1_size = string1_p->u.long_utf8_string_size;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number,
@ -1073,6 +1167,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma-
utf8_string2_size = string2_p->u.utf8_string.size;
break;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1);
utf8_string2_size = string2_p->u.long_utf8_string_size;
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number,
@ -1117,6 +1217,10 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
{
return (ecma_length_t) (string_p->u.utf8_string.length);
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
return (ecma_length_t) (((ecma_long_string_t *) string_p)->long_utf8_string_length);
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
return ecma_string_get_number_in_desc_size (string_p->u.uint32_number);
@ -1152,6 +1256,10 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
{
return (lit_utf8_size_t) string_p->u.utf8_string.size;
}
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
{
return (lit_utf8_size_t) string_p->u.long_utf8_string_size;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
{
return (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number);

View File

@ -164,7 +164,7 @@ ecma_lcache_lookup (ecma_object_t *object_p, /**< object */
JERRY_ASSERT ((prop_name_p->hash & ECMA_LCACHE_HASH_MASK) == (entry_prop_name_p->hash & ECMA_LCACHE_HASH_MASK));
if (prop_name_p == entry_prop_name_p
|| (prop_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING
|| (prop_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING
&& prop_container == ECMA_STRING_GET_CONTAINER (entry_prop_name_p)
&& prop_name_p->u.common_field == entry_prop_name_p->u.common_field))
{