From cfcb4c707c47fa1a9da09487f0614d93df6e72b8 Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Tue, 23 Aug 2016 03:53:13 -0700 Subject: [PATCH] Add long string support. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com --- jerry-core/config.h | 5 - jerry-core/ecma/base/ecma-globals.h | 42 +++--- jerry-core/ecma/base/ecma-helpers-string.c | 168 +++++++++++++++++---- jerry-core/ecma/base/ecma-lcache.c | 2 +- 4 files changed, 162 insertions(+), 55 deletions(-) diff --git a/jerry-core/config.h b/jerry-core/config.h index 8289df79a..01fa1c693 100644 --- a/jerry-core/config.h +++ b/jerry-core/config.h @@ -85,11 +85,6 @@ #define CONFIG_ECMA_REFERENCE_COUNTER_LIMIT ((1u << CONFIG_ECMA_REFERENCE_COUNTER_WIDTH) - 1u) -/** - * Maximum length of strings' concatenation - */ -#define CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH (1048576) - /** * Use 32-bit/64-bit float for ecma-numbers */ diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index be9fc4a25..113ef36d7 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -809,7 +809,10 @@ typedef struct */ typedef enum { - ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string */ + ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string + * maximum size is 2^16. */ + ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string + * maximum size is 2^32. 
*/ ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represeneted Number stored locally in the string's descriptor */ ECMA_STRING_CONTAINER_MAGIC_STRING, /**< the ecma-string is equal to one of ECMA magic strings */ @@ -872,31 +875,32 @@ typedef struct ecma_string_t union { /** - * Actual data of an utf-8 string type - */ + * Actual data of an utf-8 string type + */ struct { - uint16_t size; /**< Size of this utf-8 string in bytes */ - uint16_t length; /**< Length of this utf-8 string in characters */ + uint16_t size; /**< size of this utf-8 string in bytes */ + uint16_t length; /**< length of this utf-8 string in characters */ } utf8_string; - /** UInt32-represented number placed locally in the descriptor */ - uint32_t uint32_number; - - /** Identifier of magic string */ - lit_magic_string_id_t magic_string_id; - - /** Identifier of external magic string */ - lit_magic_string_ex_id_t magic_string_ex_id; - - /** Literal number */ - ecma_value_t lit_number; - - /** For zeroing and comparison in some cases */ - uint32_t common_field; + lit_utf8_size_t long_utf8_string_size; /**< size of this long utf-8 string in bytes */ + uint32_t uint32_number; /**< uint32-represented number placed locally in the descriptor */ + lit_magic_string_id_t magic_string_id; /**< identifier of a magic string */ + lit_magic_string_ex_id_t magic_string_ex_id; /**< identifier of an external magic string */ + ecma_value_t lit_number; /**< literal number (note: not a regular string type) */ + uint32_t common_field; /**< for zeroing and comparison in some cases */ } u; } ecma_string_t; +/** + * Long ECMA string-value descriptor + */ +typedef struct +{ + ecma_string_t header; /**< string header */ + lit_utf8_size_t long_utf8_string_length; /**< length of this long utf-8 string in characters */ +} ecma_long_string_t; + /** * Compiled byte code data.
*/ diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index 9a8a0c259..aa3b3bb53 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -31,17 +31,6 @@ * @{ */ -/** - * Maximum length of strings' concatenation - */ -#define ECMA_STRING_MAX_CONCATENATION_LENGTH (CONFIG_ECMA_STRING_MAX_CONCATENATION_LENGTH) - -/** - * The length should be representable with int32_t. - */ -JERRY_STATIC_ASSERT (ECMA_STRING_MAX_CONCATENATION_LENGTH <= INT32_MAX, - ECMA_STRING_MAX_CONCATENATION_LENGTH_should_be_representable_with_int32_t); - /** * The ecma string ref counter should start after the container field. */ @@ -124,17 +113,37 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri return ecma_get_magic_string_ex (magic_string_ex_id); } - JERRY_ASSERT (string_size > 0 && string_size <= UINT16_MAX); + JERRY_ASSERT (string_size > 0); - ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size); + ecma_string_t *string_desc_p; + lit_utf8_byte_t *data_p; + + if (likely (string_size <= UINT16_MAX)) + { + string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + string_size); + + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; + string_desc_p->u.common_field = 0; + string_desc_p->u.utf8_string.size = (uint16_t) string_size; + string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size); + + data_p = (lit_utf8_byte_t *) (string_desc_p + 1); + } + else + { + string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + string_size); + + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE; + string_desc_p->u.common_field = 0; + string_desc_p->u.long_utf8_string_size = string_size; + + ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p; + 
long_string_desc_p->long_utf8_string_length = lit_utf8_string_length (string_p, string_size); + + data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1); + } - string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size); - string_desc_p->u.common_field = 0; - string_desc_p->u.utf8_string.size = (uint16_t) string_size; - string_desc_p->u.utf8_string.length = (uint16_t) lit_utf8_string_length (string_p, string_size); - - lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1); memcpy (data_p, string_p, string_size); return string_desc_p; } /* ecma_new_ecma_string_from_utf8 */ @@ -348,6 +357,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */ utf8_string1_length = string1_p->u.utf8_string.length; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string1_p; + + utf8_string1_p = (lit_utf8_byte_t *) (long_string_desc_p + 1); + utf8_string1_size = string1_p->u.long_utf8_string_size; + utf8_string1_length = long_string_desc_p->long_utf8_string_length; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number, @@ -384,6 +402,15 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */ utf8_string2_length = string2_p->u.utf8_string.length; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string2_p; + + utf8_string2_p = (lit_utf8_byte_t *) (long_string_desc_p + 1); + utf8_string2_size = string2_p->u.long_utf8_string_size; + utf8_string2_length = long_string_desc_p->long_utf8_string_length; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number, @@ -418,20 +445,44 @@ ecma_concat_ecma_strings 
(ecma_string_t *string1_p, /**< first ecma-string */ lit_utf8_size_t new_size = utf8_string1_size + utf8_string2_size; - JERRY_ASSERT (new_size <= UINT16_MAX); + /* It is impossible to allocate this large string. */ + if (new_size < (utf8_string1_size | utf8_string2_size)) + { + jerry_fatal (ERR_OUT_OF_MEMORY); + } - ecma_string_t *string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size); + ecma_string_t *string_desc_p; + lit_utf8_byte_t *data_p; + + if (likely (new_size <= UINT16_MAX)) + { + string_desc_p = jmem_heap_alloc_block (sizeof (ecma_string_t) + new_size); + + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; + string_desc_p->u.common_field = 0; + string_desc_p->u.utf8_string.size = (uint16_t) new_size; + string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length); + + data_p = (lit_utf8_byte_t *) (string_desc_p + 1); + } + else + { + string_desc_p = jmem_heap_alloc_block (sizeof (ecma_long_string_t) + new_size); + + string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE; + string_desc_p->u.common_field = 0; + string_desc_p->u.long_utf8_string_size = new_size; + + ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_desc_p; + long_string_desc_p->long_utf8_string_length = utf8_string1_length + utf8_string2_length; + + data_p = (lit_utf8_byte_t *) (long_string_desc_p + 1); + } - string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE; string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, utf8_string2_p, utf8_string2_size); - string_desc_p->u.common_field = 0; - string_desc_p->u.utf8_string.size = (uint16_t) new_size; - string_desc_p->u.utf8_string.length = (uint16_t) (utf8_string1_length + utf8_string2_length); - lit_utf8_byte_t *data_p = (lit_utf8_byte_t *) (string_desc_p + 1); memcpy (data_p, utf8_string1_p, utf8_string1_size); memcpy 
(data_p + utf8_string1_size, utf8_string2_p, utf8_string2_size); - return string_desc_p; } /* ecma_concat_ecma_strings */ @@ -480,6 +531,13 @@ ecma_deref_ecma_string (ecma_string_t *string_p) /**< ecma-string */ jmem_heap_free_block (string_p, string_p->u.utf8_string.size + sizeof (ecma_string_t)); return; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + JERRY_ASSERT (string_p->u.long_utf8_string_size > UINT16_MAX); + + jmem_heap_free_block (string_p, string_p->u.long_utf8_string_size + sizeof (ecma_long_string_t)); + return; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: case ECMA_STRING_CONTAINER_MAGIC_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING_EX: @@ -518,6 +576,7 @@ ecma_string_to_number (const ecma_string_t *str_p) /**< ecma-string */ } case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING: case ECMA_STRING_CONTAINER_MAGIC_STRING_EX: { @@ -562,7 +621,8 @@ ecma_string_get_array_index (const ecma_string_t *str_p, /**< ecma-string */ *out_index_p = index; return index != UINT32_MAX; } - else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING) + else if (type == ECMA_STRING_CONTAINER_MAGIC_STRING + || type == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING) { return false; } @@ -660,6 +720,12 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s memcpy (buffer_p, string_desc_p + 1, size); break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + size = string_desc_p->u.long_utf8_string_size; + memcpy (buffer_p, ((ecma_long_string_t *) string_desc_p) + 1, size); + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { const uint32_t uint32_number = string_desc_p->u.uint32_number; @@ -775,6 +841,14 @@ ecma_string_raw_chars (const ecma_string_t *string_p, /**< ecma-string */ result_p = (const lit_utf8_byte_t *) (string_p + 1); break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + size = string_p->u.long_utf8_string_size; + 
ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + length = long_string_p->long_utf8_string_length; + result_p = (const lit_utf8_byte_t *) (long_string_p + 1); + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { size = (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number); @@ -890,7 +964,8 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri } default: { - JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING); + JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING + || ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING); break; } } @@ -910,6 +985,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri utf8_string1_size = string1_p->u.utf8_string.size; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1); + utf8_string1_size = string1_p->u.long_utf8_string_size; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number, @@ -942,6 +1023,12 @@ ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /* ecma-stri utf8_string2_size = string2_p->u.utf8_string.size; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1); + utf8_string2_size = string2_p->u.long_utf8_string_size; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number, @@ -986,6 +1073,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */ { JERRY_ASSERT (string1_p != NULL && string2_p != NULL); + /* Fast paths first. 
*/ if (string1_p == string2_p) { return true; @@ -998,7 +1086,7 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /* ecma-string */ ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p); - if (string1_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING + if (string1_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING && string1_container == ECMA_STRING_GET_CONTAINER (string2_p)) { return string1_p->u.common_field == string2_p->u.common_field; @@ -1041,6 +1129,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma- utf8_string1_size = string1_p->u.utf8_string.size; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + utf8_string1_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string1_p) + 1); + utf8_string1_size = string1_p->u.long_utf8_string_size; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string1_size = ecma_uint32_to_utf8_string (string1_p->u.uint32_number, @@ -1073,6 +1167,12 @@ ecma_compare_ecma_strings_relational (const ecma_string_t *string1_p, /**< ecma- utf8_string2_size = string2_p->u.utf8_string.size; break; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + utf8_string2_p = (lit_utf8_byte_t *) (((ecma_long_string_t *) string2_p) + 1); + utf8_string2_size = string2_p->u.long_utf8_string_size; + break; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { utf8_string2_size = ecma_uint32_to_utf8_string (string2_p->u.uint32_number, @@ -1117,6 +1217,10 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */ { return (ecma_length_t) (string_p->u.utf8_string.length); } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + return (ecma_length_t) (((ecma_long_string_t *) string_p)->long_utf8_string_length); + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { return ecma_string_get_number_in_desc_size (string_p->u.uint32_number); @@ -1152,6 +1256,10 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */ { 
return (lit_utf8_size_t) string_p->u.utf8_string.size; } + case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + { + return (lit_utf8_size_t) string_p->u.long_utf8_string_size; + } case ECMA_STRING_CONTAINER_UINT32_IN_DESC: { return (lit_utf8_size_t) ecma_string_get_number_in_desc_size (string_p->u.uint32_number); diff --git a/jerry-core/ecma/base/ecma-lcache.c b/jerry-core/ecma/base/ecma-lcache.c index fb0185564..b97874e5c 100644 --- a/jerry-core/ecma/base/ecma-lcache.c +++ b/jerry-core/ecma/base/ecma-lcache.c @@ -164,7 +164,7 @@ ecma_lcache_lookup (ecma_object_t *object_p, /**< object */ JERRY_ASSERT ((prop_name_p->hash & ECMA_LCACHE_HASH_MASK) == (entry_prop_name_p->hash & ECMA_LCACHE_HASH_MASK)); if (prop_name_p == entry_prop_name_p - || (prop_container != ECMA_STRING_CONTAINER_HEAP_UTF8_STRING + || (prop_container > ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING && prop_container == ECMA_STRING_GET_CONTAINER (entry_prop_name_p) && prop_name_p->u.common_field == entry_prop_name_p->u.common_field)) {