diff --git a/docs/02.API-REFERENCE.md b/docs/02.API-REFERENCE.md index 5aee46ac0..278b46e6c 100644 --- a/docs/02.API-REFERENCE.md +++ b/docs/02.API-REFERENCE.md @@ -4564,6 +4564,100 @@ jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, - [jerry_create_string_from_utf8](#jerry_create_string_from_utf8) +## jerry_create_external_string + +**Summary** + +Create an external string from a valid CESU-8 string. The string buffer passed to the function +should not be modified until the free callback is called. This function can be used to avoid +the duplication of large strings. The free callback receives `str_p` as its argument and may be called before this function returns when the engine does not need to keep the buffer (e.g. for short strings). + +*Note*: Returned value must be freed with [jerry_release_value](#jerry_release_value) when it +is no longer needed. + +**Prototype** + +```c +jerry_value_t +jerry_create_external_string (const jerry_char_t *str_p, + jerry_object_native_free_callback_t free_cb) +``` + +- `str_p` - non-null pointer to string +- `free_cb` - optional callback which is called right before the string is freed +- return value - value of the created string + +*New in version [[NEXT_RELEASE]]* + +**Example** + +```c +{ + const char* string_p = "a large and immutable string: this is a story about ...."; + jerry_value_t string_value = jerry_create_external_string ((const jerry_char_t *) string_p, + NULL); + + ... // usage of string_value + + jerry_release_value (string_value); +} +``` + +**See also** + +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) +- [jerry_create_external_string_sz](#jerry_create_external_string_sz) + + +## jerry_create_external_string_sz + +**Summary** + +Create an external string from a valid CESU-8 string. The string buffer passed to the function +should not be modified until the free callback is called. This function can be used to avoid +the duplication of large strings. The free callback receives `str_p` as its argument and may be called before this function returns when the engine does not need to keep the buffer (e.g. for short strings). + +*Note*: Returned value must be freed with [jerry_release_value](#jerry_release_value) when it +is no longer needed. 
+ +**Prototype** + +```c +jerry_value_t +jerry_create_external_string_sz (const jerry_char_t *str_p, + jerry_size_t str_size, + jerry_object_native_free_callback_t free_cb) +``` + +- `str_p` - non-null pointer to string +- `str_size` - size of the string +- `free_cb` - optional callback which is called right before the string is freed +- return value - value of the created string + +*New in version [[NEXT_RELEASE]]* + +**Example** + +```c +{ + const char* string_p = "a large and immutable string: this is a story about ...."; + jerry_value_t string_value = jerry_create_external_string_sz ((const jerry_char_t *) string_p, + strlen (string_p), + NULL); + + ... // usage of string_value + + jerry_release_value (string_value); +} + +``` + +**See also** + +- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string) +- [jerry_create_external_string](#jerry_create_external_string) + + ## jerry_create_symbol **Summary** diff --git a/jerry-core/api/jerry.c b/jerry-core/api/jerry.c index 1320009e9..d36966f19 100644 --- a/jerry-core/api/jerry.c +++ b/jerry-core/api/jerry.c @@ -1710,6 +1710,42 @@ jerry_create_string_sz (const jerry_char_t *str_p, /**< pointer to string */ return ecma_make_string_value (ecma_str_p); } /* jerry_create_string_sz */ +/** + * Create external string from a valid CESU-8 string + * + * Note: + * returned value must be freed with jerry_release_value, when it is no longer needed. + * + * @return value of the created external string + */ +jerry_value_t +jerry_create_external_string (const jerry_char_t *str_p, /**< pointer to string */ + jerry_object_native_free_callback_t free_cb) /**< free callback */ +{ + return jerry_create_external_string_sz (str_p, lit_zt_utf8_string_size ((lit_utf8_byte_t *) str_p), free_cb); +} /* jerry_create_external_string */ + +/** + * Create external string from a valid CESU-8 string + * + * Note: + * returned value must be freed with jerry_release_value when it is no longer needed. 
+ * + * @return value of the created external string + */ +jerry_value_t +jerry_create_external_string_sz (const jerry_char_t *str_p, /**< pointer to string */ + jerry_size_t str_size, /**< string size */ + jerry_object_native_free_callback_t free_cb) /**< free callback */ +{ + jerry_assert_api_available (); + + ecma_string_t *ecma_str_p = ecma_new_ecma_external_string_from_cesu8 ((lit_utf8_byte_t *) str_p, + (lit_utf8_size_t) str_size, + free_cb); + return ecma_make_string_value (ecma_str_p); +} /* jerry_create_external_string_sz */ + /** * Create symbol from an api value * diff --git a/jerry-core/ecma/base/ecma-alloc.c b/jerry-core/ecma/base/ecma-alloc.c index 9e72b9971..be30dc6cb 100644 --- a/jerry-core/ecma/base/ecma-alloc.c +++ b/jerry-core/ecma/base/ecma-alloc.c @@ -179,6 +179,34 @@ ecma_dealloc_extended_string (ecma_extended_string_t *ext_string_p) /**< extende jmem_heap_free_block (ext_string_p, sizeof (ecma_extended_string_t)); } /* ecma_dealloc_extended_string */ +/** + * Allocate memory for external ecma-string descriptor + * + * @return pointer to allocated memory + */ +inline ecma_external_string_t * JERRY_ATTR_ALWAYS_INLINE +ecma_alloc_external_string (void) +{ +#if ENABLED (JERRY_MEM_STATS) + jmem_stats_allocate_string_bytes (sizeof (ecma_external_string_t)); +#endif /* ENABLED (JERRY_MEM_STATS) */ + + return (ecma_external_string_t *) jmem_heap_alloc_block (sizeof (ecma_external_string_t)); +} /* ecma_alloc_external_string */ + +/** + * Dealloc memory from external ecma-string descriptor + */ +inline void JERRY_ATTR_ALWAYS_INLINE +ecma_dealloc_external_string (ecma_external_string_t *ext_string_p) /**< external string to be freed */ +{ +#if ENABLED (JERRY_MEM_STATS) + jmem_stats_free_string_bytes (sizeof (ecma_external_string_t)); +#endif /* ENABLED (JERRY_MEM_STATS) */ + + jmem_heap_free_block (ext_string_p, sizeof (ecma_external_string_t)); +} /* ecma_dealloc_external_string */ + /** * Allocate memory for an string with character data * diff --git 
a/jerry-core/ecma/base/ecma-alloc.h b/jerry-core/ecma/base/ecma-alloc.h index 437931781..a5af9d1a5 100644 --- a/jerry-core/ecma/base/ecma-alloc.h +++ b/jerry-core/ecma/base/ecma-alloc.h @@ -85,6 +85,18 @@ ecma_extended_string_t *ecma_alloc_extended_string (void); */ void ecma_dealloc_extended_string (ecma_extended_string_t *string_p); +/** + * Allocate memory for external ecma-string descriptor + * + * @return pointer to allocated memory + */ +ecma_external_string_t *ecma_alloc_external_string (void); + +/** + * Dealloc memory from external ecma-string descriptor + */ +void ecma_dealloc_external_string (ecma_external_string_t *string_p); + /** * Allocate memory for string with character data * diff --git a/jerry-core/ecma/base/ecma-globals.h b/jerry-core/ecma/base/ecma-globals.h index 5a8f156d8..204040815 100644 --- a/jerry-core/ecma/base/ecma-globals.h +++ b/jerry-core/ecma/base/ecma-globals.h @@ -1526,14 +1526,12 @@ typedef enum { ECMA_STRING_CONTAINER_HEAP_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string * maximum size is 2^16. */ - ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING, /**< actual data is on the heap as an utf-8 (cesu8) string - * maximum size is 2^32. */ - ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< actual data is UInt32-represeneted Number - stored locally in the string's descriptor */ + ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING, /**< the string is a long string or provided externally + * and only its attributes are stored. */ + ECMA_STRING_CONTAINER_UINT32_IN_DESC, /**< string representation of an uint32 number */ ECMA_STRING_CONTAINER_HEAP_ASCII_STRING, /**< actual data is on the heap as an ASCII string * maximum size is 2^16. 
*/ ECMA_STRING_CONTAINER_MAGIC_STRING_EX, /**< the ecma-string is equal to one of external magic strings */ - ECMA_STRING_CONTAINER_SYMBOL, /**< the ecma-string is a symbol */ ECMA_STRING_CONTAINER__MAX = ECMA_STRING_CONTAINER_SYMBOL /**< maximum value */ @@ -1611,43 +1609,53 @@ typedef struct uint16_t size; /**< size of this ASCII string in bytes */ } ecma_ascii_string_t; -/** - * ECMA long UTF8 string-value descriptor - */ -typedef struct -{ - ecma_string_t header; /**< string header */ - uint16_t size; /**< size of this utf-8 string in bytes */ - uint16_t length; /**< length of this utf-8 string in bytes */ -} ecma_utf8_string_t; - /** * ECMA UTF8 string-value descriptor */ typedef struct { ecma_string_t header; /**< string header */ - lit_utf8_size_t size; /**< size of this long utf-8 string in bytes */ - lit_utf8_size_t length; /**< length of this long utf-8 string in bytes */ -} ecma_long_utf8_string_t; + uint16_t size; /**< size of this utf-8 string in bytes */ + uint16_t length; /**< length of this utf-8 string in characters */ +} ecma_utf8_string_t; + +/** + * Long or external CESU8 string-value descriptor + */ +typedef struct +{ + ecma_string_t header; /**< string header */ + const lit_utf8_byte_t *string_p; /**< string data (buffer following the descriptor or caller-provided buffer) */ + lit_utf8_size_t size; /**< size of this long or external string in bytes */ + lit_utf8_size_t length; /**< length of this long or external string in characters */ +} ecma_long_string_t; + +/** + * External CESU8 string-value descriptor + */ +typedef struct +{ + ecma_long_string_t header; /**< long string header */ + ecma_object_native_free_callback_t free_cb; /**< free callback (may be NULL), called with the string data pointer */ +} ecma_external_string_t; /** * Get the start position of the string buffer of an ecma ASCII string */ #define ECMA_ASCII_STRING_GET_BUFFER(string_p) \ - ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_ascii_string_t))) + ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_ascii_string_t))) /** * Get the start position of the string buffer of an ecma UTF8 
string */ #define ECMA_UTF8_STRING_GET_BUFFER(string_p) \ - ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_utf8_string_t))) + ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_utf8_string_t))) /** - * Get the start position of the string buffer of an ecma long UTF8 string + * Get the start position of the string buffer of an ecma long CESU8 string */ -#define ECMA_LONG_UTF8_STRING_GET_BUFFER(string_p) \ - ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_long_utf8_string_t))) +#define ECMA_LONG_STRING_BUFFER_START(string_p) \ + ((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_long_string_t))) /** * ECMA extended string-value descriptor diff --git a/jerry-core/ecma/base/ecma-helpers-string.c b/jerry-core/ecma/base/ecma-helpers-string.c index 5ec11423e..bb060d97c 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.c +++ b/jerry-core/ecma/base/ecma-helpers-string.c @@ -158,10 +158,11 @@ ecma_string_get_chars_fast (const ecma_string_t *string_p, /**< ecma-string */ *size_p = ((ecma_utf8_string_t *) string_p)->size; return ECMA_UTF8_STRING_GET_BUFFER (string_p); } - case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING: { - *size_p = ((ecma_long_utf8_string_t *) string_p)->size; - return ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p); + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + *size_p = long_string_p->size; + return long_string_p->string_p; } case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: { @@ -275,14 +276,15 @@ ecma_new_ecma_string_from_utf8_buffer (lit_utf8_size_t length, /**< length of th return (ecma_string_t *) string_desc_p; } - ecma_long_utf8_string_t *string_desc_p; - string_desc_p = (ecma_long_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_long_utf8_string_t)); - string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE; - string_desc_p->size = 
size; - string_desc_p->length = length; + ecma_long_string_t *long_string_p; + long_string_p = (ecma_long_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_long_string_t)); + long_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING | ECMA_STRING_REF_ONE; + long_string_p->string_p = ECMA_LONG_STRING_BUFFER_START (long_string_p); + long_string_p->size = size; + long_string_p->length = length; - *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_desc_p); - return (ecma_string_t *) string_desc_p; + *data_p = ECMA_LONG_STRING_BUFFER_START (long_string_p); + return (ecma_string_t *) long_string_p; } /* ecma_new_ecma_string_from_utf8_buffer */ /** @@ -443,6 +445,55 @@ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string return (ecma_string_t *) string_desc_p; } /* ecma_new_ecma_string_from_utf8_converted_to_cesu8 */ +/** + * Allocate new ecma-external-string and fill it with characters from the cesu8 string + * + * @return pointer to ecma-string descriptor + */ +ecma_string_t * +ecma_new_ecma_external_string_from_cesu8 (const lit_utf8_byte_t *string_p, /**< cesu-8 string */ + lit_utf8_size_t string_size, /**< string size */ + ecma_object_native_free_callback_t free_cb) /**< free callback */ +{ + JERRY_ASSERT (string_p != NULL || string_size == 0); + JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size)); + + if (string_size < (sizeof (ecma_external_string_t) - sizeof (ecma_utf8_string_t))) + { + /* Normal strings are created for short strings. 
*/ + ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8 (string_p, string_size); + + if (free_cb != NULL) + { + free_cb ((void *) string_p); + } + return string_desc_p; + } + + ecma_string_t *string_desc_p = ecma_find_special_string (string_p, string_size); + + if (string_desc_p != NULL) + { + if (free_cb != NULL) + { + free_cb ((void *) string_p); + } + return string_desc_p; + } + + ecma_external_string_t *external_string_p = ecma_alloc_external_string (); + ecma_long_string_t *long_string_p = (ecma_long_string_t *) external_string_p; + + long_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING | ECMA_STRING_REF_ONE; + long_string_p->header.u.hash = lit_utf8_string_calc_hash (string_p, string_size); + long_string_p->string_p = string_p; + long_string_p->size = string_size; + long_string_p->length = lit_utf8_string_length (string_p, string_size); + external_string_p->free_cb = free_cb; + + return (ecma_string_t *) external_string_p; +} /* ecma_new_ecma_external_string_from_cesu8 */ + /** * Allocate new ecma-string and fill it with cesu-8 character which represents specified code unit * @@ -833,10 +884,24 @@ ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */ ecma_dealloc_string_buffer (string_p, ((ecma_utf8_string_t *) string_p)->size + sizeof (ecma_utf8_string_t)); return; } - case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING: { - ecma_dealloc_string_buffer (string_p, - ((ecma_long_utf8_string_t *) string_p)->size + sizeof (ecma_long_utf8_string_t)); + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + + if (long_string_p->string_p == ECMA_LONG_STRING_BUFFER_START (long_string_p)) + { + ecma_dealloc_string_buffer (string_p, long_string_p->size + sizeof (ecma_long_string_t)); + return; + } + + ecma_external_string_t *external_string_p = (ecma_external_string_t *) string_p; + + if (external_string_p->free_cb != NULL) + { + 
external_string_p->free_cb ((void *) external_string_p->header.string_p); + } + + ecma_dealloc_external_string (external_string_p); return; } case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: @@ -848,7 +913,7 @@ ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */ #if ENABLED (JERRY_ESNEXT) case ECMA_STRING_CONTAINER_SYMBOL: { - ecma_extended_string_t * symbol_p = (ecma_extended_string_t *) string_p; + ecma_extended_string_t *symbol_p = (ecma_extended_string_t *) string_p; ecma_free_value (symbol_p->u.symbol_descriptor); ecma_dealloc_extended_string (symbol_p); return; @@ -1406,12 +1471,12 @@ ecma_string_get_chars (const ecma_string_t *string_p, /**< ecma-string */ result_p = ECMA_UTF8_STRING_GET_BUFFER (utf8_string_desc_p); break; } - case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING: { - ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p; - size = long_utf8_string_desc_p->size; - length = long_utf8_string_desc_p->length; - result_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (long_utf8_string_desc_p); + ecma_long_string_t *long_string_desc_p = (ecma_long_string_t *) string_p; + size = long_string_desc_p->size; + length = long_string_desc_p->length; + result_p = long_string_desc_p->string_p; break; } case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: @@ -1644,6 +1709,45 @@ ecma_string_compare_to_property_name (ecma_property_t property, /**< property na return ecma_compare_ecma_non_direct_strings (prop_name_p, string_p); } /* ecma_string_compare_to_property_name */ +/** + * Helper for ecma_compare_ecma_strings_longpath to get string data + * + * @return string characters + */ +static const lit_utf8_byte_t * +ecma_compare_get_string_chars (const ecma_string_t *string_p, /**< ecma-string */ + lit_utf8_size_t *size_and_length_p) /**< [out] size and length */ +{ + switch (ECMA_STRING_GET_CONTAINER (string_p)) + { + case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING: + { + 
ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p; + size_and_length_p[0] = utf8_string_p->size; + size_and_length_p[1] = utf8_string_p->length; + return ECMA_UTF8_STRING_GET_BUFFER (string_p); + } + case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING: + { + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + size_and_length_p[0] = long_string_p->size; + size_and_length_p[1] = long_string_p->length; + return long_string_p->string_p; + } + case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING: + { + ecma_ascii_string_t *ascii_string_p = (ecma_ascii_string_t *) string_p; + size_and_length_p[0] = ascii_string_p->size; + size_and_length_p[1] = ascii_string_p->size; + return ECMA_ASCII_STRING_GET_BUFFER (string_p); + } + default: + { + return NULL; + } + } +} /* ecma_compare_get_string_chars */ + /** * Long path part of ecma-string to ecma-string comparison routine * @@ -1657,41 +1761,24 @@ static bool JERRY_ATTR_NOINLINE ecma_compare_ecma_strings_longpath (const ecma_string_t *string1_p, /**< ecma-string */ const ecma_string_t *string2_p) /**< ecma-string */ { - JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_GET_CONTAINER (string2_p)); - const lit_utf8_byte_t *utf8_string1_p, *utf8_string2_p; - lit_utf8_size_t utf8_string1_size, utf8_string2_size; + lit_utf8_size_t string1_size_and_length[2], string2_size_and_length[2]; - if (JERRY_LIKELY (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)) - { - utf8_string1_p = ECMA_ASCII_STRING_GET_BUFFER (string1_p); - utf8_string1_size = ((ecma_ascii_string_t *) string1_p)->size; - utf8_string2_p = ECMA_ASCII_STRING_GET_BUFFER (string2_p); - utf8_string2_size = ((ecma_ascii_string_t *) string2_p)->size; - } - else if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) - { - utf8_string1_p = ECMA_UTF8_STRING_GET_BUFFER (string1_p); - utf8_string1_size = ((ecma_utf8_string_t *) string1_p)->size; - utf8_string2_p = 
ECMA_UTF8_STRING_GET_BUFFER (string2_p); - utf8_string2_size = ((ecma_utf8_string_t *) string2_p)->size; - } - else - { - JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING); + utf8_string1_p = ecma_compare_get_string_chars (string1_p, string1_size_and_length); + utf8_string2_p = ecma_compare_get_string_chars (string2_p, string2_size_and_length); - utf8_string1_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string1_p); - utf8_string1_size = ((ecma_long_utf8_string_t *) string1_p)->size; - utf8_string2_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string2_p); - utf8_string2_size = ((ecma_long_utf8_string_t *) string2_p)->size; - } - - if (utf8_string1_size != utf8_string2_size) + if (utf8_string1_p == NULL || utf8_string2_p == NULL) { return false; } - return !memcmp ((char *) utf8_string1_p, (char *) utf8_string2_p, utf8_string1_size); + if (string1_size_and_length[0] != string2_size_and_length[0] + || string1_size_and_length[1] != string2_size_and_length[1]) + { + return false; + } + + return !memcmp ((char *) utf8_string1_p, (char *) utf8_string2_p, string1_size_and_length[0]); } /* ecma_compare_ecma_strings_longpath */ /** @@ -1718,30 +1805,17 @@ ecma_compare_ecma_strings (const ecma_string_t *string1_p, /**< ecma-string */ return false; } + /* Also compares uint32 values in descriptor. 
*/ if (string1_p->u.hash != string2_p->u.hash) { return false; } - ecma_string_container_t string1_container = ECMA_STRING_GET_CONTAINER (string1_p); - - if (string1_container != ECMA_STRING_GET_CONTAINER (string2_p)) + if (ECMA_STRING_GET_CONTAINER (string1_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC) { - return false; + return ECMA_STRING_GET_CONTAINER (string2_p) == ECMA_STRING_CONTAINER_UINT32_IN_DESC; } - if (string1_container == ECMA_STRING_CONTAINER_UINT32_IN_DESC) - { - return true; - } - -#if ENABLED (JERRY_ESNEXT) - if (string1_container == ECMA_STRING_CONTAINER_SYMBOL) - { - return false; - } -#endif /* ENABLED (JERRY_ESNEXT) */ - return ecma_compare_ecma_strings_longpath (string1_p, string2_p); } /* ecma_compare_ecma_strings */ @@ -1967,12 +2041,12 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */ if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { - return (lit_utf8_size_t) (((ecma_utf8_string_t *) string_p)->length); + return ((ecma_utf8_string_t *) string_p)->length; } - if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING) + if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING) { - return (lit_utf8_size_t) (((ecma_long_utf8_string_t *) string_p)->length); + return ((ecma_long_string_t *) string_p)->length; } JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX); @@ -2010,26 +2084,27 @@ ecma_string_get_utf8_length (const ecma_string_t *string_p) /**< ecma-string */ if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p; + lit_utf8_size_t size = utf8_string_p->size; - if (utf8_string_p->size == utf8_string_p->length) + if (size == utf8_string_p->length) { - return (lit_utf8_size_t) (utf8_string_p->length); + return size; } - return lit_get_utf8_length_of_cesu8_string 
(ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size); + return lit_get_utf8_length_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), size); } - if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING) + if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING) { - ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p; + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + lit_utf8_size_t size = long_string_p->size; - if (long_utf8_string_p->size == long_utf8_string_p->length) + if (size == long_string_p->length) { - return (lit_utf8_size_t) (long_utf8_string_p->length); + return size; } - return lit_get_utf8_length_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p), - long_utf8_string_p->size); + return lit_get_utf8_length_of_cesu8_string (long_string_p->string_p, size); } JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX); @@ -2065,12 +2140,12 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */ if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { - return (lit_utf8_size_t) (((ecma_utf8_string_t *) string_p)->size); + return ((ecma_utf8_string_t *) string_p)->size; } - if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING) + if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING) { - return (lit_utf8_size_t) (((ecma_long_utf8_string_t *) string_p)->size); + return ((ecma_long_string_t *) string_p)->size; } JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX); @@ -2106,26 +2181,26 @@ ecma_string_get_utf8_size (const ecma_string_t *string_p) /**< ecma-string */ if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING) { ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p; + 
lit_utf8_size_t size = utf8_string_p->size; - if (utf8_string_p->size == utf8_string_p->length) + if (size == utf8_string_p->length) { - return utf8_string_p->size; + return size; } - return lit_get_utf8_size_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), utf8_string_p->size); + return lit_get_utf8_size_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), size); } - if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING) + if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING) { - ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) string_p; + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; - if (long_utf8_string_p->size == long_utf8_string_p->length) + if (long_string_p->size == long_string_p->length) { - return long_utf8_string_p->size; + return long_string_p->size; } - return lit_get_utf8_size_of_cesu8_string (ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p), - long_utf8_string_p->size); + return lit_get_utf8_size_of_cesu8_string (long_string_p->string_p, long_string_p->size); } JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX); @@ -2217,13 +2292,13 @@ ecma_string_get_char_at_pos (const ecma_string_t *string_p, /**< ecma-string */ return lit_utf8_string_code_unit_at (data_p, size, index); } - case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING: + case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING: { - ecma_long_utf8_string_t *long_utf8_string_desc_p = (ecma_long_utf8_string_t *) string_p; - lit_utf8_size_t size = long_utf8_string_desc_p->size; - const lit_utf8_byte_t *data_p = ECMA_LONG_UTF8_STRING_GET_BUFFER (string_p); + ecma_long_string_t *long_string_p = (ecma_long_string_t *) string_p; + lit_utf8_size_t size = long_string_p->size; + const lit_utf8_byte_t *data_p = long_string_p->string_p; - if (JERRY_LIKELY (size == long_utf8_string_desc_p->length)) + if (JERRY_LIKELY (size == 
long_string_p->length)) { return (ecma_char_t) data_p[index]; } @@ -2686,7 +2761,7 @@ ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builde } else { - container_size = sizeof (ecma_long_utf8_string_t); + container_size = sizeof (ecma_long_string_t); } const size_t utf8_string_size = string_size + container_size; @@ -2711,14 +2786,15 @@ ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builde return (ecma_string_t *) utf8_string_p; } - ecma_long_utf8_string_t *long_utf8_string_p = (ecma_long_utf8_string_t *) header_p; + ecma_long_string_t *long_string_p = (ecma_long_string_t *) header_p; - long_utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING | ECMA_STRING_REF_ONE; - long_utf8_string_p->header.u.hash = hash; - long_utf8_string_p->size = string_size; - long_utf8_string_p->length = length; + long_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING | ECMA_STRING_REF_ONE; + long_string_p->header.u.hash = hash; + long_string_p->string_p = ECMA_LONG_STRING_BUFFER_START (long_string_p); + long_string_p->size = string_size; + long_string_p->length = length; - return (ecma_string_t *) long_utf8_string_p; + return (ecma_string_t *) long_string_p; } /* ecma_stringbuilder_finalize */ /** diff --git a/jerry-core/ecma/base/ecma-helpers.h b/jerry-core/ecma/base/ecma-helpers.h index 6e5f384bc..922e2a8ec 100644 --- a/jerry-core/ecma/base/ecma-helpers.h +++ b/jerry-core/ecma/base/ecma-helpers.h @@ -306,6 +306,8 @@ bool ecma_prop_name_is_map_key (ecma_string_t *string_p); ecma_string_t *ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size); ecma_string_t *ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size); +ecma_string_t *ecma_new_ecma_external_string_from_cesu8 (const lit_utf8_byte_t *string_p, lit_utf8_size_t string_size, + ecma_object_native_free_callback_t 
free_cb); ecma_string_t *ecma_new_ecma_string_from_code_unit (ecma_char_t code_unit); #if ENABLED (JERRY_ESNEXT) ecma_string_t *ecma_new_ecma_string_from_code_units (ecma_char_t first_code_unit, ecma_char_t second_code_unit); diff --git a/jerry-core/include/jerryscript-core.h b/jerry-core/include/jerryscript-core.h index 71e05defc..990754581 100644 --- a/jerry-core/include/jerryscript-core.h +++ b/jerry-core/include/jerryscript-core.h @@ -506,6 +506,10 @@ jerry_value_t jerry_create_string_from_utf8 (const jerry_char_t *str_p); jerry_value_t jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, jerry_size_t str_size); jerry_value_t jerry_create_string (const jerry_char_t *str_p); jerry_value_t jerry_create_string_sz (const jerry_char_t *str_p, jerry_size_t str_size); +jerry_value_t jerry_create_external_string (const jerry_char_t *str_p, + jerry_object_native_free_callback_t free_cb); +jerry_value_t jerry_create_external_string_sz (const jerry_char_t *str_p, jerry_size_t str_size, + jerry_object_native_free_callback_t free_cb); jerry_value_t jerry_create_symbol (const jerry_value_t value); jerry_value_t jerry_create_undefined (void); diff --git a/tests/unit-core/test-external-string.c b/tests/unit-core/test-external-string.c new file mode 100644 index 000000000..55dd83c58 --- /dev/null +++ b/tests/unit-core/test-external-string.c @@ -0,0 +1,122 @@ +/* Copyright JS Foundation and other contributors, http://js.foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jerryscript.h" + +#include "test-common.h" + +static int free_count = 0; + +static const char *external_1 = "External string! External string! External string! External string!"; +static const char *external_2 = "Object"; +static const char *external_3 = "x!?:s"; + +static void +free_external1 (void *ptr) +{ + TEST_ASSERT (ptr == external_1); + free_count++; +} /* free_external1 */ + +static void +free_external2 (void *ptr) +{ + TEST_ASSERT (ptr == external_2); + free_count++; +} /* free_external2 */ + +static void +free_external3 (void *ptr) +{ + TEST_ASSERT (ptr == external_3); + free_count++; +} /* free_external3 */ + +int +main (void) +{ + TEST_INIT (); + + jerry_init (JERRY_INIT_EMPTY); + + /* Test external callback calls. */ + jerry_value_t external_string = jerry_create_external_string ((jerry_char_t *) external_1, free_external1); + TEST_ASSERT (free_count == 0); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 1); + + external_string = jerry_create_external_string ((jerry_char_t *) external_1, NULL); + TEST_ASSERT (free_count == 1); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 1); + + external_string = jerry_create_external_string ((jerry_char_t *) external_2, free_external2); + TEST_ASSERT (free_count == 2); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 2); + + external_string = jerry_create_external_string ((jerry_char_t *) external_2, NULL); + TEST_ASSERT (free_count == 2); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 2); + + external_string = jerry_create_external_string ((jerry_char_t *) external_3, free_external3); + TEST_ASSERT (free_count == 3); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 3); + + external_string = jerry_create_external_string ((jerry_char_t *) external_3, NULL); + TEST_ASSERT (free_count == 
3); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 3); + + /* Test string comparison. */ + external_string = jerry_create_external_string ((jerry_char_t *) external_1, free_external1); + jerry_value_t other_string = jerry_create_string ((jerry_char_t *) external_1); + + jerry_value_t result = jerry_binary_operation (JERRY_BIN_OP_STRICT_EQUAL, external_string, other_string); + TEST_ASSERT (jerry_value_is_boolean (result)); + TEST_ASSERT (jerry_get_boolean_value (result)); + jerry_release_value (result); + + result = jerry_binary_operation (JERRY_BIN_OP_STRICT_EQUAL, external_string, external_string); + TEST_ASSERT (jerry_value_is_boolean (result)); + TEST_ASSERT (jerry_get_boolean_value (result)); + jerry_release_value (result); + + TEST_ASSERT (free_count == 3); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 4); + jerry_release_value (other_string); + + /* Test getting string. */ + external_string = jerry_create_external_string ((jerry_char_t *) external_1, free_external1); + size_t length = strlen (external_1); + + TEST_ASSERT (jerry_value_is_string (external_string)); + TEST_ASSERT (jerry_get_string_size (external_string) == length); + TEST_ASSERT (jerry_get_string_length (external_string) == length); + + jerry_char_t buf[128]; + jerry_string_to_char_buffer (external_string, buf, sizeof (buf)); + TEST_ASSERT (memcmp (buf, external_1, length) == 0); + + TEST_ASSERT (free_count == 4); + jerry_release_value (external_string); + TEST_ASSERT (free_count == 5); + + jerry_cleanup (); + return 0; +} /* main */