Rework ASCII string to use less memory. (#4030)

Furthermore, rename utf8_string to short_string.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
This commit is contained in:
Zoltan Herczeg 2020-07-28 15:47:07 +02:00 committed by GitHub
parent 51aee962a4
commit dff2a2a6da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 87 additions and 83 deletions

View File

@ -1601,15 +1601,6 @@ typedef struct
} u;
} ecma_string_t;
/**
* ECMA ASCII string-value descriptor
*/
typedef struct
{
ecma_string_t header; /**< string header */
uint16_t size; /**< size of this ASCII string in bytes */
} ecma_ascii_string_t;
/**
* ECMA UTF8 string-value descriptor
*/
@ -1618,7 +1609,7 @@ typedef struct
ecma_string_t header; /**< string header */
uint16_t size; /**< size of this utf-8 string in bytes */
uint16_t length; /**< length of this utf-8 string in characters */
} ecma_utf8_string_t;
} ecma_short_string_t;
/**
* Long or external CESU8 string-value descriptor
@ -1640,23 +1631,41 @@ typedef struct
ecma_object_native_free_callback_t free_cb; /**< free callback */
} ecma_external_string_t;
/**
* Header size of an ecma ASCII string
*
* Computed as the size of the common ecma_string_t header plus one
* additional uint8_t. ECMA_ASCII_STRING_GET_SIZE and
* ECMA_ASCII_STRING_SET_SIZE access the byte at offset
* sizeof (ecma_string_t), i.e. that additional byte holds the encoded
* string size.
*
* The result is cast to lit_utf8_size_t so it can be combined with
* string sizes without further casts.
*/
#define ECMA_ASCII_STRING_HEADER_SIZE \
((lit_utf8_size_t) (sizeof (ecma_string_t) + sizeof (uint8_t)))
/**
* Get the size of an ecma ASCII string
*
* Reads the single byte located sizeof (ecma_string_t) bytes after
* string_p and returns that value plus one, as lit_utf8_size_t.
* This is the inverse of ECMA_ASCII_STRING_SET_SIZE, which stores
* (size - 1).
*
* NOTE(review): assuming lit_utf8_byte_t is an 8-bit unsigned type,
* the returned size is in the range 1 .. UINT8_MAX + 1 - confirm.
*
* @param string_p pointer to the beginning of the ASCII string descriptor
*/
#define ECMA_ASCII_STRING_GET_SIZE(string_p) \
((lit_utf8_size_t) *((lit_utf8_byte_t *) (string_p) + sizeof (ecma_string_t)) + 1)
/**
* Set the size of an ecma ASCII string
*
* Stores (size - 1), converted to uint8_t, into the single byte located
* sizeof (ecma_string_t) bytes after string_p. The original value is
* recovered by ECMA_ASCII_STRING_GET_SIZE, which adds the one back.
*
* Only sizes in the range 1 .. UINT8_MAX + 1 survive the round trip:
* a size of 0 would be stored as 255 and read back as 256, and larger
* sizes are truncated by the uint8_t conversion.
*
* @param string_p pointer to the beginning of the ASCII string descriptor
* @param size size of the string in bytes
*/
#define ECMA_ASCII_STRING_SET_SIZE(string_p, size) \
(*((lit_utf8_byte_t *) (string_p) + sizeof (ecma_string_t)) = (uint8_t) ((size) - 1))
/**
* Get the start position of the string buffer of an ecma ASCII string
*/
#define ECMA_ASCII_STRING_GET_BUFFER(string_p) \
((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_ascii_string_t)))
((lit_utf8_byte_t *) (string_p) + ECMA_ASCII_STRING_HEADER_SIZE)
/**
* Get the start position of the string buffer of an ecma UTF8 string
*/
#define ECMA_UTF8_STRING_GET_BUFFER(string_p) \
((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_utf8_string_t)))
#define ECMA_SHORT_STRING_GET_BUFFER(string_p) \
((lit_utf8_byte_t *) (string_p) + sizeof (ecma_short_string_t))
/**
* Get the start position of the string buffer of an ecma long CESU8 string
*/
#define ECMA_LONG_STRING_BUFFER_START(string_p) \
((lit_utf8_byte_t *) ((lit_utf8_byte_t *) (string_p) + sizeof (ecma_long_string_t)))
((lit_utf8_byte_t *) (string_p) + sizeof (ecma_long_string_t))
/**
* ECMA extended string-value descriptor
@ -1684,13 +1693,13 @@ typedef struct
* Get pointer to the beginning of the stored string in the string builder
*/
#define ECMA_STRINGBUILDER_STRING_PTR(header_p) \
((lit_utf8_byte_t *) (((lit_utf8_byte_t *) header_p) + sizeof (ecma_ascii_string_t)))
((lit_utf8_byte_t *) (((lit_utf8_byte_t *) header_p) + ECMA_ASCII_STRING_HEADER_SIZE))
/**
* Get the size of the stored string in the string builder
*/
#define ECMA_STRINGBUILDER_STRING_SIZE(header_p) \
((lit_utf8_size_t) (header_p->current_size - sizeof (ecma_ascii_string_t)))
((lit_utf8_size_t) (header_p->current_size - ECMA_ASCII_STRING_HEADER_SIZE))
/**
* String builder handle

View File

@ -51,7 +51,7 @@ JERRY_STATIC_ASSERT ((int) ECMA_DIRECT_STRING_UINT == (int) ECMA_STRING_CONTAINE
JERRY_STATIC_ASSERT (ECMA_PROPERTY_NAME_TYPE_SHIFT > ECMA_VALUE_SHIFT,
ecma_property_name_type_shift_must_be_greater_than_ecma_value_shift);
JERRY_STATIC_ASSERT (sizeof (ecma_stringbuilder_header_t) <= sizeof (ecma_ascii_string_t),
JERRY_STATIC_ASSERT (sizeof (ecma_stringbuilder_header_t) <= ECMA_ASCII_STRING_HEADER_SIZE,
ecma_stringbuilder_header_must_not_be_larger_than_ecma_ascii_string);
/**
@ -155,8 +155,8 @@ ecma_string_get_chars_fast (const ecma_string_t *string_p, /**< ecma-string */
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
*size_p = ((ecma_utf8_string_t *) string_p)->size;
return ECMA_UTF8_STRING_GET_BUFFER (string_p);
*size_p = ((ecma_short_string_t *) string_p)->size;
return ECMA_SHORT_STRING_GET_BUFFER (string_p);
}
case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING:
{
@ -166,7 +166,7 @@ ecma_string_get_chars_fast (const ecma_string_t *string_p, /**< ecma-string */
}
case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
{
*size_p = ((ecma_ascii_string_t *) string_p)->size;
*size_p = ECMA_ASCII_STRING_GET_SIZE (string_p);
return ECMA_ASCII_STRING_GET_BUFFER (string_p);
}
default:
@ -253,26 +253,24 @@ ecma_new_ecma_string_from_utf8_buffer (lit_utf8_size_t length, /**< length of th
{
if (JERRY_LIKELY (size <= UINT16_MAX))
{
if (JERRY_LIKELY (length == size))
if (JERRY_LIKELY (length == size) && size <= (UINT8_MAX + 1))
{
ecma_ascii_string_t *string_desc_p;
string_desc_p = (ecma_ascii_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_ascii_string_t));
string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
string_desc_p->size = (uint16_t) size;
ecma_string_t *string_desc_p;
string_desc_p = (ecma_string_t *) ecma_alloc_string_buffer (size + ECMA_ASCII_STRING_HEADER_SIZE);
string_desc_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
ECMA_ASCII_STRING_SET_SIZE (string_desc_p, size);
*data_p = ECMA_ASCII_STRING_GET_BUFFER (string_desc_p);
return (ecma_string_t *) string_desc_p;
}
JERRY_ASSERT (length < size);
ecma_utf8_string_t *string_desc_p;
string_desc_p = (ecma_utf8_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_utf8_string_t));
ecma_short_string_t *string_desc_p;
string_desc_p = (ecma_short_string_t *) ecma_alloc_string_buffer (size + sizeof (ecma_short_string_t));
string_desc_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
string_desc_p->size = (uint16_t) size;
string_desc_p->length = (uint16_t) length;
*data_p = ECMA_UTF8_STRING_GET_BUFFER (string_desc_p);
*data_p = ECMA_SHORT_STRING_GET_BUFFER (string_desc_p);
return (ecma_string_t *) string_desc_p;
}
@ -458,7 +456,7 @@ ecma_new_ecma_external_string_from_cesu8 (const lit_utf8_byte_t *string_p, /**<
JERRY_ASSERT (string_p != NULL || string_size == 0);
JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size));
if (string_size < (sizeof (ecma_external_string_t) - sizeof (ecma_utf8_string_t)))
if (string_size < (sizeof (ecma_external_string_t) - sizeof (ecma_short_string_t)))
{
/* Normal strings are created for short strings. */
ecma_string_t *string_desc_p = ecma_new_ecma_string_from_utf8 (string_p, string_size);
@ -881,7 +879,7 @@ ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
ecma_dealloc_string_buffer (string_p, ((ecma_utf8_string_t *) string_p)->size + sizeof (ecma_utf8_string_t));
ecma_dealloc_string_buffer (string_p, ((ecma_short_string_t *) string_p)->size + sizeof (ecma_short_string_t));
return;
}
case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING:
@ -906,8 +904,7 @@ ecma_destroy_ecma_string (ecma_string_t *string_p) /**< ecma-string */
}
case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
{
ecma_dealloc_string_buffer (string_p,
((ecma_ascii_string_t *) string_p)->size + sizeof (ecma_ascii_string_t));
ecma_dealloc_string_buffer (string_p, ECMA_ASCII_STRING_GET_SIZE (string_p) + ECMA_ASCII_STRING_HEADER_SIZE);
return;
}
#if ENABLED (JERRY_ESNEXT)
@ -1465,10 +1462,10 @@ ecma_string_get_chars (const ecma_string_t *string_p, /**< ecma-string */
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
size = utf8_string_desc_p->size;
length = utf8_string_desc_p->length;
result_p = ECMA_UTF8_STRING_GET_BUFFER (utf8_string_desc_p);
ecma_short_string_t *short_string_p = (ecma_short_string_t *) string_p;
size = short_string_p->size;
length = short_string_p->length;
result_p = ECMA_SHORT_STRING_GET_BUFFER (short_string_p);
break;
}
case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING:
@ -1481,10 +1478,9 @@ ecma_string_get_chars (const ecma_string_t *string_p, /**< ecma-string */
}
case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
{
ecma_ascii_string_t *ascii_string_desc_p = (ecma_ascii_string_t *) string_p;
size = ascii_string_desc_p->size;
length = ascii_string_desc_p->size;
result_p = ECMA_ASCII_STRING_GET_BUFFER (ascii_string_desc_p);
size = ECMA_ASCII_STRING_GET_SIZE (string_p);
length = size;
result_p = ECMA_ASCII_STRING_GET_BUFFER (string_p);
break;
}
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
@ -1722,10 +1718,10 @@ ecma_compare_get_string_chars (const ecma_string_t *string_p, /**< ecma-string *
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
size_and_length_p[0] = utf8_string_p->size;
size_and_length_p[1] = utf8_string_p->length;
return ECMA_UTF8_STRING_GET_BUFFER (string_p);
ecma_short_string_t *short_string_p = (ecma_short_string_t *) string_p;
size_and_length_p[0] = short_string_p->size;
size_and_length_p[1] = short_string_p->length;
return ECMA_SHORT_STRING_GET_BUFFER (string_p);
}
case ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING:
{
@ -1736,9 +1732,8 @@ ecma_compare_get_string_chars (const ecma_string_t *string_p, /**< ecma-string *
}
case ECMA_STRING_CONTAINER_HEAP_ASCII_STRING:
{
ecma_ascii_string_t *ascii_string_p = (ecma_ascii_string_t *) string_p;
size_and_length_p[0] = ascii_string_p->size;
size_and_length_p[1] = ascii_string_p->size;
size_and_length_p[0] = ECMA_ASCII_STRING_GET_SIZE (string_p);
size_and_length_p[1] = size_and_length_p[0];
return ECMA_ASCII_STRING_GET_BUFFER (string_p);
}
default:
@ -1994,7 +1989,7 @@ ecma_string_get_ascii_size (const ecma_string_t *string_p) /**< ecma-string */
}
else if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING)
{
return ((ecma_ascii_string_t *) string_p)->size;
return ECMA_ASCII_STRING_GET_SIZE (string_p);
}
return ECMA_STRING_NO_ASCII_SIZE;
@ -2027,7 +2022,7 @@ ecma_string_get_length (const ecma_string_t *string_p) /**< ecma-string */
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
{
return ((ecma_utf8_string_t *) string_p)->length;
return ((ecma_short_string_t *) string_p)->length;
}
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING)
@ -2069,15 +2064,15 @@ ecma_string_get_utf8_length (const ecma_string_t *string_p) /**< ecma-string */
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
{
ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
lit_utf8_size_t size = utf8_string_p->size;
ecma_short_string_t *short_string_p = (ecma_short_string_t *) string_p;
lit_utf8_size_t size = short_string_p->size;
if (size == utf8_string_p->length)
if (size == short_string_p->length)
{
return size;
}
return lit_get_utf8_length_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), size);
return lit_get_utf8_length_of_cesu8_string (ECMA_SHORT_STRING_GET_BUFFER (string_p), size);
}
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING)
@ -2126,7 +2121,7 @@ ecma_string_get_size (const ecma_string_t *string_p) /**< ecma-string */
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
{
return ((ecma_utf8_string_t *) string_p)->size;
return ((ecma_short_string_t *) string_p)->size;
}
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING)
@ -2166,15 +2161,15 @@ ecma_string_get_utf8_size (const ecma_string_t *string_p) /**< ecma-string */
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_HEAP_UTF8_STRING)
{
ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) string_p;
lit_utf8_size_t size = utf8_string_p->size;
ecma_short_string_t *short_string_p = (ecma_short_string_t *) string_p;
lit_utf8_size_t size = short_string_p->size;
if (size == utf8_string_p->length)
if (size == short_string_p->length)
{
return size;
}
return lit_get_utf8_size_of_cesu8_string (ECMA_UTF8_STRING_GET_BUFFER (string_p), size);
return lit_get_utf8_size_of_cesu8_string (ECMA_SHORT_STRING_GET_BUFFER (string_p), size);
}
if (ECMA_STRING_GET_CONTAINER (string_p) == ECMA_STRING_CONTAINER_LONG_OR_EXTERNAL_STRING)
@ -2267,11 +2262,11 @@ ecma_string_get_char_at_pos (const ecma_string_t *string_p, /**< ecma-string */
{
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
{
ecma_utf8_string_t *utf8_string_desc_p = (ecma_utf8_string_t *) string_p;
lit_utf8_size_t size = utf8_string_desc_p->size;
const lit_utf8_byte_t *data_p = ECMA_UTF8_STRING_GET_BUFFER (string_p);
ecma_short_string_t *short_string_p = (ecma_short_string_t *) string_p;
lit_utf8_size_t size = short_string_p->size;
const lit_utf8_byte_t *data_p = ECMA_SHORT_STRING_GET_BUFFER (string_p);
if (JERRY_LIKELY (size == utf8_string_desc_p->length))
if (JERRY_LIKELY (size == short_string_p->length))
{
return (ecma_char_t) data_p[index];
}
@ -2581,7 +2576,7 @@ ecma_string_pad (ecma_value_t original_string_p, /**< Input ecma string */
ecma_stringbuilder_t
ecma_stringbuilder_create (void)
{
const lit_utf8_size_t initial_size = sizeof (ecma_ascii_string_t);
const lit_utf8_size_t initial_size = ECMA_ASCII_STRING_HEADER_SIZE;
ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
header_p->current_size = initial_size;
#if ENABLED (JERRY_MEM_STATS)
@ -2601,7 +2596,7 @@ ecma_stringbuilder_t
ecma_stringbuilder_create_from (ecma_string_t *string_p) /**< ecma string */
{
const lit_utf8_size_t string_size = ecma_string_get_size (string_p);
const lit_utf8_size_t initial_size = string_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
const lit_utf8_size_t initial_size = string_size + ECMA_ASCII_STRING_HEADER_SIZE;
ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
header_p->current_size = initial_size;
@ -2627,7 +2622,7 @@ ecma_stringbuilder_t
ecma_stringbuilder_create_raw (const lit_utf8_byte_t *data_p, /**< pointer to data */
const lit_utf8_size_t data_size) /**< size of the data */
{
const lit_utf8_size_t initial_size = data_size + (lit_utf8_size_t) sizeof (ecma_ascii_string_t);
const lit_utf8_size_t initial_size = data_size + ECMA_ASCII_STRING_HEADER_SIZE;
ecma_stringbuilder_header_t *header_p = (ecma_stringbuilder_header_t *) jmem_heap_alloc_block (initial_size);
header_p->current_size = initial_size;
@ -2703,7 +2698,7 @@ ecma_stringbuilder_revert (ecma_stringbuilder_t *builder_p, /**< string builder
ecma_stringbuilder_header_t *header_p = builder_p->header_p;
JERRY_ASSERT (header_p != NULL);
const lit_utf8_size_t new_size = size + (lit_utf8_size_t) (sizeof (ecma_ascii_string_t));
const lit_utf8_size_t new_size = size + ECMA_ASCII_STRING_HEADER_SIZE;
JERRY_ASSERT (new_size <= header_p->current_size);
#if ENABLED (JERRY_MEM_STATS)
@ -2820,20 +2815,20 @@ ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builde
builder_p->header_p = NULL;
#endif
size_t container_size = sizeof (ecma_utf8_string_t);
size_t container_size = sizeof (ecma_short_string_t);
const lit_string_hash_t hash = lit_utf8_string_calc_hash (string_begin_p, string_size);
const lit_utf8_size_t length = lit_utf8_string_length (string_begin_p, string_size);
if (JERRY_LIKELY (string_size <= UINT16_MAX))
{
if (JERRY_LIKELY (length == string_size))
if (JERRY_LIKELY (length == string_size) && string_size <= (UINT8_MAX + 1))
{
ecma_ascii_string_t *ascii_string_p = (ecma_ascii_string_t *) header_p;
ascii_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
ascii_string_p->header.u.hash = hash;
ascii_string_p->size = (uint16_t) string_size;
string_p = (ecma_string_t *) header_p;
string_p->refs_and_container = ECMA_STRING_CONTAINER_HEAP_ASCII_STRING | ECMA_STRING_REF_ONE;
string_p->u.hash = hash;
ECMA_ASCII_STRING_SET_SIZE (string_p, string_size);
return (ecma_string_t *) ascii_string_p;
return (ecma_string_t *) string_p;
}
}
else
@ -2848,19 +2843,19 @@ ecma_stringbuilder_finalize (ecma_stringbuilder_t *builder_p) /**< string builde
string_size);
#if ENABLED (JERRY_MEM_STATS)
jmem_stats_allocate_string_bytes (container_size - sizeof (ecma_ascii_string_t));
jmem_stats_allocate_string_bytes (container_size - ECMA_ASCII_STRING_HEADER_SIZE);
#endif /* ENABLED (JERRY_MEM_STATS) */
if (JERRY_LIKELY (string_size <= UINT16_MAX))
{
ecma_utf8_string_t *utf8_string_p = (ecma_utf8_string_t *) header_p;
ecma_short_string_t *short_string_p = (ecma_short_string_t *) header_p;
utf8_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
utf8_string_p->header.u.hash = hash;
utf8_string_p->size = (uint16_t) string_size;
utf8_string_p->length = (uint16_t) length;
short_string_p->header.refs_and_container = ECMA_STRING_CONTAINER_HEAP_UTF8_STRING | ECMA_STRING_REF_ONE;
short_string_p->header.u.hash = hash;
short_string_p->size = (uint16_t) string_size;
short_string_p->length = (uint16_t) length;
return (ecma_string_t *) utf8_string_p;
return (ecma_string_t *) short_string_p;
}
ecma_long_string_t *long_string_p = (ecma_long_string_t *) header_p;

View File

@ -2939,7 +2939,7 @@ lexer_construct_regexp_object (parser_context_t *context_p, /**< context */
JERRY_ASSERT (!ECMA_IS_DIRECT_STRING (message_p));
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (message_p) == ECMA_STRING_CONTAINER_HEAP_ASCII_STRING);
buffer_p = ECMA_ASCII_STRING_GET_BUFFER (message_p);
size = ((ecma_ascii_string_t *) message_p)->size;
size = ECMA_ASCII_STRING_GET_SIZE (message_p);
}
uint16_t literal_index = parser_find_string_literal (context_p, &literal_p, buffer_p, size);