mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Add jerry_string_to_utf8_char_buffer API function. (#1501)
Copy the characters of a string into a specified utf-8 string buffer. JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
parent
8ebbfda996
commit
445ca1d6de
@ -1352,6 +1352,49 @@ jerry_string_to_char_buffer (const jerry_value_t value,
|
||||
- [jerry_create_string](#jerry_create_string)
|
||||
- [jerry_get_string_size](#jerry_get_string_size)
|
||||
|
||||
## jerry_string_to_utf8_char_buffer
|
||||
|
||||
**Summary**
|
||||
|
||||
Copy the characters of a string into a specified utf-8 buffer.
|
||||
The '\0' character may occur anywhere in the character buffer. Returns 0,
|
||||
if the value parameter is not a string or the buffer isn't
|
||||
large enough for the whole string.
|
||||
|
||||
**Prototype**
|
||||
|
||||
```c
|
||||
jerry_size_t
|
||||
jerry_string_to_utf8_char_buffer (const jerry_value_t value,
|
||||
jerry_char_t *buffer_p,
|
||||
jerry_size_t buffer_size);
|
||||
```
|
||||
|
||||
- `value` - input string value
|
||||
- `buffer_p` - pointer to output buffer
|
||||
- `buffer_size` - size of the buffer
|
||||
- return value - number of bytes actually copied to the buffer
|
||||
|
||||
**Example**
|
||||
|
||||
```c
|
||||
{
|
||||
jerry_value_t value;
|
||||
... // create or acquire value
|
||||
|
||||
jerry_size_t req_sz = jerry_get_utf8_string_size (value);
|
||||
jerry_char_t str_buf_p[req_sz];
|
||||
|
||||
jerry_string_to_utf8_char_buffer (value, str_buf_p, req_sz);
|
||||
|
||||
jerry_release_value (value);
|
||||
}
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
|
||||
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
|
||||
|
||||
# Functions for array object values
|
||||
|
||||
|
||||
@ -858,10 +858,10 @@ ecma_string_get_array_index (const ecma_string_t *str_p) /**< ecma-string */
|
||||
* @return number of bytes, actually copied to the buffer.
|
||||
*/
|
||||
lit_utf8_size_t __attr_return_value_should_be_checked___
|
||||
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
|
||||
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
|
||||
* (can be NULL if buffer_size == 0) */
|
||||
lit_utf8_size_t buffer_size) /**< size of buffer */
|
||||
ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
|
||||
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
|
||||
* (can be NULL if buffer_size == 0) */
|
||||
lit_utf8_size_t buffer_size) /**< size of buffer */
|
||||
{
|
||||
JERRY_ASSERT (string_desc_p != NULL);
|
||||
JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
|
||||
@ -908,6 +908,73 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
|
||||
}
|
||||
}
|
||||
|
||||
JERRY_ASSERT (size <= buffer_size);
|
||||
return size;
|
||||
} /* ecma_string_copy_to_cesu8_buffer */
|
||||
|
||||
/**
|
||||
* Convert ecma-string's contents to an utf-8 string and put it to the buffer.
|
||||
* It is the caller's responsibility to make sure that the string fits in the buffer.
|
||||
*
|
||||
* @return number of bytes, actually copied to the buffer.
|
||||
*/
|
||||
lit_utf8_size_t __attr_return_value_should_be_checked___
|
||||
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
|
||||
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
|
||||
* (can be NULL if buffer_size == 0) */
|
||||
lit_utf8_size_t buffer_size) /**< size of buffer */
|
||||
{
|
||||
JERRY_ASSERT (string_desc_p != NULL);
|
||||
JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
|
||||
JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
|
||||
JERRY_ASSERT (ecma_string_get_utf8_size (string_desc_p) <= buffer_size);
|
||||
|
||||
lit_utf8_size_t size;
|
||||
|
||||
switch (ECMA_STRING_GET_CONTAINER (string_desc_p))
|
||||
{
|
||||
case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
|
||||
{
|
||||
size = lit_convert_cesu8_string_to_utf8_string ((lit_utf8_byte_t *) (string_desc_p + 1),
|
||||
string_desc_p->u.utf8_string.size,
|
||||
buffer_p,
|
||||
buffer_size);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
|
||||
{
|
||||
size = lit_convert_cesu8_string_to_utf8_string ((lit_utf8_byte_t *) (((ecma_long_string_t *) string_desc_p) + 1),
|
||||
string_desc_p->u.long_utf8_string_size,
|
||||
buffer_p,
|
||||
buffer_size);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
|
||||
{
|
||||
const uint32_t uint32_number = string_desc_p->u.uint32_number;
|
||||
size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
|
||||
break;
|
||||
}
|
||||
case ECMA_STRING_CONTAINER_MAGIC_STRING:
|
||||
{
|
||||
const lit_magic_string_id_t id = string_desc_p->u.magic_string_id;
|
||||
size = lit_get_magic_string_size (id);
|
||||
memcpy (buffer_p, lit_get_magic_string_utf8 (id), size);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_desc_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);
|
||||
|
||||
const lit_magic_string_ex_id_t id = string_desc_p->u.magic_string_ex_id;
|
||||
size = lit_convert_cesu8_string_to_utf8_string (lit_get_magic_string_ex_utf8 (id),
|
||||
lit_get_magic_string_ex_size (id),
|
||||
buffer_p,
|
||||
buffer_size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
JERRY_ASSERT (size <= buffer_size);
|
||||
return size;
|
||||
} /* ecma_string_copy_to_utf8_buffer */
|
||||
@ -923,7 +990,7 @@ ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, /**< ecma-string
|
||||
* (can be NULL if buffer_size == 0) */
|
||||
lit_utf8_size_t buffer_size) /**< size of buffer */
|
||||
{
|
||||
const lit_utf8_size_t size = ecma_string_copy_to_utf8_buffer (string_desc_p, buffer_p, buffer_size);
|
||||
const lit_utf8_size_t size = ecma_string_copy_to_cesu8_buffer (string_desc_p, buffer_p, buffer_size);
|
||||
JERRY_ASSERT (size == buffer_size);
|
||||
} /* ecma_string_to_utf8_bytes */
|
||||
|
||||
|
||||
@ -178,7 +178,12 @@ ecma_number_t ecma_string_to_number (const ecma_string_t *str_p);
|
||||
uint32_t ecma_string_get_array_index (const ecma_string_t *str_p);
|
||||
|
||||
lit_utf8_size_t __attr_return_value_should_be_checked___
|
||||
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
|
||||
ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p,
|
||||
lit_utf8_byte_t *buffer_p,
|
||||
lit_utf8_size_t buffer_size);
|
||||
lit_utf8_size_t __attr_return_value_should_be_checked___
|
||||
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p,
|
||||
lit_utf8_byte_t *buffer_p,
|
||||
lit_utf8_size_t buffer_size);
|
||||
void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
|
||||
lit_utf8_size_t buffer_size);
|
||||
|
||||
@ -141,7 +141,7 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
|
||||
JMEM_DEFINE_LOCAL_ARRAY (ret_str_buffer, size, lit_utf8_byte_t);
|
||||
lit_utf8_byte_t *ret_str_buffer_p = ret_str_buffer;
|
||||
|
||||
lit_utf8_size_t bytes = ecma_string_copy_to_utf8_buffer (name_string_p, ret_str_buffer_p, name_size);
|
||||
lit_utf8_size_t bytes = ecma_string_copy_to_cesu8_buffer (name_string_p, ret_str_buffer_p, name_size);
|
||||
JERRY_ASSERT (bytes == name_size);
|
||||
ret_str_buffer_p = ret_str_buffer_p + bytes;
|
||||
JERRY_ASSERT (ret_str_buffer_p <= ret_str_buffer + size);
|
||||
@ -156,7 +156,7 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
|
||||
space_size);
|
||||
JERRY_ASSERT (ret_str_buffer_p <= ret_str_buffer + size);
|
||||
|
||||
bytes = ecma_string_copy_to_utf8_buffer (msg_string_p, ret_str_buffer_p, msg_size);
|
||||
bytes = ecma_string_copy_to_cesu8_buffer (msg_string_p, ret_str_buffer_p, msg_size);
|
||||
JERRY_ASSERT (bytes == msg_size);
|
||||
ret_str_buffer_p = ret_str_buffer_p + bytes;
|
||||
JERRY_ASSERT (ret_str_buffer_p == ret_str_buffer + size);
|
||||
|
||||
@ -230,6 +230,9 @@ jerry_size_t jerry_get_utf8_string_size (const jerry_value_t value);
|
||||
jerry_length_t jerry_get_string_length (const jerry_value_t value);
|
||||
jerry_length_t jerry_get_utf8_string_length (const jerry_value_t value);
|
||||
jerry_size_t jerry_string_to_char_buffer (const jerry_value_t value, jerry_char_t *buffer_p, jerry_size_t buffer_size);
|
||||
jerry_size_t jerry_string_to_utf8_char_buffer (const jerry_value_t value,
|
||||
jerry_char_t *buffer_p,
|
||||
jerry_size_t buffer_size);
|
||||
|
||||
/**
|
||||
* Functions for array object values
|
||||
|
||||
@ -1160,10 +1160,44 @@ jerry_string_to_char_buffer (const jerry_value_t value, /**< input string value
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ecma_string_copy_to_cesu8_buffer (str_p,
|
||||
(lit_utf8_byte_t *) buffer_p,
|
||||
buffer_size);
|
||||
} /* jerry_string_to_char_buffer */
|
||||
|
||||
/**
|
||||
* Copy the characters of an utf-8 encoded string into a specified buffer.
|
||||
*
|
||||
* Note:
|
||||
* The '\0' character could occur anywhere in the returned string
|
||||
* Returns 0, if the value parameter is not a string or the buffer
|
||||
* is not large enough for the whole string.
|
||||
*
|
||||
* @return number of bytes copied to the buffer.
|
||||
*/
|
||||
jerry_size_t
|
||||
jerry_string_to_utf8_char_buffer (const jerry_value_t value, /**< input string value */
|
||||
jerry_char_t *buffer_p, /**< [out] output characters buffer */
|
||||
jerry_size_t buffer_size) /**< size of output buffer */
|
||||
{
|
||||
jerry_assert_api_available ();
|
||||
|
||||
if (!ecma_is_value_string (value) || buffer_p == NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
ecma_string_t *str_p = ecma_get_string_from_value (value);
|
||||
|
||||
if (ecma_string_get_utf8_size (str_p) > buffer_size)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ecma_string_copy_to_utf8_buffer (str_p,
|
||||
(lit_utf8_byte_t *) buffer_p,
|
||||
buffer_size);
|
||||
} /* jerry_string_to_char_buffer */
|
||||
} /* jerry_string_to_utf8_char_buffer */
|
||||
|
||||
/**
|
||||
* Checks whether the object or it's prototype objects have the given property.
|
||||
|
||||
@ -778,6 +778,61 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
|
||||
}
|
||||
} /* lit_code_point_to_utf8 */
|
||||
|
||||
/**
|
||||
* Convert cesu-8 string to an utf-8 string and put it into the buffer.
|
||||
* It is the caller's responsibility to make sure that the string fits in the buffer.
|
||||
*
|
||||
* @return number of bytes copied to the buffer.
|
||||
*/
|
||||
lit_utf8_size_t
|
||||
lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string, /**< cesu-8 string */
|
||||
lit_utf8_size_t cesu8_size, /**< size of cesu-8 string */
|
||||
lit_utf8_byte_t *utf8_string, /**< destination utf-8 buffer pointer
|
||||
* (can be NULL if buffer_size == 0) */
|
||||
lit_utf8_size_t utf8_size) /**< size of utf-8 buffer */
|
||||
{
|
||||
const lit_utf8_byte_t *cesu8_pos = cesu8_string;
|
||||
const lit_utf8_byte_t *cesu8_end_pos = cesu8_string + cesu8_size;
|
||||
|
||||
lit_utf8_byte_t *utf8_pos = utf8_string;
|
||||
lit_utf8_byte_t *utf8_end_pos = utf8_string + utf8_size;
|
||||
|
||||
lit_utf8_size_t size = 0;
|
||||
|
||||
ecma_char_t prev_ch = 0;
|
||||
lit_utf8_size_t prev_ch_size = 0;
|
||||
|
||||
while (cesu8_pos < cesu8_end_pos)
|
||||
{
|
||||
ecma_char_t ch;
|
||||
lit_utf8_size_t code_unit_size = lit_read_code_unit_from_utf8 (cesu8_pos, &ch);
|
||||
|
||||
if (lit_is_code_point_utf16_low_surrogate (ch) && lit_is_code_point_utf16_high_surrogate (prev_ch))
|
||||
{
|
||||
JERRY_ASSERT (code_unit_size == prev_ch_size);
|
||||
utf8_pos -= prev_ch_size;
|
||||
lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (prev_ch, ch);
|
||||
lit_code_point_to_utf8 (code_point, utf8_pos);
|
||||
size++;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy (utf8_pos, cesu8_pos, code_unit_size);
|
||||
size += code_unit_size;
|
||||
}
|
||||
|
||||
utf8_pos = utf8_string + size;
|
||||
cesu8_pos += code_unit_size;
|
||||
prev_ch = ch;
|
||||
prev_ch_size = code_unit_size;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (cesu8_pos == cesu8_end_pos);
|
||||
JERRY_ASSERT (utf8_pos <= utf8_end_pos);
|
||||
|
||||
return size;
|
||||
} /* lit_convert_cesu8_string_to_utf8_string */
|
||||
|
||||
/**
|
||||
* Convert surrogate pair to code point
|
||||
*
|
||||
|
||||
@ -114,6 +114,10 @@ lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t fi
|
||||
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t code_unit, lit_utf8_byte_t *buf_p);
|
||||
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
|
||||
lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
|
||||
lit_utf8_size_t lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string,
|
||||
lit_utf8_size_t cesu8_size,
|
||||
lit_utf8_byte_t *utf8_string,
|
||||
lit_utf8_size_t utf8_size);
|
||||
lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, ecma_char_t low_surrogate);
|
||||
|
||||
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, lit_utf8_size_t string1_size,
|
||||
|
||||
@ -345,7 +345,7 @@ main (void)
|
||||
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x90\x90\x80");
|
||||
args[1] = jerry_create_string ((jerry_char_t *) "\x73\x74\x72\x3a \xed\xa0\x81\xed\xb0\x80");
|
||||
|
||||
/* these size must be equal */
|
||||
/* These sizes must be equal */
|
||||
utf8_sz = jerry_get_string_size (args[0]);
|
||||
cesu8_sz = jerry_get_string_size (args[1]);
|
||||
|
||||
@ -360,6 +360,26 @@ main (void)
|
||||
jerry_release_value (args[0]);
|
||||
jerry_release_value (args[1]);
|
||||
|
||||
/* Test jerry_string_to_utf8_char_buffer, test string: 'str: {DESERET CAPITAL LETTER LONG I}' */
|
||||
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x90\x90\x80");
|
||||
args[1] = jerry_create_string ((jerry_char_t *) "\x73\x74\x72\x3a \xed\xa0\x81\xed\xb0\x80");
|
||||
|
||||
/* These sizes must be equal */
|
||||
utf8_sz = jerry_get_utf8_string_size (args[0]);
|
||||
cesu8_sz = jerry_get_utf8_string_size (args[1]);
|
||||
|
||||
TEST_ASSERT (utf8_sz == cesu8_sz);
|
||||
|
||||
char string_from_utf8_string[utf8_sz];
|
||||
char string_from_cesu8_string[cesu8_sz];
|
||||
|
||||
jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) string_from_utf8_string, utf8_sz);
|
||||
jerry_string_to_utf8_char_buffer (args[1], (jerry_char_t *) string_from_cesu8_string, cesu8_sz);
|
||||
|
||||
/* Compare the buffers filled by jerry_string_to_utf8_char_buffer just above */
TEST_ASSERT (!strncmp (string_from_utf8_string, string_from_cesu8_string, utf8_sz));
|
||||
jerry_release_value (args[0]);
|
||||
jerry_release_value (args[1]);
|
||||
|
||||
/* Test string: 'str: {MATHEMATICAL FRAKTUR SMALL F}{MATHEMATICAL FRAKTUR SMALL G}' */
|
||||
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4");
|
||||
|
||||
@ -372,6 +392,12 @@ main (void)
|
||||
TEST_ASSERT (cesu8_length == 10 && utf8_length == 8);
|
||||
TEST_ASSERT (cesu8_sz != utf8_sz);
|
||||
TEST_ASSERT (utf8_sz == 14 && cesu8_sz == 18);
|
||||
|
||||
char test_string[utf8_sz];
|
||||
|
||||
TEST_ASSERT (jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) test_string, utf8_sz) == 14);
|
||||
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", utf8_sz));
|
||||
|
||||
jerry_release_value (args[0]);
|
||||
|
||||
/* Test string: 'str: {DESERET CAPITAL LETTER LONG I}' */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user