Add jerry_string_to_utf8_char_buffer API function. (#1501)

Copy the characters of a string into a specified utf-8 string buffer.

JerryScript-DCO-1.0-Signed-off-by: Robert Sipka rsipka.uszeged@partner.samsung.com
This commit is contained in:
Robert Sipka 2017-01-05 09:35:31 +01:00 committed by GitHub
parent 8ebbfda996
commit 445ca1d6de
9 changed files with 247 additions and 10 deletions

View File

@ -1352,6 +1352,49 @@ jerry_string_to_char_buffer (const jerry_value_t value,
- [jerry_create_string](#jerry_create_string)
- [jerry_get_string_size](#jerry_get_string_size)
## jerry_string_to_utf8_char_buffer
**Summary**
Copy the characters of a string into a specified buffer, encoded as utf-8.
The '\0' character could occur anywhere in the character buffer. Returns 0
if the value parameter is not a string, the buffer pointer is NULL, or the
buffer is not large enough for the whole string.
**Prototype**
```c
jerry_size_t
jerry_string_to_utf8_char_buffer (const jerry_value_t value,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
```
- `value` - input string value
- `buffer_p` - pointer to output buffer
- `buffer_size` - size of the buffer
- return value - number of bytes actually copied to the buffer
**Example**
```c
{
jerry_value_t value;
... // create or acquire value
jerry_size_t req_sz = jerry_get_utf8_string_size (value);
jerry_char_t str_buf_p[req_sz];
jerry_string_to_utf8_char_buffer (value, str_buf_p, req_sz);
jerry_release_value (value);
}
```
**See also**
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
# Functions for array object values

View File

@ -858,10 +858,10 @@ ecma_string_get_array_index (const ecma_string_t *str_p) /**< ecma-string */
* @return number of bytes, actually copied to the buffer.
*/
lit_utf8_size_t __attr_return_value_should_be_checked___
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
* (can be NULL if buffer_size == 0) */
lit_utf8_size_t buffer_size) /**< size of buffer */
ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
* (can be NULL if buffer_size == 0) */
lit_utf8_size_t buffer_size) /**< size of buffer */
{
JERRY_ASSERT (string_desc_p != NULL);
JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
@ -908,6 +908,73 @@ ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-s
}
}
JERRY_ASSERT (size <= buffer_size);
return size;
} /* ecma_string_copy_to_cesu8_buffer */
/**
 * Convert ecma-string's contents to an utf-8 string and put it to the buffer.
 *
 * It is the caller's responsibility to make sure that the string fits in the buffer:
 * the function asserts that the utf-8 size of the string is not greater than buffer_size,
 * it does not truncate the output.
 *
 * @return number of bytes, actually copied to the buffer.
 */
lit_utf8_size_t __attr_return_value_should_be_checked___
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, /**< ecma-string descriptor */
                                 lit_utf8_byte_t *buffer_p, /**< destination buffer pointer
                                                             * (can be NULL if buffer_size == 0) */
                                 lit_utf8_size_t buffer_size) /**< size of buffer */
{
  JERRY_ASSERT (string_desc_p != NULL);
  JERRY_ASSERT (string_desc_p->refs_and_container >= ECMA_STRING_REF_ONE);
  JERRY_ASSERT (buffer_p != NULL || buffer_size == 0);
  JERRY_ASSERT (ecma_string_get_utf8_size (string_desc_p) <= buffer_size);

  lit_utf8_size_t size;

  switch (ECMA_STRING_GET_CONTAINER (string_desc_p))
  {
    case ECMA_STRING_CONTAINER_HEAP_UTF8_STRING:
    {
      /* The character data is read from directly behind the string descriptor. */
      size = lit_convert_cesu8_string_to_utf8_string ((lit_utf8_byte_t *) (string_desc_p + 1),
                                                      string_desc_p->u.utf8_string.size,
                                                      buffer_p,
                                                      buffer_size);
      break;
    }
    case ECMA_STRING_CONTAINER_HEAP_LONG_UTF8_STRING:
    {
      /* Same as above, but the data is read from behind the larger long-string header. */
      size = lit_convert_cesu8_string_to_utf8_string ((lit_utf8_byte_t *) (((ecma_long_string_t *) string_desc_p) + 1),
                                                      string_desc_p->u.long_utf8_string_size,
                                                      buffer_p,
                                                      buffer_size);
      break;
    }
    case ECMA_STRING_CONTAINER_UINT32_IN_DESC:
    {
      const uint32_t uint32_number = string_desc_p->u.uint32_number;
      size = ecma_uint32_to_utf8_string (uint32_number, buffer_p, buffer_size);
      break;
    }
    case ECMA_STRING_CONTAINER_MAGIC_STRING:
    {
      /* NOTE(review): copied verbatim, without cesu-8 -> utf-8 conversion; presumably
       * built-in magic strings never contain surrogate pairs - confirm. */
      const lit_magic_string_id_t id = string_desc_p->u.magic_string_id;
      size = lit_get_magic_string_size (id);

      /* buffer_p is allowed to be NULL when buffer_size == 0, and passing a null
       * pointer to memcpy is undefined even for a zero length, so skip empty copies. */
      if (size > 0)
      {
        memcpy (buffer_p, lit_get_magic_string_utf8 (id), size);
      }
      break;
    }
    default:
    {
      JERRY_ASSERT (ECMA_STRING_GET_CONTAINER (string_desc_p) == ECMA_STRING_CONTAINER_MAGIC_STRING_EX);

      const lit_magic_string_ex_id_t id = string_desc_p->u.magic_string_ex_id;
      size = lit_convert_cesu8_string_to_utf8_string (lit_get_magic_string_ex_utf8 (id),
                                                      lit_get_magic_string_ex_size (id),
                                                      buffer_p,
                                                      buffer_size);
      break;
    }
  }

  JERRY_ASSERT (size <= buffer_size);
  return size;
} /* ecma_string_copy_to_utf8_buffer */
@ -923,7 +990,7 @@ ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, /**< ecma-string
* (can be NULL if buffer_size == 0) */
lit_utf8_size_t buffer_size) /**< size of buffer */
{
const lit_utf8_size_t size = ecma_string_copy_to_utf8_buffer (string_desc_p, buffer_p, buffer_size);
const lit_utf8_size_t size = ecma_string_copy_to_cesu8_buffer (string_desc_p, buffer_p, buffer_size);
JERRY_ASSERT (size == buffer_size);
} /* ecma_string_to_utf8_bytes */

View File

@ -178,7 +178,12 @@ ecma_number_t ecma_string_to_number (const ecma_string_t *str_p);
uint32_t ecma_string_get_array_index (const ecma_string_t *str_p);
lit_utf8_size_t __attr_return_value_should_be_checked___
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
ecma_string_copy_to_cesu8_buffer (const ecma_string_t *string_desc_p,
lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
lit_utf8_size_t __attr_return_value_should_be_checked___
ecma_string_copy_to_utf8_buffer (const ecma_string_t *string_desc_p,
lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);
void ecma_string_to_utf8_bytes (const ecma_string_t *string_desc_p, lit_utf8_byte_t *buffer_p,
lit_utf8_size_t buffer_size);

View File

@ -141,7 +141,7 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
JMEM_DEFINE_LOCAL_ARRAY (ret_str_buffer, size, lit_utf8_byte_t);
lit_utf8_byte_t *ret_str_buffer_p = ret_str_buffer;
lit_utf8_size_t bytes = ecma_string_copy_to_utf8_buffer (name_string_p, ret_str_buffer_p, name_size);
lit_utf8_size_t bytes = ecma_string_copy_to_cesu8_buffer (name_string_p, ret_str_buffer_p, name_size);
JERRY_ASSERT (bytes == name_size);
ret_str_buffer_p = ret_str_buffer_p + bytes;
JERRY_ASSERT (ret_str_buffer_p <= ret_str_buffer + size);
@ -156,7 +156,7 @@ ecma_builtin_error_prototype_object_to_string (ecma_value_t this_arg) /**< this
space_size);
JERRY_ASSERT (ret_str_buffer_p <= ret_str_buffer + size);
bytes = ecma_string_copy_to_utf8_buffer (msg_string_p, ret_str_buffer_p, msg_size);
bytes = ecma_string_copy_to_cesu8_buffer (msg_string_p, ret_str_buffer_p, msg_size);
JERRY_ASSERT (bytes == msg_size);
ret_str_buffer_p = ret_str_buffer_p + bytes;
JERRY_ASSERT (ret_str_buffer_p == ret_str_buffer + size);

View File

@ -230,6 +230,9 @@ jerry_size_t jerry_get_utf8_string_size (const jerry_value_t value);
jerry_length_t jerry_get_string_length (const jerry_value_t value);
jerry_length_t jerry_get_utf8_string_length (const jerry_value_t value);
jerry_size_t jerry_string_to_char_buffer (const jerry_value_t value, jerry_char_t *buffer_p, jerry_size_t buffer_size);
jerry_size_t jerry_string_to_utf8_char_buffer (const jerry_value_t value,
jerry_char_t *buffer_p,
jerry_size_t buffer_size);
/**
* Functions for array object values

View File

@ -1160,10 +1160,44 @@ jerry_string_to_char_buffer (const jerry_value_t value, /**< input string value
return 0;
}
return ecma_string_copy_to_cesu8_buffer (str_p,
(lit_utf8_byte_t *) buffer_p,
buffer_size);
} /* jerry_string_to_char_buffer */
/**
 * Copy the characters of a string into a specified buffer, encoded as utf-8.
 *
 * Note:
 *      The '\0' character could occur anywhere in the returned string
 *      Returns 0, if the value parameter is not a string, the buffer pointer
 *      is NULL, or the buffer is not large enough for the whole string.
 *
 * @return number of bytes copied to the buffer.
 */
jerry_size_t
jerry_string_to_utf8_char_buffer (const jerry_value_t value, /**< input string value */
                                  jerry_char_t *buffer_p, /**< [out] output characters buffer */
                                  jerry_size_t buffer_size) /**< size of output buffer */
{
  jerry_assert_api_available ();

  if (!ecma_is_value_string (value) || buffer_p == NULL)
  {
    return 0;
  }

  ecma_string_t *str_p = ecma_get_string_from_value (value);

  /* Reject too small buffers up front: the copy routine expects the whole string to fit. */
  if (ecma_string_get_utf8_size (str_p) > buffer_size)
  {
    return 0;
  }

  return ecma_string_copy_to_utf8_buffer (str_p,
                                          (lit_utf8_byte_t *) buffer_p,
                                          buffer_size);
} /* jerry_string_to_utf8_char_buffer */
/**
* Checks whether the object or it's prototype objects have the given property.

View File

@ -778,6 +778,61 @@ lit_code_point_to_utf8 (lit_code_point_t code_point, /**< code point */
}
} /* lit_code_point_to_utf8 */
/**
 * Re-encode a cesu-8 byte sequence as utf-8 into the destination buffer.
 *
 * Code units are copied as they are, except that a low surrogate which directly
 * follows a high surrogate is merged with it into a single code point.
 * The destination capacity is only asserted, never enforced: the caller has to
 * make sure that the converted string fits in the buffer.
 *
 * @return number of bytes copied to the buffer.
 */
lit_utf8_size_t
lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string, /**< cesu-8 string */
                                         lit_utf8_size_t cesu8_size, /**< size of cesu-8 string */
                                         lit_utf8_byte_t *utf8_string, /**< destination utf-8 buffer pointer
                                                                        * (can be NULL if buffer_size == 0) */
                                         lit_utf8_size_t utf8_size) /**< size of utf-8 buffer */
{
  lit_utf8_size_t read_offset = 0; /* bytes consumed from the cesu-8 input */
  lit_utf8_size_t written = 0; /* bytes currently occupied in the utf-8 output */

  ecma_char_t last_unit = 0;
  lit_utf8_size_t last_unit_size = 0;

  while (read_offset < cesu8_size)
  {
    const lit_utf8_byte_t *source_p = cesu8_string + read_offset;

    ecma_char_t unit;
    const lit_utf8_size_t unit_size = lit_read_code_unit_from_utf8 (source_p, &unit);

    if (!lit_is_code_point_utf16_low_surrogate (unit) || !lit_is_code_point_utf16_high_surrogate (last_unit))
    {
      /* Not the second half of a surrogate pair: the bytes are taken over unchanged. */
      memcpy (utf8_string + written, source_p, unit_size);
      written += unit_size;
    }
    else
    {
      JERRY_ASSERT (unit_size == last_unit_size);

      /* The high surrogate was copied verbatim by the previous iteration: overwrite it
       * with the combined code point, which occupies one byte more than it did. */
      lit_code_point_to_utf8 (lit_convert_surrogate_pair_to_code_point (last_unit, unit),
                              utf8_string + written - last_unit_size);
      written++;
    }

    read_offset += unit_size;
    last_unit = unit;
    last_unit_size = unit_size;
  }

  JERRY_ASSERT (read_offset == cesu8_size);
  JERRY_ASSERT (written <= utf8_size);

  return written;
} /* lit_convert_cesu8_string_to_utf8_string */
/**
* Convert surrogate pair to code point
*

View File

@ -114,6 +114,10 @@ lit_utf8_size_t lit_get_unicode_char_size_by_utf8_first_byte (lit_utf8_byte_t fi
lit_utf8_size_t lit_code_unit_to_utf8 (ecma_char_t code_unit, lit_utf8_byte_t *buf_p);
lit_utf8_size_t lit_code_point_to_utf8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
lit_utf8_size_t lit_code_point_to_cesu8 (lit_code_point_t code_point, lit_utf8_byte_t *buf);
lit_utf8_size_t lit_convert_cesu8_string_to_utf8_string (const lit_utf8_byte_t *cesu8_string,
lit_utf8_size_t cesu8_size,
lit_utf8_byte_t *utf8_string,
lit_utf8_size_t utf8_size);
lit_code_point_t lit_convert_surrogate_pair_to_code_point (ecma_char_t high_surrogate, ecma_char_t low_surrogate);
bool lit_compare_utf8_strings_relational (const lit_utf8_byte_t *string1_p, lit_utf8_size_t string1_size,

View File

@ -345,7 +345,7 @@ main (void)
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x90\x90\x80");
args[1] = jerry_create_string ((jerry_char_t *) "\x73\x74\x72\x3a \xed\xa0\x81\xed\xb0\x80");
/* these size must be equal */
/* These sizes must be equal */
utf8_sz = jerry_get_string_size (args[0]);
cesu8_sz = jerry_get_string_size (args[1]);
@ -360,6 +360,26 @@ main (void)
jerry_release_value (args[0]);
jerry_release_value (args[1]);
/* Test jerry_string_to_utf8_char_buffer, test string: 'str: {DESERET CAPITAL LETTER LONG I}' */
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x90\x90\x80");
args[1] = jerry_create_string ((jerry_char_t *) "\x73\x74\x72\x3a \xed\xa0\x81\xed\xb0\x80");
/* These sizes must be equal */
utf8_sz = jerry_get_utf8_string_size (args[0]);
cesu8_sz = jerry_get_utf8_string_size (args[1]);
TEST_ASSERT (utf8_sz == cesu8_sz);
char string_from_utf8_string[utf8_sz];
char string_from_cesu8_string[cesu8_sz];
jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) string_from_utf8_string, utf8_sz);
jerry_string_to_utf8_char_buffer (args[1], (jerry_char_t *) string_from_cesu8_string, cesu8_sz);
/* Compare the buffers filled just above by jerry_string_to_utf8_char_buffer:
 * both input strings must yield the same utf-8 byte sequence. */
TEST_ASSERT (!strncmp (string_from_utf8_string, string_from_cesu8_string, utf8_sz));
jerry_release_value (args[0]);
jerry_release_value (args[1]);
/* Test string: 'str: {MATHEMATICAL FRAKTUR SMALL F}{MATHEMATICAL FRAKTUR SMALL G}' */
args[0] = jerry_create_string_from_utf8 ((jerry_char_t *) "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4");
@ -372,6 +392,12 @@ main (void)
TEST_ASSERT (cesu8_length == 10 && utf8_length == 8);
TEST_ASSERT (cesu8_sz != utf8_sz);
TEST_ASSERT (utf8_sz == 14 && cesu8_sz == 18);
char test_string[utf8_sz];
TEST_ASSERT (jerry_string_to_utf8_char_buffer (args[0], (jerry_char_t *) test_string, utf8_sz) == 14);
TEST_ASSERT (!strncmp (test_string, "\x73\x74\x72\x3a \xf0\x9d\x94\xa3 \xf0\x9d\x94\xa4", utf8_sz));
jerry_release_value (args[0]);
/* Test string: 'str: {DESERET CAPITAL LETTER LONG I}' */