Add new input validator API functions (#1576)

Fixes #1549

JerryScript-DCO-1.0-Signed-off-by: László Langó llango.u-szeged@partner.samsung.com
This commit is contained in:
László Langó 2017-02-16 18:31:04 +01:00 committed by Dániel Bátyai
parent 93eb35081f
commit 799726aa42
10 changed files with 193 additions and 22 deletions

View File

@ -1233,6 +1233,7 @@ jerry_get_string_size (const jerry_value_t value);
- [jerry_create_string](#jerry_create_string)
- [jerry_get_string_length](#jerry_get_string_length)
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
## jerry_get_utf8_string_size
@ -1272,6 +1273,8 @@ jerry_get_utf8_string_size (const jerry_value_t value);
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_length](#jerry_get_utf8_string_length)
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
## jerry_get_string_length
@ -1308,6 +1311,8 @@ jerry_get_string_length (const jerry_value_t value);
- [jerry_create_string](#jerry_create_string)
- [jerry_get_string_size](#jerry_get_string_size)
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
## jerry_get_utf8_string_length
@ -1347,6 +1352,8 @@ jerry_get_utf8_string_length (const jerry_value_t value);
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
## jerry_string_to_char_buffer
@ -1394,6 +1401,8 @@ jerry_string_to_char_buffer (const jerry_value_t value,
- [jerry_create_string](#jerry_create_string)
- [jerry_get_string_size](#jerry_get_string_size)
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
## jerry_string_to_utf8_char_buffer
@ -1441,6 +1450,8 @@ jerry_string_to_utf8_char_buffer (const jerry_value_t value,
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
## jerry_substring_to_char_buffer
@ -1496,6 +1507,8 @@ jerry_substring_to_char_buffer (const jerry_value_t value,
- [jerry_create_string](#jerry_create_string)
- [jerry_get_string_size](#jerry_get_string_size)
- [jerry_get_string_length](#jerry_get_string_length)
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
## jerry_substring_to_utf8_char_buffer
@ -1548,9 +1561,12 @@ jerry_substring_to_utf8_char_buffer (const jerry_value_t value,
**See also**
- [jerry_create_string_from_utf8](#jerry_create_string)
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
- [jerry_get_utf8_string_length](#jerry_get_utf8_string_length)
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
# Functions for array object values
## jerry_get_array_length
@ -2260,6 +2276,7 @@ jerry_create_string (const jerry_char_t *str_p);
**See also**
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
- [jerry_create_string_sz](#jerry_create_string_sz)
@ -2298,8 +2315,10 @@ jerry_create_string_sz (const jerry_char_t *str_p,
**See also**
- [jerry_is_valid_cesu8_string](#jerry_is_valid_cesu8_string)
- [jerry_create_string](#jerry_create_string)
## jerry_create_string_from_utf8
**Summary**
@ -2333,6 +2352,7 @@ jerry_create_string_from_utf8 (const jerry_char_t *str_p);
**See also**
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
- [jerry_create_string_sz_from_utf8](#jerry_create_string_sz_from_utf8)
@ -2373,8 +2393,10 @@ jerry_create_string_sz (const jerry_char_t *str_p,
**See also**
- [jerry_is_valid_utf8_string](#jerry_is_valid_utf8_string)
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
## jerry_create_undefined
**Summary**
@ -3287,6 +3309,99 @@ bool foreach_function (const jerry_value_t prop_name,
- [jerry_object_property_foreach_t](#jerry_object_property_foreach_t)
# Input validator functions
## jerry_is_valid_utf8_string
**Summary**
Validate UTF-8 string.
**Prototype**
```c
bool
jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, /**< UTF-8 string */
jerry_size_t buf_size) /**< string size */
```
- `utf8_buf_p` - UTF-8 input string
- `buf_size` - input string size
- return value - true, if the input is a well-formed UTF-8 string; false otherwise
**Example**
```c
{
const jerry_char_t script[] = "print ('Hello, World!');";
size_t script_size = strlen ((const char *) script);
if (jerry_is_valid_utf8_string (script, (jerry_size_t) script_size))
{
jerry_run_simple (script, script_size, JERRY_INIT_EMPTY);
}
}
```
**See also**
- [jerry_run_simple](#jerry_run_simple)
- [jerry_create_string_from_utf8](#jerry_create_string_from_utf8)
- [jerry_create_string_sz_from_utf8](#jerry_create_string_sz_from_utf8)
- [jerry_get_utf8_string_size](#jerry_get_utf8_string_size)
- [jerry_get_utf8_string_length](#jerry_get_utf8_string_length)
- [jerry_string_to_utf8_char_buffer](#jerry_string_to_utf8_char_buffer)
- [jerry_substring_to_utf8_char_buffer](#jerry_substring_to_utf8_char_buffer)
## jerry_is_valid_cesu8_string
**Summary**
Validate CESU-8 string.
**Prototype**
```c
bool
jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, /**< CESU-8 string */
jerry_size_t buf_size) /**< string size */
```
- `cesu8_buf_p` - CESU-8 input string
- `buf_size` - input string size
- return value - true, if the input is a well-formed CESU-8 string; false otherwise
**Example**
```c
{
jerry_init (JERRY_INIT_EMPTY);
const jerry_char_t script[] = "Hello, World!";
size_t script_size = strlen ((const char *) script);
if (jerry_is_valid_cesu8_string (script, (jerry_size_t) script_size))
{
jerry_value_t string_value = jerry_create_string_sz (script,
(jerry_size_t) script_size);
... // usage of string_value
jerry_release_value (string_value);
}
jerry_cleanup ();
}
```
**See also**
- [jerry_create_string](#jerry_create_string)
- [jerry_create_string_sz](#jerry_create_string_sz)
- [jerry_get_string_size](#jerry_get_string_size)
- [jerry_get_string_length](#jerry_get_string_length)
- [jerry_string_to_char_buffer](#jerry_string_to_char_buffer)
- [jerry_substring_to_char_buffer](#jerry_substring_to_char_buffer)
# Snapshot functions
## jerry_parse_and_save_snapshot

View File

@ -151,7 +151,7 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri
lit_utf8_size_t string_size) /**< string size */
{
JERRY_ASSERT (string_p != NULL || string_size == 0);
JERRY_ASSERT (lit_is_cesu8_string_valid (string_p, string_size));
JERRY_ASSERT (lit_is_valid_cesu8_string (string_p, string_size));
lit_magic_string_id_t magic_string_id = lit_is_utf8_string_magic (string_p, string_size);
@ -269,7 +269,7 @@ ecma_new_ecma_string_from_utf8_converted_to_cesu8 (const lit_utf8_byte_t *string
{
converted_string_size += string_size;
JERRY_ASSERT (lit_is_utf8_string_valid (string_p, string_size));
JERRY_ASSERT (lit_is_valid_utf8_string (string_p, string_size));
lit_utf8_byte_t *data_p;

View File

@ -899,7 +899,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri, /**< uri argumen
}
if (!is_valid
|| !lit_is_utf8_string_valid (octets, bytes_count))
|| !lit_is_valid_utf8_string (octets, bytes_count))
{
ret_value = ecma_raise_uri_error (ECMA_ERR_MSG ("Invalid UTF8 string."));
break;
@ -923,7 +923,7 @@ ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri, /**< uri argumen
{
JERRY_ASSERT (output_start_p + output_size == output_char_p);
if (lit_is_cesu8_string_valid (output_start_p, output_size))
if (lit_is_valid_cesu8_string (output_start_p, output_size))
{
ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_size);
ret_value = ecma_make_string_value (output_string_p);

View File

@ -982,7 +982,7 @@ jerry_create_object (void)
} /* jerry_create_object */
/**
* Create string from a valid UTF8 string
* Create string from a valid UTF-8 string
*
* Note:
* returned value must be freed with jerry_release_value when it is no longer needed.
@ -996,7 +996,7 @@ jerry_create_string_from_utf8 (const jerry_char_t *str_p) /**< pointer to string
} /* jerry_create_string_from_utf8 */
/**
* Create string from a valid UTF8 string
* Create string from a valid UTF-8 string
*
* Note:
* returned value must be freed with jerry_release_value when it is no longer needed.
@ -1016,7 +1016,7 @@ jerry_create_string_sz_from_utf8 (const jerry_char_t *str_p, /**< pointer to str
} /* jerry_create_string_sz_from_utf8 */
/**
* Create string from a valid CESU8 string
* Create string from a valid CESU-8 string
*
* Note:
* returned value must be freed with jerry_release_value, when it is no longer needed.
@ -1030,7 +1030,7 @@ jerry_create_string (const jerry_char_t *str_p) /**< pointer to string */
} /* jerry_create_string */
/**
* Create string from a valid CESU8 string
* Create string from a valid CESU-8 string
*
* Note:
* returned value must be freed with jerry_release_value when it is no longer needed.
@ -2006,6 +2006,34 @@ jerry_foreach_object_property (const jerry_value_t obj_val, /**< object value */
return false;
} /* jerry_foreach_object_property */
/**
 * Validate UTF-8 string
 *
 * Thin public wrapper: the check itself is delegated to
 * lit_is_valid_utf8_string.
 *
 * @return true - if UTF-8 string is well-formed
 *         false - otherwise
 */
bool
jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, /**< UTF-8 string */
                            jerry_size_t buf_size) /**< string size */
{
  /* Keep the const qualifier in the cast: the validator takes a pointer to const bytes. */
  return lit_is_valid_utf8_string ((const lit_utf8_byte_t *) utf8_buf_p,
                                   (lit_utf8_size_t) buf_size);
} /* jerry_is_valid_utf8_string */
/**
 * Validate CESU-8 string
 *
 * Thin public wrapper: the check itself is delegated to
 * lit_is_valid_cesu8_string.
 *
 * @return true - if CESU-8 string is well-formed
 *         false - otherwise
 */
bool
jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, /**< CESU-8 string */
                             jerry_size_t buf_size) /**< string size */
{
  /* Keep the const qualifier in the cast: the validator takes a pointer to const bytes. */
  return lit_is_valid_cesu8_string ((const lit_utf8_byte_t *) cesu8_buf_p,
                                    (lit_utf8_size_t) buf_size);
} /* jerry_is_valid_cesu8_string */
/**
* @}
*/

View File

@ -329,6 +329,12 @@ void jerry_set_object_native_handle (const jerry_value_t obj_val, uintptr_t hand
bool jerry_foreach_object_property (const jerry_value_t obj_val, jerry_object_property_foreach_t foreach_p,
void *user_data_p);
/**
 * Input validator functions
 *
 * Each function takes a string buffer and its size, and returns true if the
 * buffer is a well-formed string in the respective encoding (UTF-8 or CESU-8),
 * false otherwise.
 */
bool jerry_is_valid_utf8_string (const jerry_char_t *utf8_buf_p, jerry_size_t buf_size);
bool jerry_is_valid_cesu8_string (const jerry_char_t *cesu8_buf_p, jerry_size_t buf_size);
/**
* Snapshot functions
*/

View File

@ -28,7 +28,7 @@
* false otherwise
*/
bool
lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
lit_utf8_size_t buf_size) /**< string size */
{
lit_utf8_size_t idx = 0;
@ -116,7 +116,7 @@ lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string *
}
return true;
} /* lit_is_utf8_string_valid */
} /* lit_is_valid_utf8_string */
/**
* Validate cesu-8 string
@ -125,14 +125,14 @@ lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string *
* false otherwise
*/
bool
lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string */
lit_is_valid_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, /**< cesu-8 string */
lit_utf8_size_t buf_size) /**< string size */
{
lit_utf8_size_t idx = 0;
while (idx < buf_size)
{
lit_utf8_byte_t c = utf8_buf_p[idx++];
lit_utf8_byte_t c = cesu8_buf_p[idx++];
if ((c & LIT_UTF8_1_BYTE_MASK) == LIT_UTF8_1_BYTE_MARKER)
{
continue;
@ -166,7 +166,7 @@ lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string
for (lit_utf8_size_t offset = 0; offset < extra_bytes_count; ++offset)
{
c = utf8_buf_p[idx + offset];
c = cesu8_buf_p[idx + offset];
if ((c & LIT_UTF8_EXTRA_BYTE_MASK) != LIT_UTF8_EXTRA_BYTE_MARKER)
{
/* invalid continuation byte */
@ -186,7 +186,7 @@ lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, /**< utf-8 string
}
return true;
} /* lit_is_cesu8_string_valid */
} /* lit_is_valid_cesu8_string */
/**
* Check if the code point is UTF-16 low surrogate

View File

@ -85,8 +85,8 @@
#define LIT_UTF8_FIRST_BYTE_MAX LIT_UTF8_5_BYTE_MARKER
/* validation */
bool lit_is_utf8_string_valid (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size);
bool lit_is_cesu8_string_valid (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size);
bool lit_is_valid_utf8_string (const lit_utf8_byte_t *utf8_buf_p, lit_utf8_size_t buf_size);
bool lit_is_valid_cesu8_string (const lit_utf8_byte_t *cesu8_buf_p, lit_utf8_size_t buf_size);
/* checks */
bool lit_is_code_point_utf16_low_surrogate (lit_code_point_t code_point);

View File

@ -663,6 +663,12 @@ main (int argc,
break;
}
if (!jerry_is_valid_utf8_string (source_p, (jerry_size_t) source_size))
{
ret_value = jerry_create_error (JERRY_ERROR_COMMON, (jerry_char_t *) ("Input must be a valid UTF-8 string."));
break;
}
if (jerry_is_feature_enabled (JERRY_FEATURE_SNAPSHOT_SAVE) && (is_save_snapshot_mode || is_save_literals_mode))
{
static uint8_t snapshot_save_buffer[ JERRY_BUFFER_SIZE ];
@ -686,6 +692,7 @@ main (int argc,
fclose (snapshot_file_p);
}
}
if (!jerry_value_has_error_flag (ret_value) && is_save_literals_mode)
{
const size_t literal_buffer_size = jerry_parse_and_save_literals ((jerry_char_t *) source_p,

View File

@ -0,0 +1,15 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
va'Ôc=

View File

@ -87,7 +87,7 @@ generate_cesu8_string (lit_utf8_byte_t *buf_p,
lit_utf8_size_t bytes_generated = generate_cesu8_char (char_size, buf_p);
TEST_ASSERT (lit_is_cesu8_string_valid (buf_p, bytes_generated));
TEST_ASSERT (lit_is_valid_cesu8_string (buf_p, bytes_generated));
size += bytes_generated;
buf_p += bytes_generated;
@ -183,19 +183,19 @@ main ()
/* Overlong-encoded code point */
lit_utf8_byte_t invalid_cesu8_string_1[] = {0xC0, 0x82};
TEST_ASSERT (!lit_is_cesu8_string_valid (invalid_cesu8_string_1, sizeof (invalid_cesu8_string_1)));
TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_1, sizeof (invalid_cesu8_string_1)));
/* Overlong-encoded code point */
lit_utf8_byte_t invalid_cesu8_string_2[] = {0xE0, 0x80, 0x81};
TEST_ASSERT (!lit_is_cesu8_string_valid (invalid_cesu8_string_2, sizeof (invalid_cesu8_string_2)));
TEST_ASSERT (!lit_is_valid_cesu8_string (invalid_cesu8_string_2, sizeof (invalid_cesu8_string_2)));
/* Pair of surrogates: 0xD901 0xDFF0 which encode Unicode character 0x507F0 */
lit_utf8_byte_t invalid_cesu8_string_3[] = {0xED, 0xA4, 0x81, 0xED, 0xBF, 0xB0};
TEST_ASSERT (lit_is_cesu8_string_valid (invalid_cesu8_string_3, sizeof (invalid_cesu8_string_3)));
TEST_ASSERT (lit_is_valid_cesu8_string (invalid_cesu8_string_3, sizeof (invalid_cesu8_string_3)));
/* Isolated high surrogate 0xD901 */
lit_utf8_byte_t valid_utf8_string_1[] = {0xED, 0xA4, 0x81};
TEST_ASSERT (lit_is_cesu8_string_valid (valid_utf8_string_1, sizeof (valid_utf8_string_1)));
TEST_ASSERT (lit_is_valid_cesu8_string (valid_utf8_string_1, sizeof (valid_utf8_string_1)));
lit_utf8_byte_t res_buf[3];
lit_utf8_size_t res_size;