From bf780ad3dc6fe336d6741a8cdc9d6d4f51f3e95e Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Tue, 25 Aug 2015 05:35:19 -0700 Subject: [PATCH] Global object escape routine. JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg@inf.u-szeged.hu --- .../builtin-objects/ecma-builtin-global.cpp | 163 +++++++++++++++++- .../builtin-objects/ecma-builtin-global.inc.h | 4 + jerry-core/lit/lit-magic-strings.inc.h | 1 + tests/jerry/global-escaping.js | 31 ++++ 4 files changed, 191 insertions(+), 8 deletions(-) create mode 100644 tests/jerry/global-escaping.js diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp b/jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp index e4d973a94..7f40d9fbd 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.cpp @@ -731,7 +731,7 @@ ecma_builtin_global_object_is_finite (ecma_value_t this_arg __attr_unused___, /* */ static bool ecma_builtin_global_object_character_is_in (uint32_t character, /**< character */ - uint8_t *bitset) /**< character set */ + const uint8_t *bitset) /**< character set */ { JERRY_ASSERT (character < 128); return (bitset[character >> 3] & (1 << (character & 0x7))) != 0; @@ -742,7 +742,7 @@ ecma_builtin_global_object_character_is_in (uint32_t character, /**< character * * One bit for each character between 0 - 127. * Bit is set if the character is in the unescaped URI set. */ -static uint8_t unescaped_uri_set[16] = +static const uint8_t unescaped_uri_set[16] = { 0x0, 0x0, 0x0, 0x0, 0xda, 0xff, 0xff, 0xaf, 0xff, 0xff, 0xff, 0x87, 0xfe, 0xff, 0xff, 0x47 @@ -753,7 +753,7 @@ static uint8_t unescaped_uri_set[16] = * One bit for each character between 0 - 127. * Bit is set if the character is in the unescaped component URI set. 
*/ -static uint8_t unescaped_uri_component_set[16] = +static const uint8_t unescaped_uri_component_set[16] = { 0x0, 0x0, 0x0, 0x0, 0x82, 0x67, 0xff, 0x3, 0xfe, 0xff, 0xff, 0x87, 0xfe, 0xff, 0xff, 0x47 @@ -779,7 +779,7 @@ static uint8_t unescaped_uri_component_set[16] = */ static ecma_completion_value_t ecma_builtin_global_object_decode_uri_helper (ecma_value_t uri __attr_unused___, /**< uri argument */ - uint8_t *reserved_uri_bitset) /**< reserved characters bitset */ + const uint8_t *reserved_uri_bitset) /**< reserved characters bitset */ { ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); @@ -1011,7 +1011,7 @@ ecma_builtin_global_object_byte_to_hex (lit_utf8_byte_t *dest_p, /**< destinatio { JERRY_ASSERT (byte < 256); - dest_p[0] = '%'; + dest_p[0] = LIT_CHAR_PERCENT; ecma_char_t hex_digit = (ecma_char_t) (byte >> 4); dest_p[1] = (lit_utf8_byte_t) ((hex_digit > 9) ? (hex_digit + ('A' - 10)) : (hex_digit + '0')); hex_digit = (lit_utf8_byte_t) (byte & 0xf); @@ -1026,7 +1026,7 @@ ecma_builtin_global_object_byte_to_hex (lit_utf8_byte_t *dest_p, /**< destinatio */ static ecma_completion_value_t ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argument */ - uint8_t* unescaped_uri_bitset) /**< unescaped bitset */ + const uint8_t *unescaped_uri_bitset_p) /**< unescaped bitset */ { ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); @@ -1066,7 +1066,7 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen /* Input validation. 
*/ if (*input_char_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) { - if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset)) + if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset_p)) { output_length++; } @@ -1114,7 +1114,7 @@ ecma_builtin_global_object_encode_uri_helper (ecma_value_t uri, /**< uri argumen if (*input_char_p <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) { - if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset)) + if (ecma_builtin_global_object_character_is_in (*input_char_p, unescaped_uri_bitset_p)) { *output_char_p++ = *input_char_p; } @@ -1180,6 +1180,153 @@ ecma_builtin_global_object_encode_uri_component (ecma_value_t this_arg __attr_un return ecma_builtin_global_object_encode_uri_helper (uri_component, unescaped_uri_component_set); } /* ecma_builtin_global_object_encode_uri_component */ +#ifndef CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN + +/* + * Maximum value of a byte. + */ +#define ECMA_ESCAPE_MAXIMUM_BYTE_VALUE (255) + +/* + * Format is a percent sign followed by lowercase u and four hex digits. + */ +#define ECMA_ESCAPE_ENCODED_UNICODE_CHARACTER_SIZE (6) + +/* + * Escape characters bitset: + * One bit for each character between 0 - 127. + * Bit is set if the character does not need to be converted to %xx form. + * These characters are: a-z A-Z 0-9 @ * _ + - . / + */ +static const uint8_t ecma_escape_set[16] = +{ + 0x0, 0x0, 0x0, 0x0, 0x0, 0xec, 0xff, 0x3, + 0xff, 0xff, 0xff, 0x87, 0xfe, 0xff, 0xff, 0x7 +}; + +/** + * The Global object's 'escape' routine + * + * See also: + * ECMA-262 v5, B.2.1 + * + * @return completion value + * Returned value must be freed with ecma_free_completion_value. 
+ */ +static ecma_completion_value_t +ecma_builtin_global_object_escape (ecma_value_t this_arg __attr_unused___, /**< this argument */ + ecma_value_t arg) /**< routine's first argument */ +{ + ecma_completion_value_t ret_value = ecma_make_empty_completion_value (); + + ECMA_TRY_CATCH (string, + ecma_op_to_string (arg), + ret_value); + + ecma_string_t *input_string_p = ecma_get_string_from_value (string); + lit_utf8_size_t input_size = ecma_string_get_size (input_string_p); + + MEM_DEFINE_LOCAL_ARRAY (input_start_p, + input_size, + lit_utf8_byte_t); + + ecma_string_to_utf8_string (input_string_p, + input_start_p, + (ssize_t) (input_size)); + + /* + * The escape routine has two major phases: first we compute + * the length of the output, then we encode the input. + */ + lit_utf8_iterator_t iterator = lit_utf8_iterator_create (input_start_p, input_size); + lit_utf8_size_t output_length = 0; + + while (!lit_utf8_iterator_is_eos (&iterator)) + { + ecma_char_t chr = lit_utf8_iterator_read_next (&iterator); + + if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) + { + if (ecma_builtin_global_object_character_is_in ((uint32_t) chr, ecma_escape_set)) + { + output_length++; + } + else + { + output_length += URI_ENCODED_BYTE_SIZE; + } + } + else if (chr > ECMA_ESCAPE_MAXIMUM_BYTE_VALUE) + { + output_length += ECMA_ESCAPE_ENCODED_UNICODE_CHARACTER_SIZE; + } + else + { + output_length += URI_ENCODED_BYTE_SIZE; + } + } + + MEM_DEFINE_LOCAL_ARRAY (output_start_p, + output_length, + lit_utf8_byte_t); + + lit_utf8_byte_t *output_char_p = output_start_p; + + lit_utf8_iterator_seek_bos (&iterator); + + while (!lit_utf8_iterator_is_eos (&iterator)) + { + ecma_char_t chr = lit_utf8_iterator_read_next (&iterator); + + if (chr <= LIT_UTF8_1_BYTE_CODE_POINT_MAX) + { + if (ecma_builtin_global_object_character_is_in ((uint32_t) chr, ecma_escape_set)) + { + *output_char_p = (lit_utf8_byte_t) chr; + output_char_p++; + } + else + { + ecma_builtin_global_object_byte_to_hex (output_char_p, 
(lit_utf8_byte_t) chr); + output_char_p += URI_ENCODED_BYTE_SIZE; + } + } + else if (chr > ECMA_ESCAPE_MAXIMUM_BYTE_VALUE) + { + /* + * Although ecma_builtin_global_object_byte_to_hex inserts a percent (%) sign, + * the follow-up changes overwrite it. We call this function twice to + * produce four hexadecimal characters (%uxxxx format). + */ + ecma_builtin_global_object_byte_to_hex (output_char_p + 3, (lit_utf8_byte_t) (chr & 0xff)); + ecma_builtin_global_object_byte_to_hex (output_char_p + 1, (lit_utf8_byte_t) (chr >> JERRY_BITSINBYTE)); + output_char_p[0] = LIT_CHAR_PERCENT; + output_char_p[1] = LIT_CHAR_LOWERCASE_U; + output_char_p += ECMA_ESCAPE_ENCODED_UNICODE_CHARACTER_SIZE; + } + else + { + ecma_builtin_global_object_byte_to_hex (output_char_p, (lit_utf8_byte_t) chr); + output_char_p += URI_ENCODED_BYTE_SIZE; + } + } + + JERRY_ASSERT (output_start_p + output_length == output_char_p); + + ecma_string_t *output_string_p = ecma_new_ecma_string_from_utf8 (output_start_p, output_length); + + ret_value = ecma_make_normal_completion_value (ecma_make_string_value (output_string_p)); + + MEM_FINALIZE_LOCAL_ARRAY (output_start_p); + + MEM_FINALIZE_LOCAL_ARRAY (input_start_p); + + ECMA_FINALIZE (string); + return ret_value; +} /* ecma_builtin_global_object_escape */ + +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */ + /** * @} * @} diff --git a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h index 3b4142741..0ef05173b 100644 --- a/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h +++ b/jerry-core/ecma/builtin-objects/ecma-builtin-global.inc.h @@ -235,6 +235,10 @@ ROUTINE (LIT_MAGIC_STRING_ENCODE_URI, ecma_builtin_global_object_encode_uri, 1, ROUTINE (LIT_MAGIC_STRING_ENCODE_URI_COMPONENT, ecma_builtin_global_object_encode_uri_component, 1, 1) ROUTINE (LIT_MAGIC_STRING_PARSE_INT, ecma_builtin_global_object_parse_int, 2, 2) +#ifndef 
CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN +ROUTINE (LIT_MAGIC_STRING_ESCAPE, ecma_builtin_global_object_escape, 1, 1) +#endif /* CONFIG_ECMA_COMPACT_PROFILE_DISABLE_ANNEXB_BUILTIN */ + #undef OBJECT_ID #undef SIMPLE_VALUE #undef NUMBER_VALUE diff --git a/jerry-core/lit/lit-magic-strings.inc.h b/jerry-core/lit/lit-magic-strings.inc.h index 7208a6154..de484997e 100644 --- a/jerry-core/lit/lit-magic-strings.inc.h +++ b/jerry-core/lit/lit-magic-strings.inc.h @@ -72,6 +72,7 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI, "decodeURI") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_DECODE_URI_COMPONENT, "decodeURIComponent") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI, "encodeURI") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ENCODE_URI_COMPONENT, "encodeURIComponent") +LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ESCAPE, "escape") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_PROTOTYPE_OF_UL, "getPrototypeOf") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_DESCRIPTOR_UL, "getOwnPropertyDescriptor") LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_OWN_PROPERTY_NAMES_UL, "getOwnPropertyNames") diff --git a/tests/jerry/global-escaping.js b/tests/jerry/global-escaping.js new file mode 100644 index 000000000..8f80f4072 --- /dev/null +++ b/tests/jerry/global-escaping.js @@ -0,0 +1,31 @@ +// Copyright 2015 University of Szeged +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Escaping + +assert (escape ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") === + "%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"); +assert (escape ("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f") === + "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"); +assert (escape (" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN") === + "%20%21%22%23%24%25%26%27%28%29*+%2C-./0123456789%3A%3B%3C%3D%3E%3F@ABCDEFGHIJKLMN"); +assert (escape ("OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}\x7F") === + "OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7F"); + +assert (escape("\x80\x95\xaf\xfe\xff") === "%80%95%AF%FE%FF"); +assert (escape("\u0100\ud800\udc00") === "%u0100%uD800%uDC00"); + +assert (escape({}) === "%5Bobject%20Object%5D"); +assert (escape(true) === "true")