diff --git a/jerry-core/ecma/base/ecma-helpers-string.cpp b/jerry-core/ecma/base/ecma-helpers-string.cpp index 538df18a2..1e52a017d 100644 --- a/jerry-core/ecma/base/ecma-helpers-string.cpp +++ b/jerry-core/ecma/base/ecma-helpers-string.cpp @@ -373,8 +373,8 @@ ecma_init_ecma_string_from_magic_string_id (ecma_string_t *string_p, /**< descri string_p->refs = 1; string_p->is_stack_var = (is_stack_var != 0); string_p->container = ECMA_STRING_CONTAINER_MAGIC_STRING; - string_p->hash = lit_utf8_string_calc_hash_last_bytes (lit_get_magic_string_utf8 (magic_string_id), - lit_get_magic_string_size (magic_string_id)); + string_p->hash = lit_utf8_string_calc_hash (lit_get_magic_string_utf8 (magic_string_id), + lit_get_magic_string_size (magic_string_id)); string_p->u.common_field = 0; string_p->u.magic_string_id = magic_string_id; @@ -397,8 +397,8 @@ ecma_init_ecma_string_from_magic_string_ex_id (ecma_string_t *string_p, /**< des string_p->refs = 1; string_p->is_stack_var = (is_stack_var != 0); string_p->container = ECMA_STRING_CONTAINER_MAGIC_STRING_EX; - string_p->hash = lit_utf8_string_calc_hash_last_bytes (lit_get_magic_string_ex_utf8 (magic_string_ex_id), - lit_get_magic_string_ex_size (magic_string_ex_id)); + string_p->hash = lit_utf8_string_calc_hash (lit_get_magic_string_ex_utf8 (magic_string_ex_id), + lit_get_magic_string_ex_size (magic_string_ex_id)); string_p->u.common_field = 0; string_p->u.magic_string_ex_id = magic_string_ex_id; @@ -434,7 +434,7 @@ ecma_new_ecma_string_from_utf8 (const lit_utf8_byte_t *string_p, /**< utf-8 stri string_desc_p->refs = 1; string_desc_p->is_stack_var = false; string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_CHUNKS; - string_desc_p->hash = lit_utf8_string_calc_hash_last_bytes (string_p, string_size); + string_desc_p->hash = lit_utf8_string_calc_hash (string_p, string_size); string_desc_p->u.common_field = 0; ecma_collection_header_t *collection_p = ecma_new_chars_collection (string_p, string_size); @@ -470,33 +470,12 @@ 
ecma_new_ecma_string_from_uint32 (uint32_t uint32_number) /**< UInt32-represente string_desc_p->is_stack_var = false; string_desc_p->container = ECMA_STRING_CONTAINER_UINT32_IN_DESC; - uint32_t last_two_digits = uint32_number % 100; - uint32_t digit_pl = last_two_digits / 10; - uint32_t digit_l = last_two_digits % 10; - - FIXME (/* Use digit to char conversion routine */); - const lit_utf8_byte_t digits[10] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; - const bool is_one_char_or_more = (uint32_number >= 10); - const lit_utf8_byte_t last_chars[LIT_STRING_HASH_LAST_BYTES_COUNT] = - { - is_one_char_or_more ? digits[digit_pl] : digits[digit_l], - is_one_char_or_more ? digits[digit_l] : (lit_utf8_byte_t) '\0' - }; - - /* Only last two chars are really used for hash calculation */ - string_desc_p->hash = lit_utf8_string_calc_hash_last_bytes (last_chars, - is_one_char_or_more ? 2 : 1); - -#ifndef JERRY_NDEBUG lit_utf8_byte_t byte_buf[ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32]; ssize_t bytes_copied = ecma_uint32_to_utf8_string (uint32_number, byte_buf, ECMA_MAX_CHARS_IN_STRINGIFIED_UINT32); - JERRY_ASSERT ((ssize_t) ((lit_utf8_size_t) bytes_copied) == bytes_copied); - JERRY_ASSERT (string_desc_p->hash == lit_utf8_string_calc_hash_last_bytes (byte_buf, - (lit_utf8_size_t) bytes_copied)); -#endif /* !JERRY_NDEBUG */ + string_desc_p->hash = lit_utf8_string_calc_hash (byte_buf, (lit_utf8_size_t) bytes_copied); string_desc_p->u.common_field = 0; string_desc_p->u.uint32_number = uint32_number; @@ -537,7 +516,7 @@ ecma_new_ecma_string_from_number (ecma_number_t num) /**< ecma-number */ string_desc_p->refs = 1; string_desc_p->is_stack_var = false; string_desc_p->container = ECMA_STRING_CONTAINER_HEAP_NUMBER; - string_desc_p->hash = lit_utf8_string_calc_hash_last_bytes (str_buf, str_size); + string_desc_p->hash = lit_utf8_string_calc_hash (str_buf, str_size); string_desc_p->u.common_field = 0; ecma_number_t *num_p = ecma_alloc_number (); @@ -617,7 +596,6 @@ 
ecma_new_ecma_string_from_magic_string_ex_id (lit_magic_string_ex_id_t id) /**< return string_desc_p; } /* ecma_new_ecma_string_from_magic_string_ex_id */ - /** * Concatenate ecma-strings * @@ -659,56 +637,30 @@ ecma_concat_ecma_strings (ecma_string_t *string1_p, /**< first ecma-string */ string1_p = ecma_copy_or_ref_ecma_string (string1_p); string2_p = ecma_copy_or_ref_ecma_string (string2_p); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.concatenation.string1_cp, string1_p); + ECMA_SET_NON_NULL_POINTER (string_desc_p->u.concatenation.string2_cp, string2_p); + ecma_char_t str1_last_code_unit = ecma_string_get_char_at_pos (string1_p, ecma_string_get_length (string1_p) - 1); ecma_char_t str2_first_code_unit = ecma_string_get_char_at_pos (string2_p, 0); string_desc_p->u.concatenation.is_surrogate_pair_sliced = (lit_is_code_unit_high_surrogate (str1_last_code_unit) && lit_is_code_unit_low_surrogate (str2_first_code_unit)); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.concatenation.string1_cp, string1_p); - ECMA_SET_NON_NULL_POINTER (string_desc_p->u.concatenation.string2_cp, string2_p); - - JERRY_STATIC_ASSERT (LIT_STRING_HASH_LAST_BYTES_COUNT == 2); - - if (str2_size >= LIT_STRING_HASH_LAST_BYTES_COUNT) + if (!string_desc_p->u.concatenation.is_surrogate_pair_sliced) { - if (str2_size >= LIT_UTF8_MAX_BYTES_IN_CODE_UNIT + LIT_STRING_HASH_LAST_BYTES_COUNT - || !string_desc_p->u.concatenation.is_surrogate_pair_sliced) - { - string_desc_p->hash = string2_p->hash; - } - else - { - const lit_utf8_size_t bytes_buf_size = str2_size + 1; - lit_utf8_byte_t bytes_buf[LIT_UTF8_MAX_BYTES_IN_CODE_POINT + 1]; - - lit_code_point_t code_point = lit_convert_surrogate_pair_to_code_point (str1_last_code_unit, - str2_first_code_unit); - lit_utf8_size_t idx = lit_code_point_to_utf8 (code_point, bytes_buf); - JERRY_ASSERT (idx = LIT_UTF8_MAX_BYTES_IN_CODE_POINT); - - if (str2_size > LIT_UTF8_MAX_BYTES_IN_CODE_UNIT) - { - bytes_buf[idx] = ecma_string_get_byte_at_pos (string2_p, 
LIT_UTF8_MAX_BYTES_IN_CODE_UNIT); - } - - string_desc_p->hash = lit_utf8_string_calc_hash_last_bytes (bytes_buf + bytes_buf_size - - LIT_STRING_HASH_LAST_BYTES_COUNT, - LIT_STRING_HASH_LAST_BYTES_COUNT); - } - + lit_utf8_size_t buffer_size = ecma_string_get_size (string2_p); + MEM_DEFINE_LOCAL_ARRAY (utf8_str_p, buffer_size, lit_utf8_byte_t); + ecma_string_to_utf8_string (string2_p, utf8_str_p, (ssize_t) buffer_size); + string_desc_p->hash = lit_utf8_string_hash_combine (string1_p->hash, utf8_str_p, buffer_size); + MEM_FINALIZE_LOCAL_ARRAY (utf8_str_p); } else { - JERRY_ASSERT (str2_size == 1); - - lit_utf8_byte_t bytes_buf[LIT_STRING_HASH_LAST_BYTES_COUNT] = - { - ecma_string_get_byte_at_pos (string1_p, str1_size - 1u), - ecma_string_get_byte_at_pos (string2_p, 0) - }; - - string_desc_p->hash = lit_utf8_string_calc_hash_last_bytes (bytes_buf, LIT_STRING_HASH_LAST_BYTES_COUNT); + lit_utf8_size_t buffer_size = ecma_string_get_size (string_desc_p); + MEM_DEFINE_LOCAL_ARRAY (utf8_str_p, buffer_size, lit_utf8_byte_t); + ecma_string_to_utf8_string (string_desc_p, utf8_str_p, (ssize_t) buffer_size); + string_desc_p->hash = lit_utf8_string_calc_hash (utf8_str_p, buffer_size); + MEM_FINALIZE_LOCAL_ARRAY (utf8_str_p); } return string_desc_p; diff --git a/jerry-core/lit/lit-literal-storage.cpp b/jerry-core/lit/lit-literal-storage.cpp index 67262b0be..4608c9e7e 100644 --- a/jerry-core/lit/lit-literal-storage.cpp +++ b/jerry-core/lit/lit-literal-storage.cpp @@ -235,7 +235,7 @@ lit_literal_storage_t::create_charset_record (const lit_utf8_byte_t *str, /**< s ret->set_alignment_bytes_count (alignment); ret->set_charset (str, buf_size); - ret->set_hash (lit_utf8_string_calc_hash_last_bytes (str, ret->get_length ())); + ret->set_hash (lit_utf8_string_calc_hash (str, ret->get_length ())); return ret; } /* lit_literal_storage_t::create_charset_record */ @@ -478,4 +478,3 @@ template lit_charset_record_t *rcs_recordset_t::alloc_record (rcs_record_t::type_t type); template 
lit_number_record_t *rcs_recordset_t::alloc_record (rcs_record_t::type_t type); - diff --git a/jerry-core/lit/lit-literal.cpp b/jerry-core/lit/lit-literal.cpp index 8da138cc3..9684c850f 100644 --- a/jerry-core/lit/lit-literal.cpp +++ b/jerry-core/lit/lit-literal.cpp @@ -107,7 +107,7 @@ lit_find_literal_by_utf8_string (const lit_utf8_byte_t *str_p, /**< a string to { JERRY_ASSERT (str_p || !str_size); - lit_string_hash_t str_hash = lit_utf8_string_calc_hash_last_bytes (str_p, str_size); + lit_string_hash_t str_hash = lit_utf8_string_calc_hash (str_p, str_size); for (literal_t lit = lit_storage.get_first (); lit != NULL; lit = lit_storage.get_next (lit)) { diff --git a/jerry-core/lit/lit-strings.cpp b/jerry-core/lit/lit-strings.cpp index a8944e654..593c237a6 100644 --- a/jerry-core/lit/lit-strings.cpp +++ b/jerry-core/lit/lit-strings.cpp @@ -597,28 +597,48 @@ lit_read_code_point_from_utf8 (const lit_utf8_byte_t *buf_p, /**< buffer with ch return bytes_count; } /* lit_read_code_point_from_utf8 */ - /** - * Calculate hash from last LIT_STRING_HASH_LAST_BYTES_COUNT characters from the buffer. + * Calculate the hash of the buffer's bytes, starting from the specified hash_basis. + * + * NOTE: + * This is an implementation of the FNV-1a hash function, which is released into the public domain. + * The constants used are primes carefully picked by the FNV authors. + * More info: http://www.isthe.com/chongo/tech/comp/fnv/ * * @return ecma-string's hash */ -lit_string_hash_t -lit_utf8_string_calc_hash_last_bytes (const lit_utf8_byte_t *utf8_buf_p, /**< characters buffer */ - lit_utf8_size_t utf8_buf_size) /**< number of characters in the buffer */ +lit_string_hash_t __attr_always_inline___ +lit_utf8_string_hash_combine (lit_string_hash_t hash_basis, /**< hash value to continue hashing from */ + const lit_utf8_byte_t *utf8_buf_p, /**< buffer with the bytes to hash */ + lit_utf8_size_t utf8_buf_size) /**< number of bytes in the buffer */ { JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0); - lit_utf8_byte_t byte1 = (utf8_buf_size > 0) ? 
utf8_buf_p[utf8_buf_size - 1] : (lit_utf8_byte_t) 0; - lit_utf8_byte_t byte2 = (utf8_buf_size > 1) ? utf8_buf_p[utf8_buf_size - 2] : (lit_utf8_byte_t) 0; + uint32_t hash = hash_basis; - uint32_t t1 = (uint32_t) byte1 + (uint32_t) byte2; - uint32_t t2 = t1 * 0x24418b66; - uint32_t t3 = (t2 >> 16) ^ (t2 & 0xffffu); - uint32_t t4 = (t3 >> 8) ^ (t3 & 0xffu); + for (uint32_t i = 0; i < utf8_buf_size; i++) + { + // FNV-1a step: xor the next byte in, then multiply by the 32-bit FNV prime (2^24 + 2^8 + 0x93 = 16777619) + hash = (hash ^ utf8_buf_p[i]) * 16777619; + } - return (lit_string_hash_t) t4; -} /* lit_utf8_string_calc_hash_last_bytes */ + return (lit_string_hash_t) hash; +} /* lit_utf8_string_hash_combine */ + +/** + * Calculate the hash of the whole buffer, using the FNV offset basis as the starting value. + * + * @return ecma-string's hash + */ +lit_string_hash_t __attr_always_inline___ +lit_utf8_string_calc_hash (const lit_utf8_byte_t *utf8_buf_p, /**< buffer with the bytes to hash */ + lit_utf8_size_t utf8_buf_size) /**< number of bytes in the buffer */ +{ + JERRY_ASSERT (utf8_buf_p != NULL || utf8_buf_size == 0); + + // 2166136261 is the 32-bit FNV offset basis; the cast converts it to lit_string_hash_t before hashing + return lit_utf8_string_hash_combine ((lit_string_hash_t) 2166136261, utf8_buf_p, utf8_buf_size); +} /* lit_utf8_string_calc_hash */ /** * Return code unit at the specified position in string diff --git a/jerry-core/lit/lit-strings.h b/jerry-core/lit/lit-strings.h index cf846f563..a7eedad04 100644 --- a/jerry-core/lit/lit-strings.h +++ b/jerry-core/lit/lit-strings.h @@ -159,7 +159,8 @@ lit_utf8_size_t lit_zt_utf8_string_size (const lit_utf8_byte_t *); ecma_length_t lit_utf8_string_length (const lit_utf8_byte_t *, lit_utf8_size_t); /* hash */ -lit_string_hash_t lit_utf8_string_calc_hash_last_bytes (const lit_utf8_byte_t *, lit_utf8_size_t); +lit_string_hash_t lit_utf8_string_calc_hash (const lit_utf8_byte_t *, lit_utf8_size_t); +lit_string_hash_t lit_utf8_string_hash_combine (lit_string_hash_t, const lit_utf8_byte_t *, lit_utf8_size_t); /* code unit access */ ecma_char_t lit_utf8_string_code_unit_at (const 
lit_utf8_byte_t *, lit_utf8_size_t, ecma_length_t); diff --git a/tests/jerry/hash.js b/tests/jerry/hash.js new file mode 100644 index 000000000..8d54e63ba --- /dev/null +++ b/tests/jerry/hash.js @@ -0,0 +1,24 @@ +// Copyright 2015 Samsung Electronics Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +a = {}; +a['12345']=1; +a['13345']=3; +a['sss45']=4; +a['1'] = 2; + +assert (a[12345] === 1); +assert (a[1] === 2); +assert (a[13345] === 3); +assert (a['sss45'] === 4);