Fix IgnoreCase in RegExp engine

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
This commit is contained in:
Szilard Ledan 2015-07-29 16:55:51 +02:00 committed by Dániel Bátyai
parent f48ed52209
commit 9ab0f23e48
3 changed files with 70 additions and 48 deletions

View File

@ -54,13 +54,6 @@
#define RE_GLOBAL_START_IDX 0
#define RE_GLOBAL_END_IDX 1
/**
* RegExp flags
*/
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
/**
* Parse RegExp flags (global, ignoreCase, multiline)
*
@ -229,6 +222,53 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */
return ret_value;
} /* ecma_op_create_regexp_object */
/**
* RegExp Canonicalize abstract operation
*
* See also: ECMA-262 v5, 15.10.2.8
*
* @return ecma_char_t canonicalized character
*/
ecma_char_t __attr_always_inline___
re_canonicalize (ecma_char_t ch, /**< character */
bool is_ignorecase) /**< IgnoreCase flag */
{
ecma_char_t ret_value = ch;
if (is_ignorecase)
{
if (ch < 128)
{
/* ASCII fast path. */
if (ch >= LIT_CHAR_LOWERCASE_A && ch <= LIT_CHAR_LOWERCASE_Z)
{
ret_value = (ecma_char_t) (ch - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
}
}
else
{
/* 2. */
ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
lit_utf8_size_t size = lit_char_to_upper_case (ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
/* 3. */
if (size == 1)
{
/* 4. */
ecma_char_t cu = u[0];
/* 5. */
if (cu >= 128)
{
/* 6. */
ret_value = cu;
}
}
}
}
return ret_value;
} /* re_canonicalize */
/**
* Recursive function for RegExp matching. Tests for a regular expression
* match and returns a MatchResult value.
@ -282,43 +322,12 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
}
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p);
ecma_char_t ch2 = lit_utf8_iterator_read_next (&iter);
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
{
ecma_char_t ch1_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
ecma_char_t ch2_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
lit_utf8_size_t ch1_length = lit_char_to_lower_case (ch1,
ch1_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
lit_utf8_size_t ch2_length = lit_char_to_lower_case (ch2,
ch2_buffer,
LIT_MAXIMUM_OTHER_CASE_LENGTH);
JERRY_ASSERT (ch1_length >= 1 && ch1_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
JERRY_ASSERT (ch2_length >= 1 && ch2_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
if (ch1_length != ch2_length)
{
JERRY_DDLOG ("fail\n");
re_ctx_p->recursion_depth--;
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
}
for (lit_utf8_size_t i = 0; i < ch1_length; i++)
{
if (ch1_buffer[i] != ch2_buffer[i])
{
JERRY_DDLOG ("fail\n");
re_ctx_p->recursion_depth--;
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
}
}
}
else if (ch1 != ch2)
if (ch1 != ch2)
{
JERRY_DDLOG ("fail\n");
re_ctx_p->recursion_depth--;
@ -520,7 +529,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
case RE_OP_CHAR_CLASS:
case RE_OP_INV_CHAR_CLASS:
{
uint32_t curr_ch, num_of_ranges;
uint32_t num_of_ranges;
bool is_match;
JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
@ -531,16 +540,16 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
}
curr_ch = lit_utf8_iterator_read_next (&iter);
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
num_of_ranges = re_get_value (&bc_p);
is_match = false;
while (num_of_ranges)
{
uint32_t ch1, ch2;
ch1 = (uint32_t) re_get_value (&bc_p);
ch2 = (uint32_t) re_get_value (&bc_p);
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
num_of_ranges, ch1, ch2, curr_ch);

View File

@ -39,6 +39,13 @@
*/
#define RE_EXECUTE_MATCH_LIMIT 10000
/**
* RegExp flags
*/
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
/**
* RegExp executor context
*/
@ -61,6 +68,10 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str
extern ecma_completion_value_t
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
extern ecma_char_t
re_canonicalize (ecma_char_t ch,
bool is_ignorecase);
/**
* @}
* @}

View File

@ -16,6 +16,7 @@
#include "ecma-exceptions.h"
#include "ecma-helpers.h"
#include "ecma-regexp-object.h"
#include "ecma-try-catch-macro.h"
#include "jrt-libc-includes.h"
#include "mem-heap.h"
@ -446,7 +447,8 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
re_append_u32 (bc_ctx_p, re_ctx_p->current_token.value);
re_append_u32 (bc_ctx_p, re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
{