mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Fix IgnoreCase in RegExp engine
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
This commit is contained in:
parent
f48ed52209
commit
9ab0f23e48
@ -54,13 +54,6 @@
|
||||
#define RE_GLOBAL_START_IDX 0
|
||||
#define RE_GLOBAL_END_IDX 1
|
||||
|
||||
/**
|
||||
* RegExp flags
|
||||
*/
|
||||
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
|
||||
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
|
||||
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
|
||||
|
||||
/**
|
||||
* Parse RegExp flags (global, ignoreCase, multiline)
|
||||
*
|
||||
@ -229,6 +222,53 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */
|
||||
return ret_value;
|
||||
} /* ecma_op_create_regexp_object */
|
||||
|
||||
/**
|
||||
* RegExp Canonicalize abstract operation
|
||||
*
|
||||
* See also: ECMA-262 v5, 15.10.2.8
|
||||
*
|
||||
* @return ecma_char_t canonicalized character
|
||||
*/
|
||||
ecma_char_t __attr_always_inline___
|
||||
re_canonicalize (ecma_char_t ch, /**< character */
|
||||
bool is_ignorecase) /**< IgnoreCase flag */
|
||||
{
|
||||
ecma_char_t ret_value = ch;
|
||||
|
||||
if (is_ignorecase)
|
||||
{
|
||||
if (ch < 128)
|
||||
{
|
||||
/* ASCII fast path. */
|
||||
if (ch >= LIT_CHAR_LOWERCASE_A && ch <= LIT_CHAR_LOWERCASE_Z)
|
||||
{
|
||||
ret_value = (ecma_char_t) (ch - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* 2. */
|
||||
ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||
lit_utf8_size_t size = lit_char_to_upper_case (ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
/* 3. */
|
||||
if (size == 1)
|
||||
{
|
||||
/* 4. */
|
||||
ecma_char_t cu = u[0];
|
||||
/* 5. */
|
||||
if (cu >= 128)
|
||||
{
|
||||
/* 6. */
|
||||
ret_value = cu;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
} /* re_canonicalize */
|
||||
|
||||
/**
|
||||
* Recursive function for RegExp matching. Tests for a regular expression
|
||||
* match and returns a MatchResult value.
|
||||
@ -282,43 +322,12 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||
}
|
||||
|
||||
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p);
|
||||
ecma_char_t ch2 = lit_utf8_iterator_read_next (&iter);
|
||||
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
|
||||
ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
|
||||
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
|
||||
|
||||
if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
|
||||
{
|
||||
ecma_char_t ch1_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||
ecma_char_t ch2_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||
lit_utf8_size_t ch1_length = lit_char_to_lower_case (ch1,
|
||||
ch1_buffer,
|
||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
lit_utf8_size_t ch2_length = lit_char_to_lower_case (ch2,
|
||||
ch2_buffer,
|
||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
JERRY_ASSERT (ch1_length >= 1 && ch1_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
JERRY_ASSERT (ch2_length >= 1 && ch2_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||
|
||||
if (ch1_length != ch2_length)
|
||||
{
|
||||
JERRY_DDLOG ("fail\n");
|
||||
re_ctx_p->recursion_depth--;
|
||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||
}
|
||||
|
||||
for (lit_utf8_size_t i = 0; i < ch1_length; i++)
|
||||
{
|
||||
if (ch1_buffer[i] != ch2_buffer[i])
|
||||
{
|
||||
JERRY_DDLOG ("fail\n");
|
||||
re_ctx_p->recursion_depth--;
|
||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ch1 != ch2)
|
||||
if (ch1 != ch2)
|
||||
{
|
||||
JERRY_DDLOG ("fail\n");
|
||||
re_ctx_p->recursion_depth--;
|
||||
@ -520,7 +529,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
case RE_OP_CHAR_CLASS:
|
||||
case RE_OP_INV_CHAR_CLASS:
|
||||
{
|
||||
uint32_t curr_ch, num_of_ranges;
|
||||
uint32_t num_of_ranges;
|
||||
bool is_match;
|
||||
|
||||
JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
|
||||
@ -531,16 +540,16 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||
}
|
||||
|
||||
curr_ch = lit_utf8_iterator_read_next (&iter);
|
||||
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||
ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
|
||||
|
||||
num_of_ranges = re_get_value (&bc_p);
|
||||
is_match = false;
|
||||
|
||||
while (num_of_ranges)
|
||||
{
|
||||
uint32_t ch1, ch2;
|
||||
ch1 = (uint32_t) re_get_value (&bc_p);
|
||||
ch2 = (uint32_t) re_get_value (&bc_p);
|
||||
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
|
||||
num_of_ranges, ch1, ch2, curr_ch);
|
||||
|
||||
|
||||
@ -39,6 +39,13 @@
|
||||
*/
|
||||
#define RE_EXECUTE_MATCH_LIMIT 10000
|
||||
|
||||
/**
|
||||
* RegExp flags
|
||||
*/
|
||||
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
|
||||
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
|
||||
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
|
||||
|
||||
/**
|
||||
* RegExp executor context
|
||||
*/
|
||||
@ -61,6 +68,10 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str
|
||||
extern ecma_completion_value_t
|
||||
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
|
||||
|
||||
extern ecma_char_t
|
||||
re_canonicalize (ecma_char_t ch,
|
||||
bool is_ignorecase);
|
||||
|
||||
/**
|
||||
* @}
|
||||
* @}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include "ecma-exceptions.h"
|
||||
#include "ecma-helpers.h"
|
||||
#include "ecma-regexp-object.h"
|
||||
#include "ecma-try-catch-macro.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "mem-heap.h"
|
||||
@ -446,7 +447,8 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);
|
||||
|
||||
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
|
||||
re_append_u32 (bc_ctx_p, re_ctx_p->current_token.value);
|
||||
re_append_u32 (bc_ctx_p, re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
|
||||
if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
|
||||
{
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user