mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
Fix IgnoreCase in RegExp engine
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai <dbatyai.u-szeged@partner.samsung.com>
JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan <szledan.u-szeged@partner.samsung.com>
This commit is contained in:
parent
f48ed52209
commit
9ab0f23e48
@ -54,13 +54,6 @@
|
|||||||
#define RE_GLOBAL_START_IDX 0
|
#define RE_GLOBAL_START_IDX 0
|
||||||
#define RE_GLOBAL_END_IDX 1
|
#define RE_GLOBAL_END_IDX 1
|
||||||
|
|
||||||
/**
|
|
||||||
* RegExp flags
|
|
||||||
*/
|
|
||||||
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
|
|
||||||
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
|
|
||||||
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse RegExp flags (global, ignoreCase, multiline)
|
* Parse RegExp flags (global, ignoreCase, multiline)
|
||||||
*
|
*
|
||||||
@ -229,6 +222,53 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */
|
|||||||
return ret_value;
|
return ret_value;
|
||||||
} /* ecma_op_create_regexp_object */
|
} /* ecma_op_create_regexp_object */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RegExp Canonicalize abstract operation
|
||||||
|
*
|
||||||
|
* See also: ECMA-262 v5, 15.10.2.8
|
||||||
|
*
|
||||||
|
* @return ecma_char_t canonicalized character
|
||||||
|
*/
|
||||||
|
ecma_char_t __attr_always_inline___
|
||||||
|
re_canonicalize (ecma_char_t ch, /**< character */
|
||||||
|
bool is_ignorecase) /**< IgnoreCase flag */
|
||||||
|
{
|
||||||
|
ecma_char_t ret_value = ch;
|
||||||
|
|
||||||
|
if (is_ignorecase)
|
||||||
|
{
|
||||||
|
if (ch < 128)
|
||||||
|
{
|
||||||
|
/* ASCII fast path. */
|
||||||
|
if (ch >= LIT_CHAR_LOWERCASE_A && ch <= LIT_CHAR_LOWERCASE_Z)
|
||||||
|
{
|
||||||
|
ret_value = (ecma_char_t) (ch - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* 2. */
|
||||||
|
ecma_char_t u[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
||||||
|
lit_utf8_size_t size = lit_char_to_upper_case (ch, u, LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
||||||
|
|
||||||
|
/* 3. */
|
||||||
|
if (size == 1)
|
||||||
|
{
|
||||||
|
/* 4. */
|
||||||
|
ecma_char_t cu = u[0];
|
||||||
|
/* 5. */
|
||||||
|
if (cu >= 128)
|
||||||
|
{
|
||||||
|
/* 6. */
|
||||||
|
ret_value = cu;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret_value;
|
||||||
|
} /* re_canonicalize */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursive function for RegExp matching. Tests for a regular expression
|
* Recursive function for RegExp matching. Tests for a regular expression
|
||||||
* match and returns a MatchResult value.
|
* match and returns a MatchResult value.
|
||||||
@ -282,43 +322,12 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
|||||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||||
}
|
}
|
||||||
|
|
||||||
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p);
|
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||||
ecma_char_t ch2 = lit_utf8_iterator_read_next (&iter);
|
ecma_char_t ch1 = (ecma_char_t) re_get_value (&bc_p); /* Already canonicalized. */
|
||||||
|
ecma_char_t ch2 = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
|
||||||
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
|
JERRY_DDLOG ("Character matching %d to %d: ", ch1, ch2);
|
||||||
|
|
||||||
if (re_ctx_p->flags & RE_FLAG_IGNORE_CASE)
|
if (ch1 != ch2)
|
||||||
{
|
|
||||||
ecma_char_t ch1_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
|
||||||
ecma_char_t ch2_buffer[LIT_MAXIMUM_OTHER_CASE_LENGTH];
|
|
||||||
lit_utf8_size_t ch1_length = lit_char_to_lower_case (ch1,
|
|
||||||
ch1_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
lit_utf8_size_t ch2_length = lit_char_to_lower_case (ch2,
|
|
||||||
ch2_buffer,
|
|
||||||
LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
JERRY_ASSERT (ch1_length >= 1 && ch1_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
JERRY_ASSERT (ch2_length >= 1 && ch2_length <= LIT_MAXIMUM_OTHER_CASE_LENGTH);
|
|
||||||
|
|
||||||
if (ch1_length != ch2_length)
|
|
||||||
{
|
|
||||||
JERRY_DDLOG ("fail\n");
|
|
||||||
re_ctx_p->recursion_depth--;
|
|
||||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
|
||||||
}
|
|
||||||
|
|
||||||
for (lit_utf8_size_t i = 0; i < ch1_length; i++)
|
|
||||||
{
|
|
||||||
if (ch1_buffer[i] != ch2_buffer[i])
|
|
||||||
{
|
|
||||||
JERRY_DDLOG ("fail\n");
|
|
||||||
re_ctx_p->recursion_depth--;
|
|
||||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (ch1 != ch2)
|
|
||||||
{
|
{
|
||||||
JERRY_DDLOG ("fail\n");
|
JERRY_DDLOG ("fail\n");
|
||||||
re_ctx_p->recursion_depth--;
|
re_ctx_p->recursion_depth--;
|
||||||
@ -520,7 +529,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
|||||||
case RE_OP_CHAR_CLASS:
|
case RE_OP_CHAR_CLASS:
|
||||||
case RE_OP_INV_CHAR_CLASS:
|
case RE_OP_INV_CHAR_CLASS:
|
||||||
{
|
{
|
||||||
uint32_t curr_ch, num_of_ranges;
|
uint32_t num_of_ranges;
|
||||||
bool is_match;
|
bool is_match;
|
||||||
|
|
||||||
JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
|
JERRY_DDLOG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
|
||||||
@ -531,16 +540,16 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
|
|||||||
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
return ecma_make_simple_completion_value (ECMA_SIMPLE_VALUE_FALSE); /* fail */
|
||||||
}
|
}
|
||||||
|
|
||||||
curr_ch = lit_utf8_iterator_read_next (&iter);
|
bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
|
||||||
|
ecma_char_t curr_ch = re_canonicalize (lit_utf8_iterator_read_next (&iter), is_ignorecase);
|
||||||
|
|
||||||
num_of_ranges = re_get_value (&bc_p);
|
num_of_ranges = re_get_value (&bc_p);
|
||||||
is_match = false;
|
is_match = false;
|
||||||
|
|
||||||
while (num_of_ranges)
|
while (num_of_ranges)
|
||||||
{
|
{
|
||||||
uint32_t ch1, ch2;
|
ecma_char_t ch1 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||||
ch1 = (uint32_t) re_get_value (&bc_p);
|
ecma_char_t ch2 = re_canonicalize ((ecma_char_t) re_get_value (&bc_p), is_ignorecase);
|
||||||
ch2 = (uint32_t) re_get_value (&bc_p);
|
|
||||||
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
|
JERRY_DDLOG ("num_of_ranges=%d, ch1=%d, ch2=%d, curr_ch=%d; ",
|
||||||
num_of_ranges, ch1, ch2, curr_ch);
|
num_of_ranges, ch1, ch2, curr_ch);
|
||||||
|
|
||||||
|
|||||||
@ -39,6 +39,13 @@
|
|||||||
*/
|
*/
|
||||||
#define RE_EXECUTE_MATCH_LIMIT 10000
|
#define RE_EXECUTE_MATCH_LIMIT 10000
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RegExp flags
|
||||||
|
*/
|
||||||
|
#define RE_FLAG_GLOBAL (1 << 0) /* ECMA-262 v5, 15.10.7.2 */
|
||||||
|
#define RE_FLAG_IGNORE_CASE (1 << 1) /* ECMA-262 v5, 15.10.7.3 */
|
||||||
|
#define RE_FLAG_MULTILINE (1 << 2) /* ECMA-262 v5, 15.10.7.4 */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* RegExp executor context
|
* RegExp executor context
|
||||||
*/
|
*/
|
||||||
@ -61,6 +68,10 @@ ecma_op_create_regexp_object (ecma_string_t *pattern_p, ecma_string_t *flags_str
|
|||||||
extern ecma_completion_value_t
|
extern ecma_completion_value_t
|
||||||
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
|
ecma_regexp_exec_helper (ecma_value_t, ecma_value_t, bool);
|
||||||
|
|
||||||
|
extern ecma_char_t
|
||||||
|
re_canonicalize (ecma_char_t ch,
|
||||||
|
bool is_ignorecase);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @}
|
* @}
|
||||||
* @}
|
* @}
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "ecma-exceptions.h"
|
#include "ecma-exceptions.h"
|
||||||
#include "ecma-helpers.h"
|
#include "ecma-helpers.h"
|
||||||
|
#include "ecma-regexp-object.h"
|
||||||
#include "ecma-try-catch-macro.h"
|
#include "ecma-try-catch-macro.h"
|
||||||
#include "jrt-libc-includes.h"
|
#include "jrt-libc-includes.h"
|
||||||
#include "mem-heap.h"
|
#include "mem-heap.h"
|
||||||
@ -446,7 +447,8 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
|||||||
re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);
|
re_ctx_p->current_token.value, re_ctx_p->current_token.qmin, re_ctx_p->current_token.qmax);
|
||||||
|
|
||||||
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
|
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
|
||||||
re_append_u32 (bc_ctx_p, re_ctx_p->current_token.value);
|
re_append_u32 (bc_ctx_p, re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
|
||||||
|
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||||
|
|
||||||
if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
|
if ((re_ctx_p->current_token.qmin != 1) || (re_ctx_p->current_token.qmax != 1))
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user