Add RegExp recursion depth limit (#2543)

The regexp engine has no recursion depth check, so compiling or matching certain regexps can recurse without bound and cause problems. This adds a new build option, `--regexp-recursion-limit N`; its
default value is 0, which leaves the recursion depth unlimited. A build-option test is also added.

Fixes #2448
Fixes #2190

JerryScript-DCO-1.0-Signed-off-by: Istvan Miklos imiklos2@inf.u-szeged.hu
This commit is contained in:
Istvan Miklos 2019-01-17 20:16:50 +01:00 committed by Akos Kiss
parent 162e2ddcb6
commit c23cf4176a
7 changed files with 105 additions and 0 deletions

View File

@ -39,6 +39,7 @@ set(FEATURE_SYSTEM_ALLOCATOR OFF CACHE BOOL "Enable system allocator?")
set(FEATURE_VALGRIND OFF CACHE BOOL "Enable Valgrind support?")
set(FEATURE_VM_EXEC_STOP OFF CACHE BOOL "Enable VM execution stopping?")
set(MEM_HEAP_SIZE_KB "512" CACHE STRING "Size of memory heap, in kilobytes")
# Maximum RegExp recursion depth. The default "0" evaluates as false in if(), so the
# REGEXP_RECURSION_LIMIT compile definition is not added and the depth stays unlimited.
set(REGEXP_RECURSION_LIMIT "0" CACHE STRING "Limit of regexp recursion depth")
# Option overrides
if(USING_MSVC)
@ -94,6 +95,7 @@ message(STATUS "FEATURE_SYSTEM_ALLOCATOR " ${FEATURE_SYSTEM_ALLOCATOR})
message(STATUS "FEATURE_VALGRIND " ${FEATURE_VALGRIND})
message(STATUS "FEATURE_VM_EXEC_STOP " ${FEATURE_VM_EXEC_STOP})
message(STATUS "MEM_HEAP_SIZE_KB " ${MEM_HEAP_SIZE_KB})
message(STATUS "REGEXP_RECURSION_LIMIT " ${REGEXP_RECURSION_LIMIT})
# Include directories
set(INCLUDE_CORE_PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
@ -228,6 +230,11 @@ if(FEATURE_REGEXP_STRICT_MODE)
set(DEFINES_JERRY ${DEFINES_JERRY} ENABLE_REGEXP_STRICT_MODE)
endif()
# RegExp recursion depth limit: forwarded to the compiler as a definition only when
# the configured value is truthy for if(); the default "0" adds no definition.
if(REGEXP_RECURSION_LIMIT)
  list(APPEND DEFINES_JERRY "REGEXP_RECURSION_LIMIT=${REGEXP_RECURSION_LIMIT}")
endif()
# RegExp byte-code dumps
if(FEATURE_REGEXP_DUMP)
set(DEFINES_JERRY ${DEFINES_JERRY} REGEXP_DUMP_BYTE_CODE)

View File

@ -63,6 +63,13 @@
*/
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)
/**
 * Compile-time sanity check of the RegExp recursion depth limit:
 * whenever the limit macro is defined, its value must be positive.
 */
#if defined (REGEXP_RECURSION_LIMIT)
JERRY_STATIC_ASSERT (REGEXP_RECURSION_LIMIT > 0, regexp_recursion_limit_must_be_greater_than_zero);
#endif /* defined (REGEXP_RECURSION_LIMIT) */
/**
* Parse RegExp flags (global, ignoreCase, multiline)
*
@ -344,6 +351,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
const lit_utf8_byte_t *str_p, /**< input string pointer */
const lit_utf8_byte_t **out_str_p) /**< [out] matching substring iterator */
{
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
const lit_utf8_byte_t *str_curr_p = str_p;
while (true)
@ -356,12 +364,14 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
*out_str_p = str_curr_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_TRUE; /* match */
}
case RE_OP_CHAR:
{
if (str_curr_p >= re_ctx_p->input_end_p)
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -373,6 +383,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ch1 != ch2)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -384,6 +395,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
{
if (str_curr_p >= re_ctx_p->input_end_p)
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -393,6 +405,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (lit_char_is_line_terminator (ch))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -412,6 +425,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -422,6 +436,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_ASSERT_END:
@ -437,6 +452,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -447,6 +463,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_ASSERT_WORD_BOUNDARY:
@ -478,6 +495,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_wordchar_left == is_wordchar_right)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -489,6 +507,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_wordchar_left != is_wordchar_right)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -556,6 +575,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}
JMEM_FINALIZE_LOCAL_ARRAY (saved_bck_p);
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value;
}
case RE_OP_CHAR_CLASS:
@ -568,6 +588,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (str_curr_p >= re_ctx_p->input_end_p)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -598,6 +619,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (!is_match)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -607,6 +629,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (is_match)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -637,6 +660,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (str_curr_p >= re_ctx_p->input_end_p)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -646,6 +670,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ch1 != ch2)
{
JERRY_TRACE_MSG ("fail\n");
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -669,6 +694,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -683,6 +709,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
bc_p = old_bc_p;
re_ctx_p->saved_p[RE_GLOBAL_START_IDX] = old_start_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_SAVE_AND_MATCH:
@ -690,6 +717,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
JERRY_TRACE_MSG ("End of pattern is reached: match\n");
re_ctx_p->saved_p[RE_GLOBAL_END_IDX] = str_curr_p;
*out_str_p = str_curr_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_TRUE; /* match */
}
case RE_OP_ALTERNATIVE:
@ -754,6 +782,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -812,6 +841,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -836,6 +866,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -845,6 +876,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
}
re_ctx_p->saved_p[start_idx] = old_start_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
@ -890,6 +922,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -938,6 +971,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (re_ctx_p->num_of_iterations_p[iter_idx] >= min
&& str_curr_p== re_ctx_p->saved_p[start_idx])
{
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
@ -959,6 +993,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -983,6 +1018,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -1004,6 +1040,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -1015,6 +1052,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
/* restore if fails */
re_ctx_p->saved_p[end_idx] = old_end_p;
re_ctx_p->num_of_iterations_p[iter_idx]--;
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
case RE_OP_NON_GREEDY_ITERATOR:
@ -1039,6 +1077,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -1062,6 +1101,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
str_curr_p = sub_str_p;
num_of_iter++;
}
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
default:
@ -1105,6 +1145,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
if (ecma_is_value_true (match_value))
{
*out_str_p = sub_str_p;
REGEXP_RECURSION_COUNTER_INCREASE ();
return match_value; /* match */
}
else if (ECMA_IS_VALUE_ERROR (match_value))
@ -1120,6 +1161,7 @@ re_match_regexp (re_matcher_ctx_t *re_ctx_p, /**< RegExp matcher context */
lit_utf8_read_prev (&str_curr_p);
num_of_iter--;
}
REGEXP_RECURSION_COUNTER_INCREASE ();
return ECMA_VALUE_FALSE; /* fail */
}
}
@ -1208,6 +1250,7 @@ ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
re_ctx.input_start_p = input_curr_p;
const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_buffer_size;
re_ctx.input_end_p = input_end_p;
REGEXP_RECURSION_COUNTER_INIT ();
/* 1. Read bytecode header and init regexp matcher context. */
re_ctx.flags = bc_p->header.status_flags;

View File

@ -28,6 +28,46 @@
* @{
*/
#if defined (REGEXP_RECURSION_LIMIT)
/**
 * Take one unit from the recursion budget kept in 're_ctx_p->recursion_counter'.
 * Once the counter drops to zero, the enclosing function returns immediately
 * with a range error.
 *
 * Note:
 *      expects a context pointer named 're_ctx_p' to be in scope at the place of use.
 */
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST() \
  do \
  { \
    re_ctx_p->recursion_counter--; \
    if (re_ctx_p->recursion_counter == 0) \
    { \
      return ecma_raise_range_error (ECMA_ERR_MSG ("RegExp recursion limit is exceeded.")); \
    } \
  } \
  while (0)

/**
 * Give one unit back to the recursion budget kept in 're_ctx_p->recursion_counter'.
 */
#define REGEXP_RECURSION_COUNTER_INCREASE() (re_ctx_p->recursion_counter += 1)

/**
 * Load the full recursion budget (REGEXP_RECURSION_LIMIT) into the counter
 * of the local context named 're_ctx'.
 */
#define REGEXP_RECURSION_COUNTER_INIT() (re_ctx.recursion_counter = REGEXP_RECURSION_LIMIT)
#else /* !defined (REGEXP_RECURSION_LIMIT) */
/**
 * No recursion limit is configured: expands to nothing, the depth is not checked.
 */
#define REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST()

/**
 * No recursion limit is configured: expands to nothing.
 */
#define REGEXP_RECURSION_COUNTER_INCREASE()

/**
 * No recursion limit is configured: expands to nothing.
 */
#define REGEXP_RECURSION_COUNTER_INIT()
#endif /* defined (REGEXP_RECURSION_LIMIT) */
/**
* RegExp flags
* Note:
@ -48,6 +88,9 @@ typedef struct
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
#ifdef REGEXP_RECURSION_LIMIT
uint32_t recursion_counter; /**< RegExp recursion counter */
#endif /* REGEXP_RECURSION_LIMIT */
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t *num_of_iterations_p; /**< number of iterations */

View File

@ -246,6 +246,7 @@ static ecma_value_t
re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
bool expect_eof) /**< expect end of file */
{
REGEXP_RECURSION_COUNTER_DECREASE_AND_TEST ();
uint32_t idx;
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
@ -440,6 +441,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
else
{
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
REGEXP_RECURSION_COUNTER_INCREASE ();
should_loop = false;
}
break;
@ -453,6 +455,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
else
{
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
REGEXP_RECURSION_COUNTER_INCREASE ();
should_loop = false;
}
@ -559,6 +562,7 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
re_ctx.flags = flags;
re_ctx.highest_backref = 0;
re_ctx.num_of_non_captures = 0;
REGEXP_RECURSION_COUNTER_INIT ();
re_bytecode_ctx_t bc_ctx;
bc_ctx.block_start_p = NULL;

View File

@ -41,6 +41,9 @@ typedef struct
uint32_t num_of_captures; /**< number of capture groups */
uint32_t num_of_non_captures; /**< number of non-capture groups */
uint32_t highest_backref; /**< highest backreference */
#ifdef REGEXP_RECURSION_LIMIT
uint32_t recursion_counter; /**< RegExp recursion counter */
#endif /* REGEXP_RECURSION_LIMIT */
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
re_token_t current_token; /**< current token */
re_parser_ctx_t *parser_ctx_p; /**< pointer of RegExp parser context */

View File

@ -126,6 +126,8 @@ def get_arguments():
help='specify profile file')
coregrp.add_argument('--regexp-strict-mode', metavar='X', choices=['ON', 'OFF'], type=str.upper,
help=devhelp('enable regexp strict mode (%(choices)s)'))
coregrp.add_argument('--regexp-recursion-limit', metavar='N', type=int,
help='regexp recursion depth limit')
coregrp.add_argument('--show-opcodes', metavar='X', choices=['ON', 'OFF'], type=str.upper,
help=devhelp('enable parser byte-code dumps (%(choices)s)'))
coregrp.add_argument('--show-regexp-opcodes', metavar='X', choices=['ON', 'OFF'], type=str.upper,
@ -194,6 +196,7 @@ def generate_build_options(arguments):
build_options_append('FEATURE_MEM_STRESS_TEST', arguments.mem_stress_test)
build_options_append('FEATURE_PROFILE', arguments.profile)
build_options_append('FEATURE_REGEXP_STRICT_MODE', arguments.regexp_strict_mode)
build_options_append('REGEXP_RECURSION_LIMIT', arguments.regexp_recursion_limit)
build_options_append('FEATURE_PARSER_DUMP', arguments.show_opcodes)
build_options_append('FEATURE_REGEXP_DUMP', arguments.show_regexp_opcodes)
build_options_append('FEATURE_SNAPSHOT_EXEC', arguments.snapshot_exec)

View File

@ -154,6 +154,8 @@ JERRY_BUILDOPTIONS = [
['--jerry-cmdline-test=on']),
Options('buildoption_test-cmdline_snapshot',
['--jerry-cmdline-snapshot=on']),
Options('buildoption_test-regexp_recursion_limit',
['--regexp-recursion-limit=1000']),
]
def get_arguments():