mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2026-02-07 16:11:39 +00:00
Refactor RegExp builtin (#3136)
JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
This commit is contained in:
parent
c3510fc03d
commit
c3bb516e4a
@ -179,7 +179,7 @@ snapshot_add_compiled_code (ecma_compiled_code_t *compiled_code_p, /**< compiled
|
||||
|
||||
globals_p->snapshot_buffer_write_offset += sizeof (ecma_compiled_code_t);
|
||||
|
||||
ecma_value_t pattern = ((re_compiled_code_t *) compiled_code_p)->pattern;
|
||||
ecma_value_t pattern = ((re_compiled_code_t *) compiled_code_p)->source;
|
||||
ecma_string_t *pattern_string_p = ecma_get_string_from_value (pattern);
|
||||
|
||||
ecma_length_t pattern_size = 0;
|
||||
|
||||
@ -1403,7 +1403,7 @@ ecma_bytecode_deref (ecma_compiled_code_t *bytecode_p) /**< byte code pointer */
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
re_compiled_code_t *re_bytecode_p = (re_compiled_code_t *) bytecode_p;
|
||||
|
||||
ecma_deref_ecma_string (ecma_get_string_from_value (re_bytecode_p->pattern));
|
||||
ecma_deref_ecma_string (ecma_get_string_from_value (re_bytecode_p->source));
|
||||
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|
||||
}
|
||||
|
||||
|
||||
@ -45,6 +45,185 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
/**
|
||||
* Helper function to retrieve the flags associated with a RegExp object
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* ECMA_VALUE_EMPTY - otherwise
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_flags_helper (ecma_value_t this, /**< this value */
|
||||
uint16_t *flags_p) /**< [out] flags */
|
||||
{
|
||||
if (!ecma_is_value_object (this)
|
||||
|| !ecma_object_class_is (ecma_get_object_from_value (this), LIT_MAGIC_STRING_REGEXP_UL))
|
||||
{
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type"));
|
||||
}
|
||||
|
||||
ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) ecma_get_object_from_value (this);
|
||||
re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
|
||||
re_obj_p->u.class_prop.u.value);
|
||||
|
||||
if (bc_p != NULL)
|
||||
{
|
||||
*flags_p = bc_p->header.status_flags;
|
||||
}
|
||||
|
||||
return ECMA_VALUE_EMPTY;
|
||||
} /* ecma_builtin_regexp_prototype_flags_helper */
|
||||
|
||||
/**
|
||||
* The RegExp.prototype object's 'flags' accessor property
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v6, 21.2.5.3
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* string value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_get_flags (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
uint16_t flags = RE_FLAG_EMPTY;
|
||||
ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags);
|
||||
if (ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
ecma_stringbuilder_t result = ecma_stringbuilder_create ();
|
||||
|
||||
if (flags & RE_FLAG_GLOBAL)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_G);
|
||||
}
|
||||
|
||||
if (flags & RE_FLAG_IGNORE_CASE)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_I);
|
||||
}
|
||||
|
||||
if (flags & RE_FLAG_MULTILINE)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_M);
|
||||
}
|
||||
|
||||
return ecma_make_string_value (ecma_stringbuilder_finalize (&result));
|
||||
} /* ecma_builtin_regexp_prototype_get_flags */
|
||||
|
||||
/**
|
||||
* The RegExp.prototype object's 'source' accessor property
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v6, 21.2.5.10
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* string value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_get_source (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
if (!ecma_is_value_object (this_arg)
|
||||
|| !ecma_object_class_is (ecma_get_object_from_value (this_arg), LIT_MAGIC_STRING_REGEXP_UL))
|
||||
{
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type"));
|
||||
}
|
||||
|
||||
ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) ecma_get_object_from_value (this_arg);
|
||||
re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
|
||||
re_obj_p->u.class_prop.u.value);
|
||||
|
||||
if (bc_p != NULL)
|
||||
{
|
||||
ecma_ref_ecma_string (ecma_get_string_from_value (bc_p->source));
|
||||
return bc_p->source;
|
||||
}
|
||||
|
||||
return ecma_make_string_value (ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP));
|
||||
} /* ecma_builtin_regexp_prototype_get_source */
|
||||
|
||||
/**
|
||||
* The RegExp.prototype object's 'global' accessor property
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v6, 21.2.5.4
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* ECMA_VALUE_TRUE - if 'global' flag is set
|
||||
* ECMA_VALUE_FALSE - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_get_global (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
uint16_t flags = RE_FLAG_EMPTY;
|
||||
ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags);
|
||||
if (ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
return ecma_make_boolean_value (flags & RE_FLAG_GLOBAL);
|
||||
} /* ecma_builtin_regexp_prototype_get_global */
|
||||
|
||||
/**
|
||||
* The RegExp.prototype object's 'ignoreCase' accessor property
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v6, 21.2.5.5
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* ECMA_VALUE_TRUE - if 'ignoreCase' flag is set
|
||||
* ECMA_VALUE_FALSE - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_get_ignorecase (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
uint16_t flags = RE_FLAG_EMPTY;
|
||||
ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags);
|
||||
if (ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
return ecma_make_boolean_value (flags & RE_FLAG_IGNORE_CASE);
|
||||
} /* ecma_builtin_regexp_prototype_get_ignorecase */
|
||||
|
||||
/**
|
||||
* The RegExp.prototype object's 'multiline' accessor property
|
||||
*
|
||||
* See also:
|
||||
* ECMA-262 v6, 21.2.5.7
|
||||
*
|
||||
* @return ECMA_VALUE_ERROR - if 'this' is not a RegExp object
|
||||
* ECMA_VALUE_TRUE - if 'multiline' flag is set
|
||||
* ECMA_VALUE_FALSE - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value.
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_get_multiline (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
uint16_t flags = RE_FLAG_EMPTY;
|
||||
ecma_value_t ret_value = ecma_builtin_regexp_prototype_flags_helper (this_arg, &flags);
|
||||
if (ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
return ecma_make_boolean_value (flags & RE_FLAG_MULTILINE);
|
||||
} /* ecma_builtin_regexp_prototype_get_multiline */
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
#if ENABLED (JERRY_BUILTIN_ANNEXB)
|
||||
|
||||
/**
|
||||
@ -68,89 +247,40 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
|
||||
/* The builtin RegExp.prototype object does not have [[RegExpMatcher]] internal slot */
|
||||
|| ecma_get_object_from_value (this_arg) == ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE))
|
||||
{
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Incomplete RegExp type"));
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type"));
|
||||
}
|
||||
|
||||
uint16_t flags = 0;
|
||||
|
||||
if (ecma_is_value_object (pattern_arg)
|
||||
&& ecma_object_class_is (ecma_get_object_from_value (pattern_arg), LIT_MAGIC_STRING_REGEXP_UL))
|
||||
&& ecma_object_class_is (ecma_get_object_from_value (pattern_arg), LIT_MAGIC_STRING_REGEXP_UL)
|
||||
&& ecma_get_object_from_value (pattern_arg) != ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE))
|
||||
{
|
||||
if (!ecma_is_value_undefined (flags_arg))
|
||||
{
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Invalid argument of RegExp compile."));
|
||||
}
|
||||
/* Compile from existing RegExp pbject. */
|
||||
ecma_object_t *target_p = ecma_get_object_from_value (pattern_arg);
|
||||
|
||||
/* Get source. */
|
||||
ecma_string_t *magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_SOURCE);
|
||||
ecma_value_t source_value = ecma_op_object_get_own_data_prop (target_p, magic_string_p);
|
||||
ecma_string_t *pattern_string_p = ecma_get_string_from_value (source_value);
|
||||
|
||||
/* Get flags. */
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_GLOBAL);
|
||||
ecma_value_t global_value = ecma_op_object_get_own_data_prop (target_p, magic_string_p);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (global_value));
|
||||
|
||||
if (ecma_is_value_true (global_value))
|
||||
{
|
||||
flags |= RE_FLAG_GLOBAL;
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Invalid argument"));
|
||||
}
|
||||
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_IGNORECASE_UL);
|
||||
ecma_value_t ignore_case_value = ecma_op_object_get_own_data_prop (target_p, magic_string_p);
|
||||
/* Compile from existing RegExp object. */
|
||||
ecma_extended_object_t *target_p = (ecma_extended_object_t *) ecma_get_object_from_value (pattern_arg);
|
||||
re_compiled_code_t *target_bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
|
||||
target_p->u.class_prop.u.value);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (ignore_case_value));
|
||||
ecma_object_t *this_object_p = ecma_get_object_from_value (this_arg);
|
||||
ecma_extended_object_t *current_p = (ecma_extended_object_t *) this_object_p;
|
||||
|
||||
if (ecma_is_value_true (ignore_case_value))
|
||||
{
|
||||
flags |= RE_FLAG_IGNORE_CASE;
|
||||
}
|
||||
re_compiled_code_t *current_bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
|
||||
current_p->u.class_prop.u.value);
|
||||
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_MULTILINE);
|
||||
ecma_value_t multiline_value = ecma_op_object_get_own_data_prop (target_p, magic_string_p);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (multiline_value));
|
||||
|
||||
if (ecma_is_value_true (multiline_value))
|
||||
{
|
||||
flags |= RE_FLAG_MULTILINE;
|
||||
}
|
||||
|
||||
ecma_value_t obj_this = ecma_op_to_object (this_arg);
|
||||
if (ECMA_IS_VALUE_ERROR (obj_this))
|
||||
{
|
||||
return obj_this;
|
||||
}
|
||||
ecma_object_t *this_obj_p = ecma_get_object_from_value (obj_this);
|
||||
|
||||
/* Get bytecode property. */
|
||||
ecma_value_t *bc_prop_p = &(((ecma_extended_object_t *) this_obj_p)->u.class_prop.u.value);
|
||||
|
||||
/* TODO: We currently have to re-compile the bytecode, because
|
||||
* we can't copy it without knowing its length. */
|
||||
const re_compiled_code_t *new_bc_p = NULL;
|
||||
ecma_value_t bc_comp = re_compile_bytecode (&new_bc_p, pattern_string_p, flags);
|
||||
/* Should always succeed, since we're compiling from a source that has been compiled previously. */
|
||||
JERRY_ASSERT (ecma_is_value_empty (bc_comp));
|
||||
|
||||
ecma_deref_ecma_string (pattern_string_p);
|
||||
|
||||
re_compiled_code_t *old_bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t, *bc_prop_p);
|
||||
|
||||
if (old_bc_p != NULL)
|
||||
{
|
||||
/* Free the old bytecode */
|
||||
ecma_bytecode_deref ((ecma_compiled_code_t *) old_bc_p);
|
||||
}
|
||||
|
||||
ECMA_SET_INTERNAL_VALUE_POINTER (*bc_prop_p, new_bc_p);
|
||||
|
||||
re_initialize_props (this_obj_p, pattern_string_p, flags);
|
||||
ecma_free_value (obj_this);
|
||||
JERRY_ASSERT (current_bc_p != NULL);
|
||||
ecma_bytecode_deref ((ecma_compiled_code_t *) current_bc_p);
|
||||
|
||||
JERRY_ASSERT (target_bc_p != NULL);
|
||||
ecma_bytecode_ref ((ecma_compiled_code_t *) target_bc_p);
|
||||
ECMA_SET_INTERNAL_VALUE_POINTER (current_p->u.class_prop.u.value, target_bc_p);
|
||||
ecma_regexp_initialize_props (this_object_p,
|
||||
ecma_get_string_from_value (target_bc_p->source),
|
||||
target_bc_p->header.status_flags);
|
||||
return ecma_copy_value (this_arg);
|
||||
}
|
||||
|
||||
@ -175,7 +305,7 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
|
||||
return flags_str_value;
|
||||
}
|
||||
|
||||
ecma_value_t parsed_flags_val = re_parse_regexp_flags (ecma_get_string_from_value (flags_str_value), &flags);
|
||||
ecma_value_t parsed_flags_val = ecma_regexp_parse_flags (ecma_get_string_from_value (flags_str_value), &flags);
|
||||
ecma_free_value (flags_str_value);
|
||||
if (ECMA_IS_VALUE_ERROR (parsed_flags_val))
|
||||
{
|
||||
@ -193,26 +323,16 @@ ecma_builtin_regexp_prototype_compile (ecma_value_t this_arg, /**< this argument
|
||||
return bc_val;
|
||||
}
|
||||
|
||||
ecma_value_t obj_this = ecma_op_to_object (this_arg);
|
||||
if (ECMA_IS_VALUE_ERROR (obj_this))
|
||||
{
|
||||
ecma_deref_ecma_string (pattern_string_p);
|
||||
return obj_this;
|
||||
}
|
||||
ecma_object_t *this_obj_p = ecma_get_object_from_value (obj_this);
|
||||
ecma_object_t *this_obj_p = ecma_get_object_from_value (this_arg);
|
||||
ecma_value_t *bc_prop_p = &(((ecma_extended_object_t *) this_obj_p)->u.class_prop.u.value);
|
||||
|
||||
re_compiled_code_t *old_bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t, *bc_prop_p);
|
||||
|
||||
if (old_bc_p != NULL)
|
||||
{
|
||||
/* Free the old bytecode */
|
||||
ecma_bytecode_deref ((ecma_compiled_code_t *) old_bc_p);
|
||||
}
|
||||
JERRY_ASSERT (old_bc_p != NULL);
|
||||
ecma_bytecode_deref ((ecma_compiled_code_t *) old_bc_p);
|
||||
|
||||
ECMA_SET_INTERNAL_VALUE_POINTER (*bc_prop_p, new_bc_p);
|
||||
re_initialize_props (this_obj_p, pattern_string_p, flags);
|
||||
ecma_free_value (obj_this);
|
||||
ecma_regexp_initialize_props (this_obj_p, pattern_string_p, flags);
|
||||
ecma_deref_ecma_string (pattern_string_p);
|
||||
|
||||
return ecma_copy_value (this_arg);
|
||||
@ -254,26 +374,7 @@ ecma_builtin_regexp_prototype_exec (ecma_value_t this_arg, /**< this argument */
|
||||
return input_str_value;
|
||||
}
|
||||
|
||||
ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
|
||||
ecma_value_t *bytecode_prop_p = &(((ecma_extended_object_t *) obj_p)->u.class_prop.u.value);
|
||||
|
||||
void *bytecode_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (void, *bytecode_prop_p);
|
||||
|
||||
ecma_value_t ret_value;
|
||||
if (bytecode_p == NULL)
|
||||
{
|
||||
/* Missing bytecode means empty RegExp: '/(?:)/', so always return empty string. */
|
||||
ecma_value_t empty_str_val = ecma_make_magic_string_value (LIT_MAGIC_STRING__EMPTY);
|
||||
ret_value = ecma_op_create_array_object (&empty_str_val, 1, false);
|
||||
re_set_result_array_properties (ecma_get_object_from_value (ret_value),
|
||||
ecma_get_string_from_value (input_str_value),
|
||||
1,
|
||||
0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);
|
||||
}
|
||||
ecma_value_t ret_value = ecma_regexp_exec_helper (obj_this, input_str_value, false);
|
||||
|
||||
ecma_free_value (obj_this);
|
||||
ecma_free_value (input_str_value);
|
||||
@ -296,15 +397,15 @@ static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
|
||||
ecma_value_t arg) /**< routine's argument */
|
||||
{
|
||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
||||
ecma_value_t result = ecma_builtin_regexp_prototype_exec (this_arg, arg);
|
||||
|
||||
ECMA_TRY_CATCH (match_value,
|
||||
ecma_builtin_regexp_prototype_exec (this_arg, arg),
|
||||
ret_value);
|
||||
if (ECMA_IS_VALUE_ERROR (result))
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
ret_value = ecma_make_boolean_value (!ecma_is_value_null (match_value));
|
||||
|
||||
ECMA_FINALIZE (match_value);
|
||||
ecma_value_t ret_value = ecma_make_boolean_value (!ecma_is_value_null (result));
|
||||
ecma_free_value (result);
|
||||
|
||||
return ret_value;
|
||||
} /* ecma_builtin_regexp_prototype_test */
|
||||
@ -321,77 +422,53 @@ ecma_builtin_regexp_prototype_test (ecma_value_t this_arg, /**< this argument */
|
||||
static ecma_value_t
|
||||
ecma_builtin_regexp_prototype_to_string (ecma_value_t this_arg) /**< this argument */
|
||||
{
|
||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
||||
|
||||
if (!ecma_is_value_object (this_arg)
|
||||
|| !ecma_object_class_is (ecma_get_object_from_value (this_arg), LIT_MAGIC_STRING_REGEXP_UL))
|
||||
{
|
||||
ret_value = ecma_raise_type_error (ECMA_ERR_MSG ("Incomplete RegExp type"));
|
||||
return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type"));
|
||||
}
|
||||
|
||||
ecma_object_t *obj_p = ecma_get_object_from_value (this_arg);
|
||||
ecma_extended_object_t *re_obj_p = (ecma_extended_object_t *) obj_p;
|
||||
|
||||
re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
|
||||
re_obj_p->u.class_prop.u.value);
|
||||
|
||||
ecma_string_t *source_p;
|
||||
uint16_t flags;
|
||||
|
||||
if (bc_p != NULL)
|
||||
{
|
||||
source_p = ecma_get_string_from_value (bc_p->source);
|
||||
flags = bc_p->header.status_flags;
|
||||
}
|
||||
else
|
||||
{
|
||||
ECMA_TRY_CATCH (obj_this,
|
||||
ecma_op_to_object (this_arg),
|
||||
ret_value);
|
||||
|
||||
ecma_object_t *obj_p = ecma_get_object_from_value (obj_this);
|
||||
|
||||
/* Get RegExp source from the source property */
|
||||
ecma_string_t *magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_SOURCE);
|
||||
ecma_value_t source_value = ecma_op_object_get_own_data_prop (obj_p, magic_string_p);
|
||||
|
||||
ecma_string_t *output_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_SLASH_CHAR);
|
||||
ecma_string_t *source_str_p = ecma_get_string_from_value (source_value);
|
||||
output_str_p = ecma_concat_ecma_strings (output_str_p, source_str_p);
|
||||
ecma_deref_ecma_string (source_str_p);
|
||||
|
||||
lit_utf8_byte_t flags[4];
|
||||
lit_utf8_byte_t *flags_p = flags;
|
||||
|
||||
*flags_p++ = LIT_CHAR_SLASH;
|
||||
|
||||
/* Check the global flag */
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_GLOBAL);
|
||||
ecma_value_t global_value = ecma_op_object_get_own_data_prop (obj_p, magic_string_p);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (global_value));
|
||||
|
||||
if (ecma_is_value_true (global_value))
|
||||
{
|
||||
*flags_p++ = LIT_CHAR_LOWERCASE_G;
|
||||
}
|
||||
|
||||
/* Check the ignoreCase flag */
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_IGNORECASE_UL);
|
||||
ecma_value_t ignore_case_value = ecma_op_object_get_own_data_prop (obj_p, magic_string_p);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (ignore_case_value));
|
||||
|
||||
if (ecma_is_value_true (ignore_case_value))
|
||||
{
|
||||
*flags_p++ = LIT_CHAR_LOWERCASE_I;
|
||||
}
|
||||
|
||||
/* Check the multiline flag */
|
||||
magic_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_MULTILINE);
|
||||
ecma_value_t multiline_value = ecma_op_object_get_own_data_prop (obj_p, magic_string_p);
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_boolean (multiline_value));
|
||||
|
||||
if (ecma_is_value_true (multiline_value))
|
||||
{
|
||||
*flags_p++ = LIT_CHAR_LOWERCASE_M;
|
||||
}
|
||||
|
||||
lit_utf8_size_t size = (lit_utf8_size_t) (flags_p - flags);
|
||||
output_str_p = ecma_append_chars_to_string (output_str_p, flags, size, size);
|
||||
|
||||
ret_value = ecma_make_string_value (output_str_p);
|
||||
|
||||
ECMA_FINALIZE (obj_this);
|
||||
source_p = ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
|
||||
flags = RE_FLAG_EMPTY;
|
||||
}
|
||||
|
||||
return ret_value;
|
||||
ecma_stringbuilder_t result = ecma_stringbuilder_create ();
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_SLASH);
|
||||
ecma_stringbuilder_append (&result, source_p);
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_SLASH);
|
||||
|
||||
if (flags & RE_FLAG_GLOBAL)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_G);
|
||||
}
|
||||
|
||||
if (flags & RE_FLAG_IGNORE_CASE)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_I);
|
||||
}
|
||||
|
||||
if (flags & RE_FLAG_MULTILINE)
|
||||
{
|
||||
ecma_stringbuilder_append_byte (&result, LIT_CHAR_LOWERCASE_M);
|
||||
}
|
||||
|
||||
return ecma_make_string_value (ecma_stringbuilder_finalize (&result));
|
||||
} /* ecma_builtin_regexp_prototype_to_string */
|
||||
|
||||
/**
|
||||
|
||||
@ -26,6 +26,27 @@ OBJECT_VALUE (LIT_MAGIC_STRING_CONSTRUCTOR,
|
||||
ECMA_BUILTIN_ID_REGEXP,
|
||||
ECMA_PROPERTY_CONFIGURABLE_WRITABLE)
|
||||
|
||||
#if ENABLED (JERRY_ES2015)
|
||||
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_FLAGS,
|
||||
ecma_builtin_regexp_prototype_get_flags,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
|
||||
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_SOURCE,
|
||||
ecma_builtin_regexp_prototype_get_source,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
|
||||
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_GLOBAL,
|
||||
ecma_builtin_regexp_prototype_get_global,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
|
||||
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_IGNORECASE_UL,
|
||||
ecma_builtin_regexp_prototype_get_ignorecase,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
|
||||
ACCESSOR_READ_ONLY (LIT_MAGIC_STRING_MULTILINE,
|
||||
ecma_builtin_regexp_prototype_get_multiline,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
#else /* !ENABLED (JERRY_ES2015) */
|
||||
/* ECMA-262 v5, 15.10.7.1 */
|
||||
STRING_VALUE (LIT_MAGIC_STRING_SOURCE,
|
||||
LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP,
|
||||
@ -45,6 +66,7 @@ SIMPLE_VALUE (LIT_MAGIC_STRING_IGNORECASE_UL,
|
||||
SIMPLE_VALUE (LIT_MAGIC_STRING_MULTILINE,
|
||||
ECMA_VALUE_FALSE,
|
||||
ECMA_PROPERTY_FIXED)
|
||||
#endif /* ENABLED (JERRY_ES2015) */
|
||||
|
||||
/* ECMA-262 v5, 15.10.7.5 */
|
||||
NUMBER_VALUE (LIT_MAGIC_STRING_LASTINDEX_UL,
|
||||
|
||||
@ -110,7 +110,7 @@ ecma_builtin_regexp_dispatch_construct (const ecma_value_t *arguments_list_p, /*
|
||||
|
||||
ecma_string_t *flags_string_p = ecma_get_string_from_value (flags_str_value);
|
||||
JERRY_ASSERT (flags_string_p != NULL);
|
||||
ret_value = re_parse_regexp_flags (flags_string_p, &flags);
|
||||
ret_value = ecma_regexp_parse_flags (flags_string_p, &flags);
|
||||
ecma_free_value (flags_str_value); // implicit frees flags_string_p
|
||||
|
||||
if (ECMA_IS_VALUE_ERROR (ret_value))
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -35,34 +35,66 @@
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RE_FLAG_EMPTY = 0u, /* Empty RegExp flags */
|
||||
RE_FLAG_GLOBAL = (1u << 1), /**< ECMA-262 v5, 15.10.7.2 */
|
||||
RE_FLAG_IGNORE_CASE = (1u << 2), /**< ECMA-262 v5, 15.10.7.3 */
|
||||
RE_FLAG_MULTILINE = (1u << 3) /**< ECMA-262 v5, 15.10.7.4 */
|
||||
} re_flags_t;
|
||||
} ecma_regexp_flags_t;
|
||||
|
||||
/**
|
||||
* Structure for storing capturing group results
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *begin_p; /**< substring start pointer */
|
||||
const lit_utf8_byte_t *end_p; /**< substring end pointer */
|
||||
} ecma_regexp_capture_t;
|
||||
|
||||
/**
|
||||
* Structure for storing non-capturing group results
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t *str_p; /**< string pointer */
|
||||
} ecma_regexp_non_capture_t;
|
||||
|
||||
#if (JERRY_STACK_LIMIT != 0)
|
||||
/**
|
||||
* Value used as the result when the stack limit is reached
|
||||
*/
|
||||
#define ECMA_RE_OUT_OF_STACK ((const lit_utf8_byte_t *) UINTPTR_MAX)
|
||||
|
||||
/**
|
||||
* Checks if the stack limit has been reached during regexp matching
|
||||
*/
|
||||
#define ECMA_RE_STACK_LIMIT_REACHED(p) (JERRY_UNLIKELY (p == ECMA_RE_OUT_OF_STACK))
|
||||
#else /* JERRY_STACK_LIMIT == 0 */
|
||||
#define ECMA_RE_STACK_LIMIT_REACHED(p) (false)
|
||||
#endif /* JERRY_STACK_LIMIT != 0 */
|
||||
|
||||
/**
|
||||
* RegExp executor context
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
const lit_utf8_byte_t **saved_p; /**< saved result string pointers, ECMA 262 v5, 15.10.2.1, State */
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input pattern string */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern string */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t *num_of_iterations_p; /**< number of iterations */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
} re_matcher_ctx_t;
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input string */
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input string */
|
||||
uint32_t captures_count; /**< number of capture groups */
|
||||
ecma_regexp_capture_t *captures_p; /**< capturing groups */
|
||||
uint32_t non_captures_count; /**< number of non-capture groups */
|
||||
ecma_regexp_non_capture_t *non_captures_p; /**< non-capturing groups */
|
||||
uint32_t *iterations_p; /**< number of iterations */
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
} ecma_regexp_ctx_t;
|
||||
|
||||
ecma_value_t ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p);
|
||||
ecma_value_t ecma_op_create_regexp_object (ecma_string_t *pattern_p, uint16_t flags);
|
||||
ecma_value_t ecma_regexp_exec_helper (ecma_value_t regexp_value, ecma_value_t input_string, bool ignore_global);
|
||||
ecma_value_t ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg, ecma_string_t **pattern_string_p);
|
||||
ecma_char_t re_canonicalize (ecma_char_t ch, bool is_ignorecase);
|
||||
void re_set_result_array_properties (ecma_object_t *array_obj_p, ecma_string_t *input_str_p, uint32_t num_of_elements,
|
||||
int32_t index);
|
||||
ecma_value_t re_parse_regexp_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
|
||||
void re_initialize_props (ecma_object_t *re_obj_p, ecma_string_t *source_p, uint16_t flags);
|
||||
ecma_char_t ecma_regexp_canonicalize (ecma_char_t ch, bool is_ignorecase);
|
||||
ecma_char_t ecma_regexp_canonicalize_char (ecma_char_t ch);
|
||||
ecma_value_t ecma_regexp_parse_flags (ecma_string_t *flags_str_p, uint16_t *flags_p);
|
||||
void ecma_regexp_initialize_props (ecma_object_t *re_obj_p, ecma_string_t *source_p, uint16_t flags);
|
||||
|
||||
/**
|
||||
* @}
|
||||
|
||||
@ -26,9 +26,6 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_RIGHT_PAREN, ")")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_ASTERIX_CHAR, "*")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COMMA_CHAR, ",")
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SLASH_CHAR, "/")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_COLON_CHAR, ":")
|
||||
#if ENABLED (JERRY_BUILTIN_MATH)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_E_U, "E")
|
||||
@ -207,6 +204,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_CLEAR, "clear")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_EVERY, "every")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FALSE, "false")
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLAGS, "flags")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_MATH)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FLOOR, "floor")
|
||||
#endif
|
||||
@ -290,7 +290,10 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_FREEZE, "freeze")
|
||||
#if ENABLED (JERRY_BUILTIN_DATE)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_DAY_UL, "getDay")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
#if !ENABLED (JERRY_ES2015) && ENABLED (JERRY_BUILTIN_REGEXP) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_BUILTIN_STRING) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && !( ENABLED (JERRY_ES2015))
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GLOBAL, "global")
|
||||
#endif
|
||||
#if ENABLED (JERRY_ES2015_BUILTIN_TYPEDARRAY)
|
||||
@ -319,7 +322,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_REPEAT, "repeat")
|
||||
|| ENABLED (JERRY_ES2015_BUILTIN_SYMBOL)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SEARCH, "search")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
#if !ENABLED (JERRY_ES2015) && ENABLED (JERRY_BUILTIN_REGEXP) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && !( ENABLED (JERRY_ES2015))
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_SOURCE, "source")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_ARRAY)
|
||||
@ -518,6 +523,10 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IS_INTEGER, "isInteger")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_LASTINDEX_UL, "lastIndex")
|
||||
#endif
|
||||
#if !ENABLED (JERRY_ES2015) && ENABLED (JERRY_BUILTIN_REGEXP) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && !( ENABLED (JERRY_ES2015))
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_MULTILINE, "multiline")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PROTOTYPE, "prototype")
|
||||
@ -566,7 +575,9 @@ LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_MINUTES_UL, "getMinutes")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_SECONDS_UL, "getSeconds")
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_GET_UTC_DATE_UL, "getUTCDate")
|
||||
#endif
|
||||
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
||||
#if !ENABLED (JERRY_ES2015) && ENABLED (JERRY_BUILTIN_REGEXP) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && ENABLED (JERRY_ES2015) \
|
||||
|| ENABLED (JERRY_BUILTIN_REGEXP) && !( ENABLED (JERRY_ES2015))
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_IGNORECASE_UL, "ignoreCase")
|
||||
#endif
|
||||
LIT_MAGIC_STRING_DEF (LIT_MAGIC_STRING_PARSE_FLOAT, "parseFloat")
|
||||
|
||||
@ -27,7 +27,6 @@ LIT_MAGIC_STRING_ASTERIX_CHAR = "*"
|
||||
LIT_MAGIC_STRING_SPACE_CHAR = " "
|
||||
LIT_MAGIC_STRING_RIGHT_PAREN = ")"
|
||||
LIT_MAGIC_STRING_COMMA_CHAR = ","
|
||||
LIT_MAGIC_STRING_SLASH_CHAR = "/"
|
||||
LIT_MAGIC_STRING_COLON_CHAR = ":"
|
||||
LIT_MAGIC_STRING_E_U = "E"
|
||||
LIT_MAGIC_STRING_LEFT_SQUARE_CHAR = "["
|
||||
@ -102,6 +101,7 @@ LIT_MAGIC_STRING_CATCH = "catch"
|
||||
LIT_MAGIC_STRING_CLEAR = "clear"
|
||||
LIT_MAGIC_STRING_EVERY = "every"
|
||||
LIT_MAGIC_STRING_FALSE = "false"
|
||||
LIT_MAGIC_STRING_FLAGS = "flags"
|
||||
LIT_MAGIC_STRING_FLOOR = "floor"
|
||||
LIT_MAGIC_STRING_INDEX = "index"
|
||||
LIT_MAGIC_STRING_INPUT = "input"
|
||||
|
||||
@ -30,8 +30,18 @@
|
||||
|
||||
/**
|
||||
* Size of block of RegExp bytecode. Used for allocation
|
||||
*
|
||||
* @return pointer to the RegExp compiled code header
|
||||
*/
|
||||
#define REGEXP_BYTECODE_BLOCK_SIZE 256UL
|
||||
#define REGEXP_BYTECODE_BLOCK_SIZE 64UL
|
||||
|
||||
void
|
||||
re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
bc_ctx_p->block_start_p = jmem_heap_alloc_block (REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + REGEXP_BYTECODE_BLOCK_SIZE;
|
||||
bc_ctx_p->current_p = bc_ctx_p->block_start_p + sizeof (re_compiled_code_t);
|
||||
} /* re_initialize_regexp_bytecode */
|
||||
|
||||
/**
|
||||
* Realloc the bytecode container
|
||||
@ -42,26 +52,22 @@ static uint8_t *
|
||||
re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
JERRY_ASSERT (bc_ctx_p->block_end_p >= bc_ctx_p->block_start_p);
|
||||
size_t old_size = (size_t) (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
|
||||
const size_t old_size = (size_t) (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
|
||||
|
||||
/* If one of the members of the RegExp bytecode context is NULL, then all members should be NULL
|
||||
* (it means first allocation), otherwise all of the members should be non-NULL pointers. */
|
||||
JERRY_ASSERT ((!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p)
|
||||
|| (bc_ctx_p->current_p && bc_ctx_p->block_end_p && bc_ctx_p->block_start_p));
|
||||
|
||||
size_t new_block_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
|
||||
const size_t new_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
|
||||
JERRY_ASSERT (bc_ctx_p->current_p >= bc_ctx_p->block_start_p);
|
||||
size_t current_ptr_offset = (size_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
|
||||
const size_t current_ptr_offset = (size_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
|
||||
|
||||
uint8_t *new_block_start_p = (uint8_t *) jmem_heap_alloc_block (new_block_size);
|
||||
if (bc_ctx_p->current_p)
|
||||
{
|
||||
memcpy (new_block_start_p, bc_ctx_p->block_start_p, (size_t) (current_ptr_offset));
|
||||
jmem_heap_free_block (bc_ctx_p->block_start_p, old_size);
|
||||
}
|
||||
bc_ctx_p->block_start_p = new_block_start_p;
|
||||
bc_ctx_p->block_end_p = new_block_start_p + new_block_size;
|
||||
bc_ctx_p->current_p = new_block_start_p + current_ptr_offset;
|
||||
bc_ctx_p->block_start_p = jmem_heap_realloc_block (bc_ctx_p->block_start_p,
|
||||
old_size,
|
||||
new_size);
|
||||
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + new_size;
|
||||
bc_ctx_p->current_p = bc_ctx_p->block_start_p + current_ptr_offset;
|
||||
|
||||
return bc_ctx_p->current_p;
|
||||
} /* re_realloc_regexp_bytecode_block */
|
||||
@ -69,54 +75,71 @@ re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytec
|
||||
/**
|
||||
* Append a new bytecode to the end of the bytecode container
|
||||
*/
|
||||
static void
|
||||
re_bytecode_list_append (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint8_t *bytecode_p, /**< input bytecode */
|
||||
size_t length) /**< length of input */
|
||||
static uint8_t *
|
||||
re_bytecode_reserve (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const size_t size) /**< size */
|
||||
{
|
||||
JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + length > bc_ctx_p->block_end_p)
|
||||
if (current_p + size > bc_ctx_p->block_end_p)
|
||||
{
|
||||
current_p = re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
}
|
||||
|
||||
memcpy (current_p, bytecode_p, length);
|
||||
bc_ctx_p->current_p += length;
|
||||
} /* re_bytecode_list_append */
|
||||
bc_ctx_p->current_p += size;
|
||||
return current_p;
|
||||
} /* re_bytecode_reserve */
|
||||
|
||||
/**
|
||||
* Insert a new bytecode to the bytecode container
|
||||
*/
|
||||
void
|
||||
re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
size_t offset, /**< distance from the start of the container */
|
||||
uint8_t *bytecode_p, /**< input bytecode */
|
||||
size_t length) /**< length of input */
|
||||
static void
|
||||
re_bytecode_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
const size_t offset, /**< distance from the start of the container */
|
||||
const size_t size) /**< size */
|
||||
{
|
||||
JERRY_ASSERT (length <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
||||
|
||||
uint8_t *current_p = bc_ctx_p->current_p;
|
||||
if (current_p + length > bc_ctx_p->block_end_p)
|
||||
if (current_p + size > bc_ctx_p->block_end_p)
|
||||
{
|
||||
re_realloc_regexp_bytecode_block (bc_ctx_p);
|
||||
}
|
||||
|
||||
uint8_t *src_p = bc_ctx_p->block_start_p + offset;
|
||||
if ((re_get_bytecode_length (bc_ctx_p) - offset) > 0)
|
||||
uint8_t *dest_p = bc_ctx_p->block_start_p + offset;
|
||||
const size_t bytecode_length = re_get_bytecode_length (bc_ctx_p);
|
||||
if (bytecode_length - offset > 0)
|
||||
{
|
||||
uint8_t *dest_p = src_p + length;
|
||||
uint8_t *tmp_block_start_p;
|
||||
tmp_block_start_p = (uint8_t *) jmem_heap_alloc_block (re_get_bytecode_length (bc_ctx_p) - offset);
|
||||
memcpy (tmp_block_start_p, src_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
|
||||
memcpy (dest_p, tmp_block_start_p, (size_t) (re_get_bytecode_length (bc_ctx_p) - offset));
|
||||
jmem_heap_free_block (tmp_block_start_p, re_get_bytecode_length (bc_ctx_p) - offset);
|
||||
memmove (dest_p + size, dest_p, bytecode_length - offset);
|
||||
}
|
||||
memcpy (src_p, bytecode_p, length);
|
||||
|
||||
bc_ctx_p->current_p += length;
|
||||
} /* re_bytecode_list_insert */
|
||||
bc_ctx_p->current_p += size;
|
||||
} /* re_bytecode_insert */
|
||||
|
||||
/**
|
||||
* Encode ecma_char_t into bytecode
|
||||
*/
|
||||
static void
|
||||
re_encode_char (uint8_t *dest_p, /**< destination */
|
||||
const ecma_char_t c) /**< character */
|
||||
{
|
||||
*dest_p++ = (uint8_t) ((c >> 8) & 0xFF);
|
||||
*dest_p = (uint8_t) (c & 0xFF);
|
||||
} /* re_encode_char */
|
||||
|
||||
/**
|
||||
* Encode uint32_t into bytecode
|
||||
*/
|
||||
static void
|
||||
re_encode_u32 (uint8_t *dest_p, /**< destination */
|
||||
const uint32_t u) /**< uint32 value */
|
||||
{
|
||||
*dest_p++ = (uint8_t) ((u >> 24) & 0xFF);
|
||||
*dest_p++ = (uint8_t) ((u >> 16) & 0xFF);
|
||||
*dest_p++ = (uint8_t) ((u >> 8) & 0xFF);
|
||||
*dest_p = (uint8_t) (u & 0xFF);
|
||||
} /* re_encode_u32 */
|
||||
|
||||
/**
|
||||
* Get a character from the RegExp bytecode and increase the bytecode position
|
||||
@ -124,10 +147,12 @@ re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode conte
|
||||
* @return ecma character
|
||||
*/
|
||||
inline ecma_char_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_char (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
re_get_char (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
ecma_char_t chr;
|
||||
memcpy (&chr, *bc_p, sizeof (ecma_char_t));
|
||||
const uint8_t *src_p = *bc_p;
|
||||
ecma_char_t chr = (ecma_char_t) *src_p++;
|
||||
chr = (ecma_char_t) (chr << 8);
|
||||
chr = (ecma_char_t) (chr | *src_p);
|
||||
(*bc_p) += sizeof (ecma_char_t);
|
||||
return chr;
|
||||
} /* re_get_char */
|
||||
@ -138,11 +163,9 @@ re_get_char (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
* @return current RegExp opcode
|
||||
*/
|
||||
inline re_opcode_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_opcode (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
re_get_opcode (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
uint8_t bytecode = **bc_p;
|
||||
(*bc_p) += sizeof (uint8_t);
|
||||
return (re_opcode_t) bytecode;
|
||||
return (re_opcode_t) *((*bc_p)++);
|
||||
} /* re_get_opcode */
|
||||
|
||||
/**
|
||||
@ -151,10 +174,17 @@ re_get_opcode (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
* @return opcode parameter
|
||||
*/
|
||||
inline uint32_t JERRY_ATTR_ALWAYS_INLINE
|
||||
re_get_value (uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
re_get_value (const uint8_t **bc_p) /**< pointer to bytecode start */
|
||||
{
|
||||
uint32_t value;
|
||||
memcpy (&value, *bc_p, sizeof (uint32_t));
|
||||
const uint8_t *src_p = *bc_p;
|
||||
uint32_t value = (uint32_t) (*src_p++);
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
value <<= 8;
|
||||
value |= ((uint32_t) (*src_p++));
|
||||
|
||||
(*bc_p) += sizeof (uint32_t);
|
||||
return value;
|
||||
} /* re_get_value */
|
||||
@ -175,9 +205,10 @@ re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode contex
|
||||
*/
|
||||
void
|
||||
re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
re_opcode_t opcode) /**< input opcode */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_bytecode_list_append (bc_ctx_p, (uint8_t *) &opcode, sizeof (uint8_t));
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint8_t));
|
||||
*dest_p = (uint8_t) opcode;
|
||||
} /* re_append_opcode */
|
||||
|
||||
/**
|
||||
@ -185,9 +216,10 @@ re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
*/
|
||||
void
|
||||
re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t value) /**< input value */
|
||||
const uint32_t value) /**< input value */
|
||||
{
|
||||
re_bytecode_list_append (bc_ctx_p, (uint8_t *) &value, sizeof (uint32_t));
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint32_t));
|
||||
re_encode_u32 (dest_p, value);
|
||||
} /* re_append_u32 */
|
||||
|
||||
/**
|
||||
@ -195,9 +227,10 @@ re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
*/
|
||||
void
|
||||
re_append_char (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
ecma_char_t input_char) /**< input char */
|
||||
const ecma_char_t input_char) /**< input char */
|
||||
{
|
||||
re_bytecode_list_append (bc_ctx_p, (uint8_t *) &input_char, sizeof (ecma_char_t));
|
||||
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (ecma_char_t));
|
||||
re_encode_char (dest_p, input_char);
|
||||
} /* re_append_char */
|
||||
|
||||
/**
|
||||
@ -216,10 +249,11 @@ re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context
|
||||
*/
|
||||
void
|
||||
re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t offset, /**< distance from the start of the container */
|
||||
re_opcode_t opcode) /**< input opcode */
|
||||
const uint32_t offset, /**< distance from the start of the container */
|
||||
const re_opcode_t opcode) /**< input opcode */
|
||||
{
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (uint8_t *) &opcode, sizeof (uint8_t));
|
||||
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint8_t));
|
||||
*(bc_ctx_p->block_start_p + offset) = (uint8_t) opcode;
|
||||
} /* re_insert_opcode */
|
||||
|
||||
/**
|
||||
@ -230,7 +264,8 @@ re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
||||
uint32_t offset, /**< distance from the start of the container */
|
||||
uint32_t value) /**< input value */
|
||||
{
|
||||
re_bytecode_list_insert (bc_ctx_p, offset, (uint8_t *) &value, sizeof (uint32_t));
|
||||
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint32_t));
|
||||
re_encode_u32 (bc_ctx_p->block_start_p + offset, value);
|
||||
} /* re_insert_u32 */
|
||||
|
||||
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
||||
@ -242,10 +277,10 @@ re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
||||
{
|
||||
re_compiled_code_t *compiled_code_p = (re_compiled_code_t *) bc_ctx_p->block_start_p;
|
||||
JERRY_DEBUG_MSG ("%d ", compiled_code_p->header.status_flags);
|
||||
JERRY_DEBUG_MSG ("%d ", compiled_code_p->num_of_captures);
|
||||
JERRY_DEBUG_MSG ("%d | ", compiled_code_p->num_of_non_captures);
|
||||
JERRY_DEBUG_MSG ("%d ", compiled_code_p->captures_count);
|
||||
JERRY_DEBUG_MSG ("%d | ", compiled_code_p->non_captures_count);
|
||||
|
||||
uint8_t *bytecode_p = (uint8_t *) (compiled_code_p + 1);
|
||||
const uint8_t *bytecode_p = (const uint8_t *) (compiled_code_p + 1);
|
||||
|
||||
re_opcode_t op;
|
||||
while ((op = re_get_opcode (&bytecode_p)))
|
||||
|
||||
@ -85,9 +85,9 @@ typedef enum
|
||||
typedef struct
|
||||
{
|
||||
ecma_compiled_code_t header; /**< compiled code header */
|
||||
ecma_value_t pattern; /**< original RegExp pattern */
|
||||
uint32_t num_of_captures; /**< number of capturing brackets */
|
||||
uint32_t num_of_non_captures; /**< number of non capturing brackets */
|
||||
ecma_value_t source; /**< original RegExp pattern */
|
||||
uint32_t captures_count; /**< number of capturing brackets */
|
||||
uint32_t non_captures_count; /**< number of non capturing brackets */
|
||||
} re_compiled_code_t;
|
||||
|
||||
/**
|
||||
@ -100,19 +100,24 @@ typedef struct
|
||||
uint8_t *current_p; /**< current position in bytecode */
|
||||
} re_bytecode_ctx_t;
|
||||
|
||||
re_opcode_t re_get_opcode (uint8_t **bc_p);
|
||||
ecma_char_t re_get_char (uint8_t **bc_p);
|
||||
uint32_t re_get_value (uint8_t **bc_p);
|
||||
re_opcode_t re_get_opcode (const uint8_t **bc_p);
|
||||
ecma_char_t re_get_char (const uint8_t **bc_p);
|
||||
uint32_t re_get_value (const uint8_t **bc_p);
|
||||
uint32_t JERRY_ATTR_PURE re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p);
|
||||
|
||||
void re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, re_opcode_t opcode);
|
||||
void re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, uint32_t value);
|
||||
void re_append_char (re_bytecode_ctx_t *bc_ctx_p, ecma_char_t input_char);
|
||||
void re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p);
|
||||
|
||||
void re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, const re_opcode_t opcode);
|
||||
void re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t value);
|
||||
void re_append_char (re_bytecode_ctx_t *bc_ctx_p, const ecma_char_t input_char);
|
||||
void re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, uint32_t value);
|
||||
|
||||
void re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, uint32_t offset, re_opcode_t opcode);
|
||||
void re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, uint32_t offset, uint32_t value);
|
||||
void re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p, size_t offset, uint8_t *bytecode_p, size_t length);
|
||||
void re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const re_opcode_t opcode);
|
||||
void re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const uint32_t value);
|
||||
void re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p,
|
||||
const size_t offset,
|
||||
const uint8_t *bytecode_p,
|
||||
const size_t length);
|
||||
|
||||
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
||||
void re_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include "ecma-helpers.h"
|
||||
#include "ecma-regexp-object.h"
|
||||
#include "ecma-try-catch-macro.h"
|
||||
#include "lit-char-helpers.h"
|
||||
#include "jcontext.h"
|
||||
#include "jrt-libc-includes.h"
|
||||
#include "jmem.h"
|
||||
@ -36,20 +37,6 @@
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Callback function of character class generation
|
||||
*/
|
||||
static void
|
||||
re_append_char_class (void *re_ctx_p, /**< RegExp compiler context */
|
||||
ecma_char_t start, /**< character class range from */
|
||||
ecma_char_t end) /**< character class range to */
|
||||
{
|
||||
re_compiler_ctx_t *ctx_p = (re_compiler_ctx_t *) re_ctx_p;
|
||||
re_append_char (ctx_p->bytecode_ctx_p, start);
|
||||
re_append_char (ctx_p->bytecode_ctx_p, end);
|
||||
ctx_p->parser_ctx_p->num_of_classes++;
|
||||
} /* re_append_char_class */
|
||||
|
||||
/**
|
||||
* Insert simple atom iterator
|
||||
*
|
||||
@ -234,6 +221,276 @@ re_insert_into_group_with_jump (re_compiler_ctx_t *re_ctx_p, /**< RegExp compile
|
||||
return re_insert_into_group (re_ctx_p, group_start_offset, idx, is_capturable);
|
||||
} /* re_insert_into_group_with_jump */
|
||||
|
||||
/**
|
||||
* Append a character class range to the bytecode
|
||||
*/
|
||||
static void
|
||||
re_append_char_class (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
ecma_char_t start, /**< character class range from */
|
||||
ecma_char_t end) /**< character class range to */
|
||||
{
|
||||
re_append_char (re_ctx_p->bytecode_ctx_p, ecma_regexp_canonicalize (start, re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
re_append_char (re_ctx_p->bytecode_ctx_p, ecma_regexp_canonicalize (end, re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
re_ctx_p->parser_ctx_p->classes_count++;
|
||||
} /* re_append_char_class */
|
||||
|
||||
/**
|
||||
* Read the input pattern and parse the range of character class
|
||||
*
|
||||
* @return empty ecma value - if parsed successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
re_parse_char_class (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context */
|
||||
re_token_t *out_token_p) /**< [out] output token */
|
||||
{
|
||||
re_parser_ctx_t *const parser_ctx_p = re_ctx_p->parser_ctx_p;
|
||||
out_token_p->qmax = out_token_p->qmin = 1;
|
||||
parser_ctx_p->classes_count = 0;
|
||||
|
||||
ecma_char_t start = LIT_CHAR_UNDEF;
|
||||
bool is_range = false;
|
||||
const bool is_char_class = (re_ctx_p->current_token.type == RE_TOK_START_CHAR_CLASS
|
||||
|| re_ctx_p->current_token.type == RE_TOK_START_INV_CHAR_CLASS);
|
||||
|
||||
const ecma_char_t prev_char = lit_utf8_peek_prev (parser_ctx_p->input_curr_p);
|
||||
if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
|
||||
}
|
||||
|
||||
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, start);
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (ch == LIT_CHAR_MINUS)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '-'"));
|
||||
}
|
||||
|
||||
if (start != LIT_CHAR_UNDEF
|
||||
&& !is_range
|
||||
&& *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
is_range = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
|
||||
}
|
||||
|
||||
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_LOWERCASE_B)
|
||||
{
|
||||
ch = LIT_CHAR_BS;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
ch = LIT_CHAR_FF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_N)
|
||||
{
|
||||
ch = LIT_CHAR_LF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_T)
|
||||
{
|
||||
ch = LIT_CHAR_TAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_R)
|
||||
{
|
||||
ch = LIT_CHAR_CR;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_V)
|
||||
{
|
||||
ch = LIT_CHAR_VTAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_C)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
ch = *parser_ctx_p->input_curr_p;
|
||||
|
||||
if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_0 && ch <= LIT_CHAR_9))
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.10 (Point 3) */
|
||||
ch = (ch % 32);
|
||||
parser_ctx_p->input_curr_p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
ch = LIT_CHAR_LOWERCASE_C;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_X && re_hex_lookup (parser_ctx_p, 2))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 2;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_U && re_hex_lookup (parser_ctx_p, 4))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 4;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_TAB, LIT_CHAR_CR);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_SP, LIT_CHAR_SP);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NBSP, LIT_CHAR_NBSP);
|
||||
re_append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); /* Ogham Space Mark */
|
||||
re_append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); /* Mongolian Vowel Separator */
|
||||
re_append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); /* En Quad - Hair Space */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LS, LIT_CHAR_PS);
|
||||
re_append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); /* Narrow No-Break Space */
|
||||
re_append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */
|
||||
re_append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_TAB - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_CR + 1, LIT_CHAR_SP - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_SP + 1, LIT_CHAR_NBSP - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NBSP + 1, 0x167FUL);
|
||||
re_append_char_class (re_ctx_p, 0x1681UL, 0x180DUL);
|
||||
re_append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL);
|
||||
re_append_char_class (re_ctx_p, 0x200BUL, LIT_CHAR_LS - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_PS + 1, 0x202EUL);
|
||||
re_append_char_class (re_ctx_p, 0x2030UL, 0x205EUL);
|
||||
re_append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
|
||||
re_append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_0, LIT_CHAR_9);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_0 - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_9 + 1, LIT_CHAR_UPPERCASE_A - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1);
|
||||
re_append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (lit_char_is_octal_digit ((ecma_char_t) ch)
|
||||
&& ch != LIT_CHAR_0)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
|
||||
}
|
||||
} /* ch == LIT_CHAR_BACKSLASH */
|
||||
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
if (is_range)
|
||||
{
|
||||
if (start > ch)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, wrong order"));
|
||||
}
|
||||
else
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, ch);
|
||||
start = LIT_CHAR_UNDEF;
|
||||
is_range = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
re_append_char_class (re_ctx_p, start, start);
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
while (is_char_class);
|
||||
|
||||
return re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
} /* re_parse_char_class */
|
||||
|
||||
/**
|
||||
* Parse alternatives
|
||||
*
|
||||
@ -251,18 +508,17 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
re_bytecode_ctx_t *bc_ctx_p = re_ctx_p->bytecode_ctx_p;
|
||||
ecma_value_t ret_value = ECMA_VALUE_EMPTY;
|
||||
|
||||
uint32_t alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
bool should_loop = true;
|
||||
uint32_t alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
|
||||
while (ecma_is_value_empty (ret_value) && should_loop)
|
||||
while (ecma_is_value_empty (ret_value))
|
||||
{
|
||||
ecma_value_t next_token_result = re_parse_next_token (re_ctx_p->parser_ctx_p,
|
||||
&(re_ctx_p->current_token));
|
||||
if (ECMA_IS_VALUE_ERROR (next_token_result))
|
||||
{
|
||||
ret_value = next_token_result;
|
||||
break;
|
||||
return next_token_result;
|
||||
}
|
||||
|
||||
JERRY_ASSERT (ecma_is_value_empty (next_token_result));
|
||||
|
||||
uint32_t new_atom_start_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
@ -271,7 +527,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
{
|
||||
case RE_TOK_START_CAPTURE_GROUP:
|
||||
{
|
||||
idx = re_ctx_p->num_of_captures++;
|
||||
idx = re_ctx_p->captures_count++;
|
||||
JERRY_TRACE_MSG ("Compile a capture group start (idx: %u)\n", (unsigned int) idx);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
@ -285,7 +541,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
}
|
||||
case RE_TOK_START_NON_CAPTURE_GROUP:
|
||||
{
|
||||
idx = re_ctx_p->num_of_non_captures++;
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
JERRY_TRACE_MSG ("Compile a non-capture group start (idx: %u)\n", (unsigned int) idx);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
@ -304,8 +560,8 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
(unsigned int) re_ctx_p->current_token.qmax);
|
||||
|
||||
re_append_opcode (bc_ctx_p, RE_OP_CHAR);
|
||||
re_append_char (bc_ctx_p, re_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
re_append_char (bc_ctx_p, ecma_regexp_canonicalize ((ecma_char_t) re_ctx_p->current_token.value,
|
||||
re_ctx_p->flags & RE_FLAG_IGNORE_CASE));
|
||||
|
||||
ret_value = re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
|
||||
break;
|
||||
@ -321,9 +577,9 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
case RE_TOK_ALTERNATIVE:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile an alternative\n");
|
||||
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
re_append_opcode (bc_ctx_p, RE_OP_ALTERNATIVE);
|
||||
alterantive_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
alternative_offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
break;
|
||||
}
|
||||
case RE_TOK_ASSERT_START:
|
||||
@ -353,7 +609,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
case RE_TOK_ASSERT_START_POS_LOOKAHEAD:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a positive lookahead assertion\n");
|
||||
idx = re_ctx_p->num_of_non_captures++;
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_POS);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
@ -370,7 +626,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
case RE_TOK_ASSERT_START_NEG_LOOKAHEAD:
|
||||
{
|
||||
JERRY_TRACE_MSG ("Compile a negative lookahead assertion\n");
|
||||
idx = re_ctx_p->num_of_non_captures++;
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
re_append_opcode (bc_ctx_p, RE_OP_LOOKAHEAD_NEG);
|
||||
|
||||
ret_value = re_parse_alternative (re_ctx_p, false);
|
||||
@ -387,7 +643,7 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
case RE_TOK_BACKREFERENCE:
|
||||
{
|
||||
uint32_t backref = (uint32_t) re_ctx_p->current_token.value;
|
||||
idx = re_ctx_p->num_of_non_captures++;
|
||||
idx = re_ctx_p->non_captures_count++;
|
||||
|
||||
if (backref > re_ctx_p->highest_backref)
|
||||
{
|
||||
@ -417,14 +673,12 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
: RE_OP_CHAR_CLASS);
|
||||
uint32_t offset = re_get_bytecode_length (re_ctx_p->bytecode_ctx_p);
|
||||
|
||||
ret_value = re_parse_char_class (re_ctx_p->parser_ctx_p,
|
||||
re_append_char_class,
|
||||
re_ctx_p,
|
||||
ret_value = re_parse_char_class (re_ctx_p,
|
||||
&(re_ctx_p->current_token));
|
||||
|
||||
if (!ECMA_IS_VALUE_ERROR (ret_value))
|
||||
{
|
||||
re_insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->num_of_classes);
|
||||
re_insert_u32 (bc_ctx_p, offset, re_ctx_p->parser_ctx_p->classes_count);
|
||||
ret_value = re_insert_simple_iterator (re_ctx_p, new_atom_start_offset);
|
||||
}
|
||||
|
||||
@ -436,33 +690,25 @@ re_parse_alternative (re_compiler_ctx_t *re_ctx_p, /**< RegExp compiler context
|
||||
|
||||
if (expect_eof)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of paren."));
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of paren."));
|
||||
}
|
||||
else
|
||||
{
|
||||
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
|
||||
should_loop = false;
|
||||
}
|
||||
break;
|
||||
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
return ECMA_VALUE_EMPTY;
|
||||
}
|
||||
case RE_TOK_EOF:
|
||||
{
|
||||
if (!expect_eof)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern."));
|
||||
}
|
||||
else
|
||||
{
|
||||
re_insert_u32 (bc_ctx_p, alterantive_offset, re_get_bytecode_length (bc_ctx_p) - alterantive_offset);
|
||||
should_loop = false;
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected end of pattern."));
|
||||
}
|
||||
|
||||
break;
|
||||
re_insert_u32 (bc_ctx_p, alternative_offset, re_get_bytecode_length (bc_ctx_p) - alternative_offset);
|
||||
return ECMA_VALUE_EMPTY;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected RegExp token."));
|
||||
break;
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Unexpected RegExp token."));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -488,7 +734,7 @@ re_find_bytecode_in_cache (ecma_string_t *pattern_str_p, /**< pattern string */
|
||||
|
||||
if (cached_bytecode_p != NULL)
|
||||
{
|
||||
ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->pattern);
|
||||
ecma_string_t *cached_pattern_str_p = ecma_get_string_from_value (cached_bytecode_p->source);
|
||||
|
||||
if ((cached_bytecode_p->header.status_flags & RE_FLAGS_MASK) == flags
|
||||
&& ecma_compare_ecma_strings (cached_pattern_str_p, pattern_str_p))
|
||||
@ -559,14 +805,11 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
|
||||
re_compiler_ctx_t re_ctx;
|
||||
re_ctx.flags = flags;
|
||||
re_ctx.highest_backref = 0;
|
||||
re_ctx.num_of_non_captures = 0;
|
||||
re_ctx.non_captures_count = 0;
|
||||
|
||||
re_bytecode_ctx_t bc_ctx;
|
||||
bc_ctx.block_start_p = NULL;
|
||||
bc_ctx.block_end_p = NULL;
|
||||
bc_ctx.current_p = NULL;
|
||||
|
||||
re_ctx.bytecode_ctx_p = &bc_ctx;
|
||||
re_initialize_regexp_bytecode (&bc_ctx);
|
||||
|
||||
ECMA_STRING_TO_UTF8_STRING (pattern_str_p, pattern_start_p, pattern_start_size);
|
||||
|
||||
@ -574,23 +817,23 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
|
||||
parser_ctx.input_start_p = pattern_start_p;
|
||||
parser_ctx.input_curr_p = (lit_utf8_byte_t *) pattern_start_p;
|
||||
parser_ctx.input_end_p = pattern_start_p + pattern_start_size;
|
||||
parser_ctx.num_of_groups = -1;
|
||||
parser_ctx.groups_count = -1;
|
||||
re_ctx.parser_ctx_p = &parser_ctx;
|
||||
|
||||
/* 1. Parse RegExp pattern */
|
||||
re_ctx.num_of_captures = 1;
|
||||
/* Parse RegExp pattern */
|
||||
re_ctx.captures_count = 1;
|
||||
re_append_opcode (&bc_ctx, RE_OP_SAVE_AT_START);
|
||||
|
||||
ecma_value_t parse_alt_result = re_parse_alternative (&re_ctx, true);
|
||||
ecma_value_t result = re_parse_alternative (&re_ctx, true);
|
||||
|
||||
ECMA_FINALIZE_UTF8_STRING (pattern_start_p, pattern_start_size);
|
||||
|
||||
if (ECMA_IS_VALUE_ERROR (parse_alt_result))
|
||||
if (ECMA_IS_VALUE_ERROR (result))
|
||||
{
|
||||
ret_value = parse_alt_result;
|
||||
ret_value = result;
|
||||
}
|
||||
/* 2. Check for invalid backreference */
|
||||
else if (re_ctx.highest_backref >= re_ctx.num_of_captures)
|
||||
/* Check for invalid backreference */
|
||||
else if (re_ctx.highest_backref >= re_ctx.captures_count)
|
||||
{
|
||||
ret_value = ecma_raise_syntax_error ("Invalid backreference.\n");
|
||||
}
|
||||
@ -599,20 +842,14 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
|
||||
re_append_opcode (&bc_ctx, RE_OP_SAVE_AND_MATCH);
|
||||
re_append_opcode (&bc_ctx, RE_OP_EOF);
|
||||
|
||||
/* 3. Insert extra informations for bytecode header */
|
||||
re_compiled_code_t re_compiled_code;
|
||||
|
||||
re_compiled_code.header.refs = 1;
|
||||
re_compiled_code.header.status_flags = re_ctx.flags;
|
||||
/* Initialize bytecode header */
|
||||
re_compiled_code_t *re_compiled_code_p = (re_compiled_code_t *) bc_ctx.block_start_p;
|
||||
re_compiled_code_p->header.refs = 1;
|
||||
re_compiled_code_p->header.status_flags = re_ctx.flags;
|
||||
ecma_ref_ecma_string (pattern_str_p);
|
||||
re_compiled_code.pattern = ecma_make_string_value (pattern_str_p);
|
||||
re_compiled_code.num_of_captures = re_ctx.num_of_captures * 2;
|
||||
re_compiled_code.num_of_non_captures = re_ctx.num_of_non_captures;
|
||||
|
||||
re_bytecode_list_insert (&bc_ctx,
|
||||
0,
|
||||
(uint8_t *) &re_compiled_code,
|
||||
sizeof (re_compiled_code_t));
|
||||
re_compiled_code_p->source = ecma_make_string_value (pattern_str_p);
|
||||
re_compiled_code_p->captures_count = re_ctx.captures_count;
|
||||
re_compiled_code_p->non_captures_count = re_ctx.non_captures_count;
|
||||
}
|
||||
|
||||
size_t byte_code_size = (size_t) (bc_ctx.block_end_p - bc_ctx.block_start_p);
|
||||
@ -633,10 +870,7 @@ re_compile_bytecode (const re_compiled_code_t **out_bytecode_p, /**< [out] point
|
||||
}
|
||||
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
|
||||
|
||||
/* The RegExp bytecode contains at least a RE_OP_SAVE_AT_START opdoce, so it cannot be NULL. */
|
||||
JERRY_ASSERT (bc_ctx.block_start_p != NULL);
|
||||
*out_bytecode_p = (re_compiled_code_t *) bc_ctx.block_start_p;
|
||||
|
||||
((re_compiled_code_t *) bc_ctx.block_start_p)->header.size = (uint16_t) (byte_code_size >> JMEM_ALIGNMENT_LOG);
|
||||
|
||||
if (cache_idx == RE_CACHE_SIZE)
|
||||
|
||||
@ -38,8 +38,8 @@
|
||||
typedef struct
|
||||
{
|
||||
uint16_t flags; /**< RegExp flags */
|
||||
uint32_t num_of_captures; /**< number of capture groups */
|
||||
uint32_t num_of_non_captures; /**< number of non-capture groups */
|
||||
uint32_t captures_count; /**< number of capture groups */
|
||||
uint32_t non_captures_count; /**< number of non-capture groups */
|
||||
uint32_t highest_backref; /**< highest backreference */
|
||||
re_bytecode_ctx_t *bytecode_ctx_p; /**< pointer of RegExp bytecode context */
|
||||
re_token_t current_token; /**< current token */
|
||||
|
||||
@ -40,26 +40,26 @@
|
||||
* @return true - if lookup number of characters ahead are hex digits
|
||||
* false - otherwise
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
re_hex_lookup (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
|
||||
uint32_t lookup) /**< size of lookup */
|
||||
{
|
||||
bool is_digit = true;
|
||||
const lit_utf8_byte_t *curr_p = parser_ctx_p->input_curr_p;
|
||||
|
||||
for (uint32_t i = 0; is_digit && i < lookup; i++)
|
||||
if (JERRY_UNLIKELY (curr_p + lookup > parser_ctx_p->input_end_p))
|
||||
{
|
||||
if (curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
is_digit = lit_char_is_hex_digit (*curr_p++);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < lookup; i++)
|
||||
{
|
||||
if (!lit_char_is_hex_digit (*curr_p++))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return is_digit;
|
||||
return true;
|
||||
} /* re_hex_lookup */
|
||||
|
||||
/**
|
||||
@ -86,7 +86,7 @@ re_parse_non_greedy_char (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser cont
|
||||
*
|
||||
* @return uint32_t - parsed octal number
|
||||
*/
|
||||
static uint32_t
|
||||
uint32_t
|
||||
re_parse_octal (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */
|
||||
{
|
||||
uint32_t number = 0;
|
||||
@ -110,7 +110,7 @@ re_parse_octal (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
static ecma_value_t
|
||||
ecma_value_t
|
||||
re_parse_iterator (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context */
|
||||
re_token_t *re_token_p) /**< [out] output token */
|
||||
{
|
||||
@ -253,7 +253,7 @@ static void
|
||||
re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser context */
|
||||
{
|
||||
int char_class_in = 0;
|
||||
parser_ctx_p->num_of_groups = 0;
|
||||
parser_ctx_p->groups_count = 0;
|
||||
const lit_utf8_byte_t *curr_p = parser_ctx_p->input_start_p;
|
||||
|
||||
while (curr_p < parser_ctx_p->input_end_p)
|
||||
@ -287,7 +287,7 @@ re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser contex
|
||||
&& *curr_p != LIT_CHAR_QUESTION
|
||||
&& !char_class_in)
|
||||
{
|
||||
parser_ctx_p->num_of_groups++;
|
||||
parser_ctx_p->groups_count++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -295,264 +295,6 @@ re_count_num_of_groups (re_parser_ctx_t *parser_ctx_p) /**< RegExp parser contex
|
||||
}
|
||||
} /* re_count_num_of_groups */
|
||||
|
||||
/**
|
||||
* Read the input pattern and parse the range of character class
|
||||
*
|
||||
* @return empty ecma value - if parsed successfully
|
||||
* error ecma value - otherwise
|
||||
*
|
||||
* Returned value must be freed with ecma_free_value
|
||||
*/
|
||||
ecma_value_t
|
||||
re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
|
||||
re_char_class_callback append_char_class, /**< callback function,
|
||||
* which adds the char-ranges
|
||||
* to the bytecode */
|
||||
void *re_ctx_p, /**< regexp compiler context */
|
||||
re_token_t *out_token_p) /**< [out] output token */
|
||||
{
|
||||
re_token_type_t token_type = ((re_compiler_ctx_t *) re_ctx_p)->current_token.type;
|
||||
out_token_p->qmax = out_token_p->qmin = 1;
|
||||
ecma_char_t start = LIT_CHAR_UNDEF;
|
||||
bool is_range = false;
|
||||
parser_ctx_p->num_of_classes = 0;
|
||||
|
||||
const ecma_char_t prev_char = lit_utf8_peek_prev (parser_ctx_p->input_curr_p);
|
||||
if (prev_char != LIT_CHAR_LEFT_SQUARE && prev_char != LIT_CHAR_CIRCUMFLEX)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string"));
|
||||
}
|
||||
|
||||
ecma_char_t ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
append_char_class (re_ctx_p, start, start);
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (ch == LIT_CHAR_MINUS)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '-'"));
|
||||
}
|
||||
|
||||
if (start != LIT_CHAR_UNDEF
|
||||
&& !is_range
|
||||
&& *parser_ctx_p->input_curr_p != LIT_CHAR_RIGHT_SQUARE)
|
||||
{
|
||||
is_range = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_BACKSLASH)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p >= parser_ctx_p->input_end_p)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\'"));
|
||||
}
|
||||
|
||||
ch = lit_utf8_read_next (&parser_ctx_p->input_curr_p);
|
||||
|
||||
if (ch == LIT_CHAR_LOWERCASE_B)
|
||||
{
|
||||
ch = LIT_CHAR_BS;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_F)
|
||||
{
|
||||
ch = LIT_CHAR_FF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_N)
|
||||
{
|
||||
ch = LIT_CHAR_LF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_T)
|
||||
{
|
||||
ch = LIT_CHAR_TAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_R)
|
||||
{
|
||||
ch = LIT_CHAR_CR;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_V)
|
||||
{
|
||||
ch = LIT_CHAR_VTAB;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_C)
|
||||
{
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p)
|
||||
{
|
||||
ch = *parser_ctx_p->input_curr_p;
|
||||
|
||||
if ((ch >= LIT_CHAR_ASCII_UPPERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_UPPERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_ASCII_LOWERCASE_LETTERS_BEGIN && ch <= LIT_CHAR_ASCII_LOWERCASE_LETTERS_END)
|
||||
|| (ch >= LIT_CHAR_0 && ch <= LIT_CHAR_9))
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.10 (Point 3) */
|
||||
ch = (ch % 32);
|
||||
parser_ctx_p->input_curr_p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
ch = LIT_CHAR_LOWERCASE_C;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_X && re_hex_lookup (parser_ctx_p, 2))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 2, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\x'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 2;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_U && re_hex_lookup (parser_ctx_p, 4))
|
||||
{
|
||||
ecma_char_t code_unit;
|
||||
|
||||
if (!lit_read_code_unit_from_hex (parser_ctx_p->input_curr_p, 4, &code_unit))
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, end of string after '\\u'"));
|
||||
}
|
||||
|
||||
parser_ctx_p->input_curr_p += 4;
|
||||
if (parser_ctx_p->input_curr_p < parser_ctx_p->input_end_p
|
||||
&& is_range == false
|
||||
&& lit_utf8_peek_next (parser_ctx_p->input_curr_p) == LIT_CHAR_MINUS)
|
||||
{
|
||||
start = code_unit;
|
||||
continue;
|
||||
}
|
||||
|
||||
ch = code_unit;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_BEGIN, LIT_CHAR_ASCII_DIGITS_END);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_D)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_ASCII_DIGITS_BEGIN - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_ASCII_DIGITS_END + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_TAB, LIT_CHAR_CR);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_SP, LIT_CHAR_SP);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_NBSP, LIT_CHAR_NBSP);
|
||||
append_char_class (re_ctx_p, 0x1680UL, 0x1680UL); /* Ogham Space Mark */
|
||||
append_char_class (re_ctx_p, 0x180EUL, 0x180EUL); /* Mongolian Vowel Separator */
|
||||
append_char_class (re_ctx_p, 0x2000UL, 0x200AUL); /* En Quad - Hair Space */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_LS, LIT_CHAR_PS);
|
||||
append_char_class (re_ctx_p, 0x202FUL, 0x202FUL); /* Narrow No-Break Space */
|
||||
append_char_class (re_ctx_p, 0x205FUL, 0x205FUL); /* Medium Mathematical Space */
|
||||
append_char_class (re_ctx_p, 0x3000UL, 0x3000UL); /* Ideographic Space */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_BOM, LIT_CHAR_BOM);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_S)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_TAB - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_CR + 1, LIT_CHAR_SP - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_SP + 1, LIT_CHAR_NBSP - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_NBSP + 1, 0x167FUL);
|
||||
append_char_class (re_ctx_p, 0x1681UL, 0x180DUL);
|
||||
append_char_class (re_ctx_p, 0x180FUL, 0x1FFFUL);
|
||||
append_char_class (re_ctx_p, 0x200BUL, LIT_CHAR_LS - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_PS + 1, 0x202EUL);
|
||||
append_char_class (re_ctx_p, 0x2030UL, 0x205EUL);
|
||||
append_char_class (re_ctx_p, 0x2060UL, 0x2FFFUL);
|
||||
append_char_class (re_ctx_p, 0x3001UL, LIT_CHAR_BOM - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_BOM + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_LOWERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_0, LIT_CHAR_9);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_A, LIT_CHAR_UPPERCASE_Z);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE, LIT_CHAR_UNDERSCORE);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_A, LIT_CHAR_LOWERCASE_Z);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (ch == LIT_CHAR_UPPERCASE_W)
|
||||
{
|
||||
/* See ECMA-262 v5, 15.10.2.12 */
|
||||
append_char_class (re_ctx_p, LIT_CHAR_NULL, LIT_CHAR_0 - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_9 + 1, LIT_CHAR_UPPERCASE_A - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_UPPERCASE_Z + 1, LIT_CHAR_UNDERSCORE - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_UNDERSCORE + 1, LIT_CHAR_LOWERCASE_A - 1);
|
||||
append_char_class (re_ctx_p, LIT_CHAR_LOWERCASE_Z + 1, LIT_UTF16_CODE_UNIT_MAX);
|
||||
ch = LIT_CHAR_UNDEF;
|
||||
}
|
||||
else if (lit_char_is_octal_digit ((ecma_char_t) ch)
|
||||
&& ch != LIT_CHAR_0)
|
||||
{
|
||||
lit_utf8_decr (&parser_ctx_p->input_curr_p);
|
||||
ch = (ecma_char_t) re_parse_octal (parser_ctx_p);
|
||||
}
|
||||
} /* ch == LIT_CHAR_BACKSLASH */
|
||||
|
||||
if (start != LIT_CHAR_UNDEF)
|
||||
{
|
||||
if (is_range)
|
||||
{
|
||||
if (start > ch)
|
||||
{
|
||||
return ecma_raise_syntax_error (ECMA_ERR_MSG ("invalid character class, wrong order"));
|
||||
}
|
||||
else
|
||||
{
|
||||
append_char_class (re_ctx_p, start, ch);
|
||||
start = LIT_CHAR_UNDEF;
|
||||
is_range = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
append_char_class (re_ctx_p, start, start);
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
start = ch;
|
||||
}
|
||||
}
|
||||
while (token_type == RE_TOK_START_CHAR_CLASS || token_type == RE_TOK_START_INV_CHAR_CLASS);
|
||||
|
||||
return re_parse_iterator (parser_ctx_p, out_token_p);
|
||||
} /* re_parse_char_class */
|
||||
|
||||
/**
|
||||
* Read the input pattern and parse the next token for the RegExp compiler
|
||||
*
|
||||
@ -730,12 +472,12 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
}
|
||||
else
|
||||
{
|
||||
if (parser_ctx_p->num_of_groups == -1)
|
||||
if (parser_ctx_p->groups_count == -1)
|
||||
{
|
||||
re_count_num_of_groups (parser_ctx_p);
|
||||
}
|
||||
|
||||
if (parser_ctx_p->num_of_groups)
|
||||
if (parser_ctx_p->groups_count)
|
||||
{
|
||||
parser_ctx_p->input_curr_p--;
|
||||
uint32_t number = 0;
|
||||
@ -765,7 +507,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
|
||||
}
|
||||
while (true);
|
||||
|
||||
if ((int) number <= parser_ctx_p->num_of_groups)
|
||||
if ((int) number <= parser_ctx_p->groups_count)
|
||||
{
|
||||
out_token_p->type = RE_TOK_BACKREFERENCE;
|
||||
}
|
||||
|
||||
@ -94,18 +94,14 @@ typedef struct
|
||||
const lit_utf8_byte_t *input_start_p; /**< start of input pattern */
|
||||
const lit_utf8_byte_t *input_curr_p; /**< current position in input pattern */
|
||||
const lit_utf8_byte_t *input_end_p; /**< end of input pattern */
|
||||
int num_of_groups; /**< number of groups */
|
||||
uint32_t num_of_classes; /**< number of character classes */
|
||||
int groups_count; /**< number of groups */
|
||||
uint32_t classes_count; /**< number of character classes */
|
||||
} re_parser_ctx_t;
|
||||
|
||||
typedef void (*re_char_class_callback) (void *re_ctx_p, ecma_char_t start, ecma_char_t end);
|
||||
|
||||
ecma_value_t
|
||||
re_parse_char_class (re_parser_ctx_t *parser_ctx_p, re_char_class_callback append_char_class, void *re_ctx_p,
|
||||
re_token_t *out_token_p);
|
||||
|
||||
ecma_value_t
|
||||
re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p);
|
||||
bool re_hex_lookup (re_parser_ctx_t *parser_ctx_p, uint32_t lookup);
|
||||
uint32_t re_parse_octal (re_parser_ctx_t *parser_ctx_p);
|
||||
ecma_value_t re_parse_iterator (re_parser_ctx_t *parser_ctx_p, re_token_t *re_token_p);
|
||||
ecma_value_t re_parse_next_token (re_parser_ctx_t *parser_ctx_p, re_token_t *out_token_p);
|
||||
|
||||
/**
|
||||
* @}
|
||||
|
||||
20
tests/jerry/es2015/regexp-lastindex.js
Normal file
20
tests/jerry/es2015/regexp-lastindex.js
Normal file
@ -0,0 +1,20 @@
|
||||
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
var t = new RegExp ("abc","g");
|
||||
t.lastIndex = -12;
|
||||
result = t.exec("abc abc");
|
||||
assert(result[0] === "abc");
|
||||
assert(result.index === 0);
|
||||
assert(t.lastIndex === 3);
|
||||
19
tests/jerry/es5.1/regexp-lastindex.js
Normal file
19
tests/jerry/es5.1/regexp-lastindex.js
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
var t = new RegExp ("abc","g");
|
||||
t.lastIndex = -12;
|
||||
result = t.exec("abc abc");
|
||||
assert(!result);
|
||||
assert(t.lastIndex === 0);
|
||||
@ -167,9 +167,3 @@ t.lastIndex = "2"
|
||||
var result = t.exec("abc abc");
|
||||
assert(result[0] === "abc");
|
||||
assert(result.index === 6);
|
||||
|
||||
t = new RegExp ("abc","g");
|
||||
t.lastIndex = -12;
|
||||
result = t.exec("abc abc");
|
||||
assert(!result);
|
||||
assert(t.lastIndex === 0);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user