jerryscript/jerry-core/ecma/operations/ecma-regexp-object.c

/* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ecma-alloc.h"
#include "ecma-array-object.h"
#include "ecma-builtin-helpers.h"
#include "ecma-exceptions.h"
#include "ecma-gc.h"
#include "ecma-globals.h"
#include "ecma-objects.h"
#include "ecma-regexp-object.h"
#include "ecma-try-catch-macro.h"
#include "jcontext.h"
#include "jrt-libc-includes.h"
#include "lit-char-helpers.h"
#include "re-compiler.h"

#if ENABLED (JERRY_BUILTIN_REGEXP)

#define ECMA_BUILTINS_INTERNAL
#include "ecma-builtins-internal.h"

/** \addtogroup ecma ECMA
 * @{
 *
 * \addtogroup ecmaregexpobject ECMA RegExp object related routines
 * @{
 */

/**
 * Index of the global capturing group, i.e. the slot of the captures array
 * that records where the whole match begins and ends.
 */
#define RE_GLOBAL_CAPTURE 0

/**
 * Check if a RegExp opcode is a capture group or not
 *
 * Evaluates to 1 if the opcode value is below RE_OP_NON_CAPTURE_GROUP_START, 0 otherwise.
 * NOTE(review): this presumes every capturing group opcode is numbered before the
 * non-capturing ones in re_opcode_t - confirm against the opcode enum.
 */
#define RE_IS_CAPTURE_GROUP(x) (((x) < RE_OP_NON_CAPTURE_GROUP_START) ? 1 : 0)

/**
 * Parse RegExp flags (global, ignoreCase, multiline)
 *
 * Every byte of the flag string has to be one of 'g', 'i' or 'm', and a flag
 * must not be given when its bit is already set in *flags_p. Parsing stops at
 * the first offending byte.
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return empty ecma value - if parsed successfully
 *         error ecma value - otherwise
 *
 *         Returned value must be freed with ecma_free_value
 */
ecma_value_t
ecma_regexp_parse_flags (ecma_string_t *flags_str_p, /**< Input string with flags */
                         uint16_t *flags_p) /**< [out] parsed flag bits */
{
  ecma_value_t result = ECMA_VALUE_EMPTY;

  ECMA_STRING_TO_UTF8_STRING (flags_str_p, flags_start_p, flags_start_size);

  const lit_utf8_byte_t *const flags_end_p = flags_start_p + flags_start_size;

  for (const lit_utf8_byte_t *cursor_p = flags_start_p;
       cursor_p < flags_end_p && ecma_is_value_empty (result);
       cursor_p++)
  {
    /* Translate the current byte to its flag bit (stays 0 for an unknown byte). */
    uint16_t flag_bit = 0;
    bool is_known_flag = true;

    switch (*cursor_p)
    {
      case 'g':
      {
        flag_bit = RE_FLAG_GLOBAL;
        break;
      }
      case 'i':
      {
        flag_bit = RE_FLAG_IGNORE_CASE;
        break;
      }
      case 'm':
      {
        flag_bit = RE_FLAG_MULTILINE;
        break;
      }
      default:
      {
        is_known_flag = false;
        break;
      }
    }

    /* Both an unknown flag and a repeated flag are syntax errors. */
    if (!is_known_flag || (*flags_p & flag_bit) != 0)
    {
      result = ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp flags."));
    }

    *flags_p = (uint16_t) (*flags_p | flag_bit);
  }

  ECMA_FINALIZE_UTF8_STRING (flags_start_p, flags_start_size);

  return result;
} /* ecma_regexp_parse_flags */

/**
 * Create the properties of a RegExp instance.
 *
 * Without ES2015 support the 'source', 'global', 'ignoreCase' and 'multiline'
 * properties are created as fixed own data properties. The writable
 * 'lastIndex' property is created in every configuration.
 */
static void
ecma_regexp_create_props (ecma_object_t *re_object_p) /**< RegExp object */
{
#if !ENABLED (JERRY_ES2015)
  static const lit_magic_string_id_t fixed_prop_ids[] =
  {
    LIT_MAGIC_STRING_SOURCE,
    LIT_MAGIC_STRING_GLOBAL,
    LIT_MAGIC_STRING_IGNORECASE_UL,
    LIT_MAGIC_STRING_MULTILINE
  };

  for (uint32_t idx = 0; idx < sizeof (fixed_prop_ids) / sizeof (fixed_prop_ids[0]); idx++)
  {
    ecma_create_named_data_property (re_object_p,
                                     ecma_get_magic_string (fixed_prop_ids[idx]),
                                     ECMA_PROPERTY_FIXED,
                                     NULL);
  }
#endif /* !ENABLED (JERRY_ES2015) */

  ecma_create_named_data_property (re_object_p,
                                   ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                   ECMA_PROPERTY_FLAG_WRITABLE,
                                   NULL);
} /* ecma_regexp_create_props */

/**
 * Helper function to assign a value to an own named data property.
 *
 * The property is looked up by its magic string id and the value is stored
 * into it. The lookup result is not checked, so the property has to exist
 * already.
 */
static void
ecma_regexp_helper_assign_prop (ecma_object_t *re_object_p, /**< RegExp object */
                                lit_magic_string_id_t prop_id, /**< property name id */
                                ecma_value_t value) /**< value */
{
  ecma_string_t *const prop_name_p = ecma_get_magic_string (prop_id);
  ecma_property_ref_t found_prop;

  ecma_op_object_get_own_property (re_object_p, prop_name_p, &found_prop, ECMA_PROPERTY_GET_VALUE);
  ecma_named_data_property_assign_value (re_object_p, found_prop.value_p, value);
} /* ecma_regexp_helper_assign_prop */

/**
 * Initializes the properties of a RegExp instance.
 *
 * Without ES2015 support 'source' and the three flag properties are filled in
 * from the arguments; with ES2015 support the arguments are unused.
 * 'lastIndex' is always reset to 0.
 */
void
ecma_regexp_initialize_props (ecma_object_t *re_object_p, /**< RegExp object */
                              ecma_string_t *source_p, /**< source string */
                              uint16_t flags) /**< flags */
{
#if ENABLED (JERRY_ES2015)
  JERRY_UNUSED (source_p);
  JERRY_UNUSED (flags);
#else /* !ENABLED (JERRY_ES2015) */
  const bool is_global = flags & RE_FLAG_GLOBAL;
  const bool is_ignorecase = flags & RE_FLAG_IGNORE_CASE;
  const bool is_multiline = flags & RE_FLAG_MULTILINE;

  ecma_regexp_helper_assign_prop (re_object_p, LIT_MAGIC_STRING_SOURCE, ecma_make_string_value (source_p));
  ecma_regexp_helper_assign_prop (re_object_p, LIT_MAGIC_STRING_GLOBAL, ecma_make_boolean_value (is_global));
  ecma_regexp_helper_assign_prop (re_object_p, LIT_MAGIC_STRING_IGNORECASE_UL, ecma_make_boolean_value (is_ignorecase));
  ecma_regexp_helper_assign_prop (re_object_p, LIT_MAGIC_STRING_MULTILINE, ecma_make_boolean_value (is_multiline));
#endif /* ENABLED (JERRY_ES2015) */

  ecma_regexp_helper_assign_prop (re_object_p, LIT_MAGIC_STRING_LASTINDEX_UL, ecma_make_uint32_value (0));
} /* ecma_regexp_initialize_props */

/**
 * RegExp object creation operation from already compiled bytecode.
 *
 * The new object stores a pointer to the bytecode and takes a reference on it.
 * The instance properties are initialized from the source string and the
 * status flags stored in the bytecode.
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return constructed RegExp object
 *         Returned value must be freed with ecma_free_value
 */
ecma_value_t
ecma_op_create_regexp_object_from_bytecode (re_compiled_code_t *bytecode_p) /**< RegExp bytecode */
{
  JERRY_ASSERT (bytecode_p != NULL);

  ecma_object_t *const proto_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE);
  ecma_object_t *const regexp_obj_p = ecma_create_object (proto_p,
                                                          sizeof (ecma_extended_object_t),
                                                          ECMA_OBJECT_TYPE_CLASS);
  ecma_extended_object_t *const regexp_ext_p = (ecma_extended_object_t *) regexp_obj_p;

  /* Internal [[Class]] property. */
  regexp_ext_p->u.class_prop.class_id = LIT_MAGIC_STRING_REGEXP_UL;

  /* Internal bytecode property: store the pointer and take a reference on the bytecode. */
  ECMA_SET_INTERNAL_VALUE_POINTER (regexp_ext_p->u.class_prop.u.value, bytecode_p);
  ecma_bytecode_ref ((ecma_compiled_code_t *) bytecode_p);

  /* Instance properties are derived from the data stored in the bytecode. */
  ecma_string_t *const source_p = ecma_get_string_from_value (bytecode_p->source);
  const uint16_t flags = bytecode_p->header.status_flags;

  ecma_regexp_create_props (regexp_obj_p);
  ecma_regexp_initialize_props (regexp_obj_p, source_p, flags);

  return ecma_make_object_value (regexp_obj_p);
} /* ecma_op_create_regexp_object_from_bytecode */

/**
 * RegExp object creation operation from a pattern string and parsed flags.
 *
 * The object and its properties are created first, then the pattern is
 * compiled. If compilation fails the freshly created object is dereferenced
 * and the compile error is returned.
 *
 * See also: ECMA-262 v5, 15.10.4.1
 *
 * @return constructed RegExp object - if pattern and flags were parsed successfully
 *         error ecma value          - otherwise
 *
 *         Returned value must be freed with ecma_free_value
 */
ecma_value_t
ecma_op_create_regexp_object (ecma_string_t *pattern_p, /**< input pattern */
                              uint16_t flags) /**< flags */
{
  JERRY_ASSERT (pattern_p != NULL);

  ecma_object_t *const proto_p = ecma_builtin_get (ECMA_BUILTIN_ID_REGEXP_PROTOTYPE);
  ecma_object_t *const regexp_obj_p = ecma_create_object (proto_p,
                                                          sizeof (ecma_extended_object_t),
                                                          ECMA_OBJECT_TYPE_CLASS);
  ecma_extended_object_t *const regexp_ext_p = (ecma_extended_object_t *) regexp_obj_p;

  /* The class id stays 'undefined' until bytecode is attached.
   * NOTE(review): presumably so that releasing the object after a failed
   * compilation does not touch a bytecode pointer that was never set - confirm. */
  regexp_ext_p->u.class_prop.class_id = LIT_MAGIC_STRING_UNDEFINED;

  ecma_regexp_create_props (regexp_obj_p);
  ecma_regexp_initialize_props (regexp_obj_p, pattern_p, flags);

  /* Compile bytecode. */
  const re_compiled_code_t *compiled_p = NULL;
  const ecma_value_t compile_result = re_compile_bytecode (&compiled_p, pattern_p, flags);

  if (!ECMA_IS_VALUE_ERROR (compile_result))
  {
    JERRY_ASSERT (ecma_is_value_empty (compile_result));

    /* Set [[Class]] and bytecode internal properties. */
    regexp_ext_p->u.class_prop.class_id = LIT_MAGIC_STRING_REGEXP_UL;
    ECMA_SET_INTERNAL_VALUE_POINTER (regexp_ext_p->u.class_prop.u.value, compiled_p);

    return ecma_make_object_value (regexp_obj_p);
  }

  /* Compilation failed: drop the half-built object and hand the error to the caller. */
  ecma_deref_object (regexp_obj_p);
  return compile_result;
} /* ecma_op_create_regexp_object */

/**
 * Canonicalize a character
 *
 * Characters up to LIT_UTF8_1_BYTE_CODE_POINT_MAX take a fast path: lowercase
 * 'a'-'z' are mapped to 'A'-'Z', everything else is returned unchanged.
 * Other characters are upper-cased through lit_char_to_upper_case and the
 * result is accepted only if it is a single code unit with value >= 128;
 * in all other cases the input character is returned.
 *
 * @return ecma_char_t canonicalized character
 */
ecma_char_t
ecma_regexp_canonicalize_char (ecma_char_t ch) /**< character */
{
  if (JERRY_LIKELY (ch <= LIT_UTF8_1_BYTE_CODE_POINT_MAX))
  {
    const bool is_lowercase_letter = (ch >= LIT_CHAR_LOWERCASE_A && ch <= LIT_CHAR_LOWERCASE_Z);

    if (!is_lowercase_letter)
    {
      return ch;
    }

    return (ecma_char_t) (ch - (LIT_CHAR_LOWERCASE_A - LIT_CHAR_UPPERCASE_A));
  }

  ecma_char_t upper_buf[LIT_MAXIMUM_OTHER_CASE_LENGTH];
  const ecma_length_t upper_len = lit_char_to_upper_case (ch, upper_buf, LIT_MAXIMUM_OTHER_CASE_LENGTH);

  /* Steps 3-6: use the upper case form only if it is exactly one code unit
   * and that code unit is not below 128. */
  if (upper_len == 1 && upper_buf[0] >= 128)
  {
    return upper_buf[0];
  }

  return ch;
} /* ecma_regexp_canonicalize_char */

/**
 * RegExp Canonicalize abstract operation
 *
 * Without the IgnoreCase flag the character is returned untouched; with it,
 * the work is delegated to ecma_regexp_canonicalize_char.
 *
 * See also: ECMA-262 v5, 15.10.2.8
 *
 * @return ecma_char_t canonicalized character
 */
inline ecma_char_t JERRY_ATTR_ALWAYS_INLINE
ecma_regexp_canonicalize (ecma_char_t ch, /**< character */
                          bool is_ignorecase) /**< IgnoreCase flag */
{
  if (!is_ignorecase)
  {
    return ch;
  }

  return ecma_regexp_canonicalize_char (ch);
} /* ecma_regexp_canonicalize */

/**
 * Recursive function for RegExp matching.
 *
 * Executes the RegExp bytecode starting at bc_p against the input starting at
 * str_curr_p. Simple opcodes are handled in the loop ("tail merge"), while
 * groups, alternatives, lookaheads and iterators recurse to try the rest of
 * the pattern and undo their changes to the matcher context when that fails.
 *
 * See also:
 *          ECMA-262 v5, 15.10.2.1
 *
 * @return pointer to the input position reached by the match - if matched
 *         NULL - if the match failed
 *         ECMA_RE_OUT_OF_STACK - if the stack limit is exceeded
 *                                (only when JERRY_STACK_LIMIT is non-zero)
 */
static const lit_utf8_byte_t *
ecma_regexp_match (ecma_regexp_ctx_t *re_ctx_p, /**< RegExp matcher context */
                   const uint8_t *bc_p, /**< pointer to the current RegExp bytecode */
                   const lit_utf8_byte_t *str_curr_p) /**< input string pointer */
{
#if (JERRY_STACK_LIMIT != 0)
  if (JERRY_UNLIKELY (ecma_get_current_stack_usage () > CONFIG_MEM_STACK_LIMIT))
  {
    return ECMA_RE_OUT_OF_STACK;
  }
#endif /* JERRY_STACK_LIMIT != 0 */

  while (true)
  {
    re_opcode_t op = re_get_opcode (&bc_p);

    switch (op)
    {
      case RE_OP_MATCH:
      {
        JERRY_TRACE_MSG ("Execute RE_OP_MATCH: match\n");
        return str_curr_p;
      }
      case RE_OP_CHAR:
      {
        if (str_curr_p >= re_ctx_p->input_end_p)
        {
          return NULL; /* fail */
        }

        const bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        ecma_char_t ch1 = (ecma_char_t) re_get_char (&bc_p); /* Already canonicalized. */
        ecma_char_t ch2 = ecma_regexp_canonicalize (lit_utf8_read_next (&str_curr_p), is_ignorecase);
        JERRY_TRACE_MSG ("Character matching %d to %d: ", ch1, ch2);

        if (ch1 != ch2)
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        JERRY_TRACE_MSG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_PERIOD:
      {
        if (str_curr_p >= re_ctx_p->input_end_p)
        {
          return NULL; /* fail */
        }

        const ecma_char_t ch = lit_utf8_read_next (&str_curr_p);
        JERRY_TRACE_MSG ("Period matching '.' to %u: ", (unsigned int) ch);

        if (lit_char_is_line_terminator (ch))
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        JERRY_TRACE_MSG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_ASSERT_START:
      {
        JERRY_TRACE_MSG ("Execute RE_OP_ASSERT_START: ");

        if (str_curr_p <= re_ctx_p->input_start_p)
        {
          JERRY_TRACE_MSG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        /* In multiline mode the position right after a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_peek_prev (str_curr_p)))
        {
          JERRY_TRACE_MSG ("match\n");
          break; /* tail merge */
        }

        JERRY_TRACE_MSG ("fail\n");
        return NULL; /* fail */
      }
      case RE_OP_ASSERT_END:
      {
        JERRY_TRACE_MSG ("Execute RE_OP_ASSERT_END: ");

        if (str_curr_p >= re_ctx_p->input_end_p)
        {
          JERRY_TRACE_MSG ("match\n");
          break; /* tail merge */
        }

        if (!(re_ctx_p->flags & RE_FLAG_MULTILINE))
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        /* In multiline mode the position right before a line terminator also matches. */
        if (lit_char_is_line_terminator (lit_utf8_peek_next (str_curr_p)))
        {
          JERRY_TRACE_MSG ("match\n");
          break; /* tail merge */
        }

        JERRY_TRACE_MSG ("fail\n");
        return NULL; /* fail */
      }
      case RE_OP_ASSERT_WORD_BOUNDARY:
      case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
      {
        const bool is_wordchar_left = ((str_curr_p > re_ctx_p->input_start_p)
                                       && lit_char_is_word_char (lit_utf8_peek_prev (str_curr_p)));

        const bool is_wordchar_right = ((str_curr_p < re_ctx_p->input_end_p)
                                        && lit_char_is_word_char (lit_utf8_peek_next (str_curr_p)));

        if (op == RE_OP_ASSERT_WORD_BOUNDARY)
        {
          JERRY_TRACE_MSG ("Execute RE_OP_ASSERT_WORD_BOUNDARY: ");
          if (is_wordchar_left == is_wordchar_right)
          {
            JERRY_TRACE_MSG ("fail\n");
            return NULL; /* fail */
          }
        }
        else
        {
          JERRY_ASSERT (op == RE_OP_ASSERT_NOT_WORD_BOUNDARY);
          JERRY_TRACE_MSG ("Execute RE_OP_ASSERT_NOT_WORD_BOUNDARY: ");

          if (is_wordchar_left != is_wordchar_right)
          {
            JERRY_TRACE_MSG ("fail\n");
            return NULL; /* fail */
          }
        }

        JERRY_TRACE_MSG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_LOOKAHEAD_POS:
      case RE_OP_LOOKAHEAD_NEG:
      {
        const lit_utf8_byte_t *matched_p = NULL;

        /* Snapshot the captures so they can be restored if the lookahead as a whole fails. */
        const size_t captures_size = re_ctx_p->captures_count * sizeof (ecma_regexp_capture_t);
        ecma_regexp_capture_t *saved_captures_p = (ecma_regexp_capture_t *) jmem_heap_alloc_block (captures_size);
        memcpy (saved_captures_p, re_ctx_p->captures_p, captures_size);

        /* Try the alternatives until one matches; the remaining ones are only skipped over. */
        do
        {
          const uint32_t offset = re_get_value (&bc_p);

          if (matched_p == NULL)
          {
            matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

            if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
            {
              jmem_heap_free_block (saved_captures_p, captures_size);
              return matched_p;
            }
          }
          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        JERRY_TRACE_MSG ("Execute RE_OP_LOOKAHEAD_POS/NEG: ");
        if ((op == RE_OP_LOOKAHEAD_POS && matched_p != NULL)
            || (op == RE_OP_LOOKAHEAD_NEG && matched_p == NULL))
        {
          JERRY_TRACE_MSG ("match\n");
          /* The lookahead consumes no input: continue from the original position. */
          matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);
        }
        else
        {
          JERRY_TRACE_MSG ("fail\n");
          matched_p = NULL; /* fail */
        }

        if (matched_p == NULL)
        {
          /* restore saved */
          memcpy (re_ctx_p->captures_p, saved_captures_p, captures_size);
        }

        jmem_heap_free_block (saved_captures_p, captures_size);
        return matched_p;
      }
      case RE_OP_CHAR_CLASS:
      case RE_OP_INV_CHAR_CLASS:
      {
        JERRY_TRACE_MSG ("Execute RE_OP_CHAR_CLASS/RE_OP_INV_CHAR_CLASS, ");
        if (str_curr_p >= re_ctx_p->input_end_p)
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        const bool is_ignorecase = re_ctx_p->flags & RE_FLAG_IGNORE_CASE;
        const ecma_char_t curr_ch = ecma_regexp_canonicalize (lit_utf8_read_next (&str_curr_p), is_ignorecase);

        uint32_t range_count = re_get_value (&bc_p);
        bool is_match = false;

        /* The class is stored as range_count pairs of (first, last) characters. */
        while (range_count-- > 0)
        {
          const ecma_char_t ch1 = re_get_char (&bc_p);
          if (curr_ch < ch1)
          {
            /* Below the range start: skip the range end without reading it. */
            bc_p += sizeof (ecma_char_t);
            continue;
          }

          const ecma_char_t ch2 = re_get_char (&bc_p);
          is_match = (curr_ch <= ch2);
          if (is_match)
          {
            /* Skip the remaining ranges in the bytecode. */
            bc_p += range_count * 2 * sizeof (ecma_char_t);
            break;
          }
        }

        JERRY_ASSERT (op == RE_OP_CHAR_CLASS || op == RE_OP_INV_CHAR_CLASS);

        if ((op == RE_OP_CHAR_CLASS) != is_match)
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        JERRY_TRACE_MSG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_BACKREFERENCE:
      {
        const uint32_t backref_idx = re_get_value (&bc_p);
        JERRY_TRACE_MSG ("Execute RE_OP_BACKREFERENCE (idx: %u): ", (unsigned int) backref_idx);
        JERRY_ASSERT (backref_idx >= 1 && backref_idx < re_ctx_p->captures_count);
        const ecma_regexp_capture_t capture = re_ctx_p->captures_p[backref_idx];

        /*
         * The capture is 'undefined' if either end is unset, or if the recorded end
         * lies before the begin: a group start opcode overwrites begin_p but leaves
         * a stale end_p from an earlier pass in place until the group is closed again.
         * Without the ordering check the size below would be computed from a negative
         * pointer difference.
         */
        if (capture.begin_p == NULL
            || capture.end_p == NULL
            || capture.end_p < capture.begin_p)
        {
          JERRY_TRACE_MSG ("match\n");
          break; /* capture is 'undefined', always matches! */
        }

        const lit_utf8_size_t capture_size = (lit_utf8_size_t) (capture.end_p - capture.begin_p);

        if (str_curr_p + capture_size > re_ctx_p->input_end_p)
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        if (memcmp (str_curr_p, capture.begin_p, capture_size))
        {
          JERRY_TRACE_MSG ("fail\n");
          return NULL; /* fail */
        }

        str_curr_p += capture_size;
        JERRY_TRACE_MSG ("match\n");
        break; /* tail merge */
      }
      case RE_OP_SAVE_AT_START:
      {
        JERRY_TRACE_MSG ("Execute RE_OP_SAVE_AT_START\n");
        re_ctx_p->captures_p[RE_GLOBAL_CAPTURE].begin_p = str_curr_p;

        /* Try every top-level alternative in order. */
        do
        {
          const uint32_t offset = re_get_value (&bc_p);
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }

          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);
        bc_p -= sizeof (uint8_t);

        return NULL; /* fail */
      }
      case RE_OP_SAVE_AND_MATCH:
      {
        JERRY_TRACE_MSG ("End of pattern is reached: match\n");
        re_ctx_p->captures_p[RE_GLOBAL_CAPTURE].end_p = str_curr_p;
        return str_curr_p; /* match */
      }
      case RE_OP_ALTERNATIVE:
      {
        /*
        *  Alternatives should be jumped over, when an alternative opcode appears.
        */
        uint32_t offset = re_get_value (&bc_p);
        JERRY_TRACE_MSG ("Execute RE_OP_ALTERNATIVE");
        bc_p += offset;

        while (*bc_p == RE_OP_ALTERNATIVE)
        {
          JERRY_TRACE_MSG (", jump: %u", (unsigned int) offset);
          bc_p++;
          offset = re_get_value (&bc_p);
          bc_p += offset;
        }

        JERRY_TRACE_MSG ("\n");
        break; /* tail merge */
      }
      case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
      {
        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first, if zero iteration is allowed.
        */
        const lit_utf8_byte_t *old_begin_p = NULL;
        const uint8_t *const bc_start_p = bc_p; /* save the bytecode start position of the group start */
        const uint32_t start_idx = re_get_value (&bc_p);
        const uint32_t offset = re_get_value (&bc_p);

        /* Iteration counters: capture groups use slot [idx - 1], non-capture groups follow them. */
        uint32_t *iterator_p;
        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx < re_ctx_p->captures_count);
          re_ctx_p->captures_p[start_idx].begin_p = str_curr_p;
          iterator_p = &(re_ctx_p->iterations_p[start_idx - 1]);
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->non_captures_count);
          iterator_p = &(re_ctx_p->iterations_p[start_idx + re_ctx_p->captures_count - 1]);
        }
        *iterator_p = 0;

        /* Jump all over to the end of the END opcode. */
        bc_p += offset;

        /* Try to match after the close paren if zero is allowed */
        const lit_utf8_byte_t *matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

        if (matched_p != NULL)
        {
          /* NOTE(review): every other branch returns matched_p; returning str_curr_p here also
           * turns an out-of-stack result into an ordinary match - confirm this is intended. */
          return str_curr_p; /* match */
        }

        if (RE_IS_CAPTURE_GROUP (op))
        {
          re_ctx_p->captures_p[start_idx].begin_p = old_begin_p;
        }

        /* Zero iterations failed: rewind and handle the opcode as a regular group start. */
        bc_p = bc_start_p;
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GROUP_START:
      case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
      case RE_OP_NON_CAPTURE_GROUP_START:
      case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
      {
        const uint8_t *bc_end_p = NULL;
        const uint32_t start_idx = re_get_value (&bc_p);

        /* Only the zero-iteration variants carry the offset to the bytecode after the group. */
        if (op != RE_OP_CAPTURE_GROUP_START
            && op != RE_OP_NON_CAPTURE_GROUP_START)
        {
          const uint32_t offset = re_get_value (&bc_p);
          bc_end_p = bc_p + offset;
        }

        const lit_utf8_byte_t **group_begin_p;
        uint32_t *iterator_p;
        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (start_idx < re_ctx_p->captures_count);
          group_begin_p = &(re_ctx_p->captures_p[start_idx].begin_p);
          iterator_p = &(re_ctx_p->iterations_p[start_idx - 1]);
        }
        else
        {
          JERRY_ASSERT (start_idx < re_ctx_p->non_captures_count);
          group_begin_p = &(re_ctx_p->non_captures_p[start_idx].str_p);
          iterator_p = &(re_ctx_p->iterations_p[start_idx + re_ctx_p->captures_count - 1]);
        }

        /* Remember the previous state so it can be put back if the group cannot match. */
        const lit_utf8_byte_t *const old_begin_p = *group_begin_p;
        const uint32_t old_iter_count = *iterator_p;
        *group_begin_p = str_curr_p;
        *iterator_p = 0;

        do
        {
          const uint32_t offset = re_get_value (&bc_p);
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }

          bc_p += offset;
        }
        while (re_get_opcode (&bc_p) == RE_OP_ALTERNATIVE);

        bc_p -= sizeof (uint8_t);
        *iterator_p = old_iter_count;

        /* Try to match after the close paren if zero is allowed. */
        if (op == RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START
            || op == RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START)
        {
          JERRY_ASSERT (bc_end_p);
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_end_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }
        }

        *group_begin_p = old_begin_p;
        return NULL; /* fail */
      }
      case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
      {
        /*
        *  On non-greedy iterations we have to execute the bytecode
        *  after the group first. Try to iterate only if it fails.
        */
        const uint8_t *const bc_start_p = bc_p; /* save the bytecode start position of the group end */
        const uint32_t end_idx = re_get_value (&bc_p);
        const uint32_t min = re_get_value (&bc_p);
        const uint32_t max = re_get_value (&bc_p);
        re_get_value (&bc_p); /* start offset */

        const lit_utf8_byte_t **group_end_p;
        uint32_t *iterator_p;
        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx < re_ctx_p->captures_count);
          group_end_p = &(re_ctx_p->captures_p[end_idx].end_p);
          iterator_p = &(re_ctx_p->iterations_p[end_idx - 1]);
        }
        else
        {
          JERRY_ASSERT (end_idx < re_ctx_p->non_captures_count);
          group_end_p = &(re_ctx_p->non_captures_p[end_idx].str_p);
          iterator_p = &(re_ctx_p->iterations_p[end_idx + re_ctx_p->captures_count - 1]);
        }

        /* Count the iteration that has just finished while trying the rest of the pattern. */
        (*iterator_p)++;

        if (*iterator_p >= min && *iterator_p <= max)
        {
          const lit_utf8_byte_t *const old_end_p = *group_end_p;
          *group_end_p = str_curr_p;

          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }

          *group_end_p = old_end_p;
        }
        /* Undo the count and rewind: the greedy handler below re-reads the operands. */
        (*iterator_p)--;
        bc_p = bc_start_p;

        /* Non-greedy fails, try to iterate. */
        /* FALLTHRU */
      }
      case RE_OP_CAPTURE_GREEDY_GROUP_END:
      case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
      {
        const uint32_t end_idx = re_get_value (&bc_p);
        const uint32_t min = re_get_value (&bc_p);
        const uint32_t max = re_get_value (&bc_p);
        uint32_t offset = re_get_value (&bc_p);

        const lit_utf8_byte_t **group_begin_p;
        const lit_utf8_byte_t **group_end_p;
        uint32_t *iterator_p;

        if (RE_IS_CAPTURE_GROUP (op))
        {
          JERRY_ASSERT (end_idx < re_ctx_p->captures_count);
          group_begin_p = &(re_ctx_p->captures_p[end_idx].begin_p);
          group_end_p = &(re_ctx_p->captures_p[end_idx].end_p);
          iterator_p = &(re_ctx_p->iterations_p[end_idx - 1]);
        }
        else
        {
          /* end_idx indexes non_captures_p, so it has to be strictly below the count. */
          JERRY_ASSERT (end_idx < re_ctx_p->non_captures_count);
          /* Non-capture groups keep a single position, used both as begin and as end. */
          group_begin_p = &(re_ctx_p->non_captures_p[end_idx].str_p);
          group_end_p = &(re_ctx_p->non_captures_p[end_idx].str_p);
          iterator_p = &(re_ctx_p->iterations_p[end_idx + re_ctx_p->captures_count - 1]);
        }

        /* Check the empty iteration if the minimum number of iterations is reached. */
        if (*iterator_p >= min && str_curr_p == *group_begin_p)
        {
          return NULL; /* fail */
        }

        (*iterator_p)++;

        const uint8_t *const bc_start_p = bc_p; /* Save the bytecode end position of the END opcodes. */
        const lit_utf8_byte_t *const old_end_p = *group_end_p;
        *group_end_p = str_curr_p;

        if (*iterator_p < max)
        {
          /* Greedy: jump back into the group body and try one more iteration first. */
          bc_p -= offset;
          offset = re_get_value (&bc_p);

          const lit_utf8_byte_t *const old_begin_p = *group_begin_p;
          *group_begin_p = str_curr_p;

          const lit_utf8_byte_t *matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }

          /* Try to match alternatives if any. */
          bc_p += offset;
          while (*bc_p == RE_OP_ALTERNATIVE)
          {
            bc_p++; /* RE_OP_ALTERNATIVE */
            offset = re_get_value (&bc_p);

            *group_begin_p = str_curr_p;

            matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

            if (matched_p != NULL)
            {
              return matched_p; /* match */
            }

            bc_p += offset;
          }

          *group_begin_p = old_begin_p;
        }

        if (*iterator_p >= min && *iterator_p <= max)
        {
          /* Try to match the rest of the bytecode. */
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_start_p, str_curr_p);

          if (matched_p != NULL)
          {
            return matched_p; /* match */
          }
        }

        /* restore if fails */
        *group_end_p = old_end_p;
        (*iterator_p)--;
        return NULL; /* fail */
      }
      case RE_OP_NON_GREEDY_ITERATOR:
      {
        const uint32_t min = re_get_value (&bc_p);
        const uint32_t max = re_get_value (&bc_p);

        const uint32_t offset = re_get_value (&bc_p);
        JERRY_TRACE_MSG ("Non-greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                         (unsigned long) min, (unsigned long) max, (long) offset);

        uint32_t iter_count = 0;
        while (iter_count <= max)
        {
          /* Prefer the continuation as soon as the minimum count is satisfied. */
          if (iter_count >= min)
          {
            const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p + offset, str_curr_p);

            if (matched_p != NULL)
            {
              return matched_p; /* match */
            }
          }

          /* Otherwise consume one more repetition of the iterated bytecode. */
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
          {
            return matched_p;
          }

          if (matched_p == NULL)
          {
            break;
          }

          str_curr_p = matched_p;
          iter_count++;
        }

        return NULL; /* fail */
      }
      default:
      {
        JERRY_ASSERT (op == RE_OP_GREEDY_ITERATOR);

        const uint32_t min = re_get_value (&bc_p);
        const uint32_t max = re_get_value (&bc_p);

        const uint32_t offset = re_get_value (&bc_p);
        JERRY_TRACE_MSG ("Greedy iterator, min=%lu, max=%lu, offset=%ld\n",
                         (unsigned long) min, (unsigned long) max, (long) offset);

        /* Consume as many repetitions as possible (at most max). */
        uint32_t iter_count = 0;
        while (iter_count < max)
        {
          const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p, str_curr_p);

          if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
          {
            return matched_p;
          }

          if (matched_p == NULL)
          {
            break;
          }

          str_curr_p = matched_p;
          iter_count++;
        }

        if (iter_count >= min)
        {
          /* Try the continuation, giving back one character at a time down to min repetitions. */
          while (true)
          {
            const lit_utf8_byte_t *const matched_p = ecma_regexp_match (re_ctx_p, bc_p + offset, str_curr_p);

            if (matched_p != NULL)
            {
              return matched_p; /* match */
            }

            if (iter_count == min)
            {
              break;
            }

            lit_utf8_read_prev (&str_curr_p);
            iter_count--;
          }
        }

        return NULL; /* fail */
      }
    }
  }
} /* ecma_regexp_match */

/**
 * Helper function to build the result Array object from the captures
 * stored in a RegExp context
 *
 * Each capture is defined as an indexed element of the array: a string
 * copied from the captured range, or undefined when the capture has no
 * valid range. Afterwards the properties named by LIT_MAGIC_STRING_INDEX
 * and LIT_MAGIC_STRING_INPUT are defined on the array.
 *
 * @return ecma value of the created array object
 */
static ecma_value_t
ecma_regexp_create_result_object (ecma_regexp_ctx_t *re_ctx_p, /**< regexp context holding the captures */
                                  ecma_string_t *input_string_p, /**< string stored in the 'input' property */
                                  uint32_t index) /**< number stored in the 'index' property */
{
  const ecma_value_t array_value = ecma_op_create_array_object (0, 0, false);
  ecma_object_t *const array_p = ecma_get_object_from_value (array_value);

  for (uint32_t capture_idx = 0; capture_idx < re_ctx_p->captures_count; capture_idx++)
  {
    const ecma_regexp_capture_t *const capture_p = re_ctx_p->captures_p + capture_idx;

    /* A capture without a valid [begin, end) range is reported as undefined. */
    ecma_string_t *element_str_p = NULL;
    ecma_value_t element_value = ECMA_VALUE_UNDEFINED;

    if (capture_p->begin_p != NULL && capture_p->end_p >= capture_p->begin_p)
    {
      const lit_utf8_size_t byte_count = (lit_utf8_size_t) (capture_p->end_p - capture_p->begin_p);

      element_str_p = ecma_new_ecma_string_from_utf8 (capture_p->begin_p, byte_count);
      element_value = ecma_make_string_value (element_str_p);
    }

    ecma_builtin_helper_def_prop_by_index (array_p,
                                           capture_idx,
                                           element_value,
                                           ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);

    /* Drop the local reference of the temporary capture string, if one was created. */
    if (element_str_p != NULL)
    {
      ecma_deref_ecma_string (element_str_p);
    }
  }

  ecma_string_t *const index_name_p = ecma_get_magic_string (LIT_MAGIC_STRING_INDEX);
  ecma_builtin_helper_def_prop (array_p,
                                index_name_p,
                                ecma_make_uint32_value (index),
                                ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);

  ecma_string_t *const input_name_p = ecma_get_magic_string (LIT_MAGIC_STRING_INPUT);
  ecma_builtin_helper_def_prop (array_p,
                                input_name_p,
                                ecma_make_string_value (input_string_p),
                                ECMA_PROPERTY_CONFIGURABLE_ENUMERABLE_WRITABLE);

  return array_value;
} /* ecma_regexp_create_result_object */

/**
 * RegExp helper function to start the recursive matching algorithm
 * and create the result Array object
 *
 * When the global flag is in effect (set in the bytecode and not masked
 * by 'ignore_global'), matching starts at the position given by the
 * 'lastIndex' property of the RegExp object, and 'lastIndex' is written
 * back: to the end of the match on success, to 0 on failure.
 *
 * See also:
 *          ECMA-262 v5, 15.10.6.2
 *
 * @return array object - if matched
 *         null         - otherwise
 *
 *         May raise error.
 *         Returned value must be freed with ecma_free_value
 */
ecma_value_t
ecma_regexp_exec_helper (ecma_value_t regexp_value, /**< RegExp object */
                         ecma_value_t input_string, /**< input string */
                         bool ignore_global) /**< ignore global flag */
{
  ecma_value_t ret_value = ECMA_VALUE_EMPTY;

  JERRY_ASSERT (ecma_is_value_object (regexp_value));
  JERRY_ASSERT (ecma_is_value_string (input_string));

  ecma_object_t *regexp_object_p = ecma_get_object_from_value (regexp_value);

  JERRY_ASSERT (ecma_object_class_is (regexp_object_p, LIT_MAGIC_STRING_REGEXP_UL));

  /* The compiled bytecode pointer is stored in the class property value of the object. */
  ecma_extended_object_t *ext_object_p = (ecma_extended_object_t *) regexp_object_p;
  re_compiled_code_t *bc_p = ECMA_GET_INTERNAL_VALUE_ANY_POINTER (re_compiled_code_t,
                                                                  ext_object_p->u.class_prop.u.value);

  ecma_regexp_ctx_t re_ctx;
  ecma_string_t *input_string_p = ecma_get_string_from_value (input_string);

  if (bc_p == NULL)
  {
#if ENABLED (JERRY_ES2015)
    return ecma_raise_type_error (ECMA_ERR_MSG ("Incompatible type"));
#else /* !ENABLED (JERRY_ES2015) */
    /* Missing bytecode means the RegExp object is the RegExp.prototype,
     * which will always result in an empty string match. */
    re_ctx.captures_count = 1;

    re_ctx.captures_p = jmem_heap_alloc_block (sizeof (ecma_regexp_capture_t));
    re_ctx.captures_p->begin_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING__EMPTY);
    re_ctx.captures_p->end_p = lit_get_magic_string_utf8 (LIT_MAGIC_STRING__EMPTY);

    ret_value = ecma_regexp_create_result_object (&re_ctx, input_string_p, 0);

    jmem_heap_free_block (re_ctx.captures_p, sizeof (ecma_regexp_capture_t));
    return ret_value;
#endif /* ENABLED (JERRY_ES2015) */
  }

  re_ctx.flags = bc_p->header.status_flags;

  if (ignore_global)
  {
    /* With the global flag cleared 'lastIndex' is neither read nor updated below. */
    re_ctx.flags &= (uint16_t) ~RE_FLAG_GLOBAL;
  }

  /* The returned buffer is released at 'cleanup_string' when the
   * ECMA_STRING_FLAG_MUST_BE_FREED bit is reported in 'input_flags'. */
  lit_utf8_size_t input_size;
  lit_utf8_size_t input_length;
  uint8_t input_flags = ECMA_STRING_FLAG_IS_ASCII;
  const lit_utf8_byte_t *input_buffer_p = ecma_string_get_chars (input_string_p,
                                                                 &input_size,
                                                                 &input_length,
                                                                 NULL,
                                                                 &input_flags);

  const lit_utf8_byte_t *input_curr_p = input_buffer_p;
  uint32_t index = 0;
  if (re_ctx.flags & RE_FLAG_GLOBAL)
  {
    ecma_string_t *lastindex_str_p = ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL);
    ecma_value_t lastindex_value = ecma_op_object_get_own_data_prop (regexp_object_p, lastindex_str_p);

    ecma_number_t lastindex_num;
    ret_value = ecma_get_number (lastindex_value, &lastindex_num);
    ecma_free_value (lastindex_value);

    if (ECMA_IS_VALUE_ERROR (ret_value))
    {
      goto cleanup_string;
    }

    /* TODO: Replace with ToLength */
    /* Smallest value which is out of bounds after truncation to an integer. */
    const ecma_number_t out_of_bounds_num = (ecma_number_t) (input_length + 1);

    if (lastindex_num < 0.0f)
    {
#if ENABLED (JERRY_ES2015)
      lastindex_num = 0.0f;
#else /* !ENABLED (JERRY_ES2015) */
      lastindex_num = out_of_bounds_num;
#endif /* ENABLED (JERRY_ES2015) */
    }
    else if (lastindex_num >= out_of_bounds_num)
    {
      /* Clamp before the uint32 conversion: a value which does not fit into
       * uint32 (e.g. 2^32 or Infinity) could otherwise be converted to an
       * index inside the string instead of failing the match. */
      lastindex_num = out_of_bounds_num;
    }
    index = ecma_number_to_uint32 (lastindex_num);

    if (index > input_length)
    {
      ret_value = ecma_op_object_put (regexp_object_p,
                                      lastindex_str_p,
                                      ecma_make_integer_value (0),
                                      true);

      if (!ECMA_IS_VALUE_ERROR (ret_value))
      {
        JERRY_ASSERT (ecma_is_value_boolean (ret_value));
        /* lastIndex is out of bounds, the match should fail. */
        ret_value = ECMA_VALUE_NULL;
      }

      goto cleanup_string;
    }

    if (index > 0)
    {
      /* Move the byte pointer to the start position: for ASCII input the index
       * is used as a byte offset, otherwise one lit_utf8_incr step is taken
       * per index unit. */
      if (input_flags & ECMA_STRING_FLAG_IS_ASCII)
      {
        input_curr_p += index;
      }
      else
      {
        for (uint32_t i = 0; i < index; i++)
        {
          lit_utf8_incr (&input_curr_p);
        }
      }
    }
  }

  re_ctx.input_start_p = input_buffer_p;
  const lit_utf8_byte_t *input_end_p = re_ctx.input_start_p + input_size;
  re_ctx.input_end_p = input_end_p;

  JERRY_TRACE_MSG ("Exec with flags [global: %d, ignoreCase: %d, multiline: %d]\n",
                   re_ctx.flags & RE_FLAG_GLOBAL,
                   re_ctx.flags & RE_FLAG_IGNORE_CASE,
                   re_ctx.flags & RE_FLAG_MULTILINE);

  /* Allocate and zero the per-execution matcher state; it is released at 'cleanup_context'. */
  re_ctx.captures_count = bc_p->captures_count;
  re_ctx.captures_p = jmem_heap_alloc_block (re_ctx.captures_count * sizeof (ecma_regexp_capture_t));
  memset (re_ctx.captures_p, 0, re_ctx.captures_count * sizeof (ecma_regexp_capture_t));

  re_ctx.non_captures_count = bc_p->non_captures_count;
  re_ctx.non_captures_p = jmem_heap_alloc_block (re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t));
  memset (re_ctx.non_captures_p, 0, re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t));

  const uint32_t iters_length = re_ctx.captures_count + re_ctx.non_captures_count - 1;
  re_ctx.iterations_p = jmem_heap_alloc_block (iters_length * sizeof (uint32_t));
  memset (re_ctx.iterations_p, 0, iters_length * sizeof (uint32_t));

  /* 2. Try to match */
  uint8_t *bc_start_p = (uint8_t *) (bc_p + 1);
  const lit_utf8_byte_t *matched_p = NULL;

  JERRY_ASSERT (index <= input_length);
  /* Retry the match from each successive start position until the matcher
   * returns a non-NULL pointer or the end of the input is passed. */
  while (true)
  {
    matched_p = ecma_regexp_match (&re_ctx, bc_start_p, input_curr_p);

    if (matched_p != NULL)
    {
      /* Either a real match or the stack limit marker, which is checked after the loop. */
      break;
    }

    index++;
    if (index > input_length)
    {
      if (re_ctx.flags & RE_FLAG_GLOBAL)
      {
        ecma_value_t put_result = ecma_op_object_put (regexp_object_p,
                                                      ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                                      ecma_make_uint32_value (0),
                                                      true);
        if (ECMA_IS_VALUE_ERROR (put_result))
        {
          ret_value = put_result;
          goto cleanup_context;
        }

        JERRY_ASSERT (ecma_is_value_boolean (put_result));
      }

      /* Failed to match, return 'null'. */
      ret_value = ECMA_VALUE_NULL;
      goto cleanup_context;
    }

    JERRY_ASSERT (input_curr_p < input_end_p);
    lit_utf8_incr (&input_curr_p);
  }

  JERRY_ASSERT (matched_p != NULL);

  if (ECMA_RE_STACK_LIMIT_REACHED (matched_p))
  {
    ret_value = ecma_raise_range_error (ECMA_ERR_MSG ("Stack limit exceeded."));
    goto cleanup_context;
  }

  if (re_ctx.flags & RE_FLAG_GLOBAL)
  {
    JERRY_ASSERT (index <= input_length);

    /* The new 'lastIndex' is the start index plus the length of the whole match (capture 0),
     * measured in the same units as 'index'. */
    lit_utf8_size_t match_length;
    const lit_utf8_byte_t *match_begin_p = re_ctx.captures_p[0].begin_p;
    const lit_utf8_byte_t *match_end_p = re_ctx.captures_p[0].end_p;

    if (input_flags & ECMA_STRING_FLAG_IS_ASCII)
    {
      match_length = (lit_utf8_size_t) (match_end_p - match_begin_p);
    }
    else
    {
      match_length = lit_utf8_string_length (match_begin_p,
                                             (lit_utf8_size_t) (match_end_p - match_begin_p));
    }

    ecma_value_t put_result = ecma_op_object_put (regexp_object_p,
                                                  ecma_get_magic_string (LIT_MAGIC_STRING_LASTINDEX_UL),
                                                  ecma_make_uint32_value (index + match_length),
                                                  true);
    if (ECMA_IS_VALUE_ERROR (put_result))
    {
      ret_value = put_result;
      goto cleanup_context;
    }

    JERRY_ASSERT (ecma_is_value_boolean (put_result));
  }

  ret_value = ecma_regexp_create_result_object (&re_ctx, input_string_p, index);

cleanup_context:
  jmem_heap_free_block (re_ctx.captures_p, re_ctx.captures_count * sizeof (ecma_regexp_capture_t));
  if (re_ctx.non_captures_p != NULL)
  {
    jmem_heap_free_block (re_ctx.non_captures_p, re_ctx.non_captures_count * sizeof (ecma_regexp_non_capture_t));
  }
  if (re_ctx.iterations_p != NULL)
  {
    jmem_heap_free_block (re_ctx.iterations_p, iters_length * sizeof (uint32_t));
  }

cleanup_string:
  if (input_flags & ECMA_STRING_FLAG_MUST_BE_FREED)
  {
    jmem_heap_free_block ((void *) input_buffer_p, input_size);
  }

  return ret_value;
} /* ecma_regexp_exec_helper */

/**
 * Helper function for converting a RegExp pattern argument to a string.
 *
 * A pattern argument other than undefined is converted with ecma_op_to_string
 * and stored through 'pattern_string_p'; a non-empty result gets an extra
 * reference before the temporary value is released. If the argument is
 * undefined, the string already present in '*pattern_string_p' is kept.
 * A NULL or empty string is finally replaced with the magic string
 * LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP.
 *
 * See also:
 *         RegExp.compile
 *         RegExp dispatch call
 *
 * @return empty value if success, error value otherwise
 *         Returned value must be freed with ecma_free_value.
 */
ecma_value_t
ecma_regexp_read_pattern_str_helper (ecma_value_t pattern_arg, /**< the RegExp pattern */
                                     ecma_string_t **pattern_string_p) /**< [out] ptr to the pattern string ptr */
{
  if (!ecma_is_value_undefined (pattern_arg))
  {
    const ecma_value_t converted_value = ecma_op_to_string (pattern_arg);

    if (ECMA_IS_VALUE_ERROR (converted_value))
    {
      return converted_value;
    }

    ecma_string_t *const converted_str_p = ecma_get_string_from_value (converted_value);

    /* Keep the string alive for the caller before the temporary value is released. */
    if (!ecma_string_is_empty (converted_str_p))
    {
      ecma_ref_ecma_string (converted_str_p);
    }

    *pattern_string_p = converted_str_p;
    ecma_free_value (converted_value);
  }

  ecma_string_t *const current_str_p = *pattern_string_p;

  if (current_str_p == NULL || ecma_string_is_empty (current_str_p))
  {
    *pattern_string_p = ecma_get_magic_string (LIT_MAGIC_STRING_EMPTY_NON_CAPTURE_GROUP);
  }

  return ECMA_VALUE_EMPTY;
} /* ecma_regexp_read_pattern_str_helper */

/**
 * @}
 * @}
 */

#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */