mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2025-12-15 16:29:21 +00:00
492 lines
14 KiB
C
492 lines
14 KiB
C
/* Copyright JS Foundation and other contributors, http://js.foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "ecma-globals.h"
|
|
#include "re-bytecode.h"
|
|
#include "ecma-regexp-object.h"
|
|
|
|
#if ENABLED (JERRY_BUILTIN_REGEXP)
|
|
|
|
/** \addtogroup parser Parser
|
|
* @{
|
|
*
|
|
* \addtogroup regexparser Regular expression
|
|
* @{
|
|
*
|
|
* \addtogroup regexparser_bytecode Bytecode
|
|
* @{
|
|
*/
|
|
|
|
/**
|
|
* Size of block of RegExp bytecode. Used for allocation
|
|
*
|
|
* This is the payload size of the initial block (before alignment) and the growth step of every reallocation.
|
|
*/
|
|
#define REGEXP_BYTECODE_BLOCK_SIZE 8UL
|
|
|
|
void
|
|
re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
|
{
|
|
const size_t initial_size = JERRY_ALIGNUP (REGEXP_BYTECODE_BLOCK_SIZE + sizeof (re_compiled_code_t), JMEM_ALIGNMENT);
|
|
bc_ctx_p->block_start_p = jmem_heap_alloc_block (initial_size);
|
|
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + initial_size;
|
|
bc_ctx_p->current_p = bc_ctx_p->block_start_p + sizeof (re_compiled_code_t);
|
|
} /* re_initialize_regexp_bytecode */
|
|
|
|
/**
|
|
* Realloc the bytecode container
|
|
*
|
|
* @return current position in RegExp bytecode
|
|
*/
|
|
static uint8_t *
|
|
re_realloc_regexp_bytecode_block (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
|
{
|
|
JERRY_ASSERT (bc_ctx_p->block_end_p >= bc_ctx_p->block_start_p);
|
|
const size_t old_size = (size_t) (bc_ctx_p->block_end_p - bc_ctx_p->block_start_p);
|
|
|
|
/* If one of the members of RegExp bytecode context is NULL, then all member should be NULL
|
|
* (it means first allocation), otherwise all of the members should be a non NULL pointer. */
|
|
JERRY_ASSERT ((!bc_ctx_p->current_p && !bc_ctx_p->block_end_p && !bc_ctx_p->block_start_p)
|
|
|| (bc_ctx_p->current_p && bc_ctx_p->block_end_p && bc_ctx_p->block_start_p));
|
|
|
|
const size_t new_size = old_size + REGEXP_BYTECODE_BLOCK_SIZE;
|
|
JERRY_ASSERT (bc_ctx_p->current_p >= bc_ctx_p->block_start_p);
|
|
const size_t current_ptr_offset = (size_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p);
|
|
|
|
bc_ctx_p->block_start_p = jmem_heap_realloc_block (bc_ctx_p->block_start_p,
|
|
old_size,
|
|
new_size);
|
|
bc_ctx_p->block_end_p = bc_ctx_p->block_start_p + new_size;
|
|
bc_ctx_p->current_p = bc_ctx_p->block_start_p + current_ptr_offset;
|
|
|
|
return bc_ctx_p->current_p;
|
|
} /* re_realloc_regexp_bytecode_block */
|
|
|
|
/**
|
|
* Append a new bytecode to the and of the bytecode container
|
|
*/
|
|
static uint8_t *
|
|
re_bytecode_reserve (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const size_t size) /**< size */
|
|
{
|
|
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
|
|
|
uint8_t *current_p = bc_ctx_p->current_p;
|
|
if (current_p + size > bc_ctx_p->block_end_p)
|
|
{
|
|
current_p = re_realloc_regexp_bytecode_block (bc_ctx_p);
|
|
}
|
|
|
|
bc_ctx_p->current_p += size;
|
|
return current_p;
|
|
} /* re_bytecode_reserve */
|
|
|
|
/**
|
|
* Insert a new bytecode to the bytecode container
|
|
*/
|
|
static void
|
|
re_bytecode_insert (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const size_t offset, /**< distance from the start of the container */
|
|
const size_t size) /**< size */
|
|
{
|
|
JERRY_ASSERT (size <= REGEXP_BYTECODE_BLOCK_SIZE);
|
|
|
|
uint8_t *current_p = bc_ctx_p->current_p;
|
|
if (current_p + size > bc_ctx_p->block_end_p)
|
|
{
|
|
re_realloc_regexp_bytecode_block (bc_ctx_p);
|
|
}
|
|
|
|
uint8_t *dest_p = bc_ctx_p->block_start_p + offset;
|
|
const size_t bytecode_length = re_get_bytecode_length (bc_ctx_p);
|
|
if (bytecode_length - offset > 0)
|
|
{
|
|
memmove (dest_p + size, dest_p, bytecode_length - offset);
|
|
}
|
|
|
|
bc_ctx_p->current_p += size;
|
|
} /* re_bytecode_insert */
|
|
|
|
/**
|
|
* Encode ecma_char_t into bytecode
|
|
*/
|
|
static void
|
|
re_encode_char (uint8_t *dest_p, /**< destination */
|
|
const ecma_char_t c) /**< character */
|
|
{
|
|
*dest_p++ = (uint8_t) ((c >> 8) & 0xFF);
|
|
*dest_p = (uint8_t) (c & 0xFF);
|
|
} /* re_encode_char */
|
|
|
|
/**
 * Encode uint32_t into bytecode
 *
 * Four bytes are written to the destination, the most significant byte first.
 */
static void
re_encode_u32 (uint8_t *dest_p, /**< destination */
               const uint32_t u) /**< uint32 value */
{
  dest_p[0] = (uint8_t) ((u >> 24) & 0xFF);
  dest_p[1] = (uint8_t) ((u >> 16) & 0xFF);
  dest_p[2] = (uint8_t) ((u >> 8) & 0xFF);
  dest_p[3] = (uint8_t) (u & 0xFF);
} /* re_encode_u32 */
|
|
|
|
/**
|
|
* Get a character from the RegExp bytecode and increase the bytecode position
|
|
*
|
|
* @return ecma character
|
|
*/
|
|
inline ecma_char_t JERRY_ATTR_ALWAYS_INLINE
|
|
re_get_char (const uint8_t **bc_p) /**< pointer to bytecode start */
|
|
{
|
|
const uint8_t *src_p = *bc_p;
|
|
ecma_char_t chr = (ecma_char_t) *src_p++;
|
|
chr = (ecma_char_t) (chr << 8);
|
|
chr = (ecma_char_t) (chr | *src_p);
|
|
(*bc_p) += sizeof (ecma_char_t);
|
|
return chr;
|
|
} /* re_get_char */
|
|
|
|
/**
|
|
* Get a RegExp opcode and increase the bytecode position
|
|
*
|
|
* @return current RegExp opcode
|
|
*/
|
|
inline re_opcode_t JERRY_ATTR_ALWAYS_INLINE
|
|
re_get_opcode (const uint8_t **bc_p) /**< pointer to bytecode start */
|
|
{
|
|
return (re_opcode_t) *((*bc_p)++);
|
|
} /* re_get_opcode */
|
|
|
|
/**
|
|
* Get a parameter of a RegExp opcode and increase the bytecode position
|
|
*
|
|
* @return opcode parameter
|
|
*/
|
|
inline uint32_t JERRY_ATTR_ALWAYS_INLINE
|
|
re_get_value (const uint8_t **bc_p) /**< pointer to bytecode start */
|
|
{
|
|
const uint8_t *src_p = *bc_p;
|
|
uint32_t value = (uint32_t) (*src_p++);
|
|
value <<= 8;
|
|
value |= ((uint32_t) (*src_p++));
|
|
value <<= 8;
|
|
value |= ((uint32_t) (*src_p++));
|
|
value <<= 8;
|
|
value |= ((uint32_t) (*src_p++));
|
|
|
|
(*bc_p) += sizeof (uint32_t);
|
|
return value;
|
|
} /* re_get_value */
|
|
|
|
/**
|
|
* Get length of bytecode
|
|
*
|
|
* @return bytecode length (unsigned integer)
|
|
*/
|
|
inline uint32_t JERRY_ATTR_PURE JERRY_ATTR_ALWAYS_INLINE
|
|
re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
|
{
|
|
return ((uint32_t) (bc_ctx_p->current_p - bc_ctx_p->block_start_p));
|
|
} /* re_get_bytecode_length */
|
|
|
|
/**
|
|
* Append a RegExp opcode
|
|
*/
|
|
void
|
|
re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const re_opcode_t opcode) /**< input opcode */
|
|
{
|
|
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint8_t));
|
|
*dest_p = (uint8_t) opcode;
|
|
} /* re_append_opcode */
|
|
|
|
/**
|
|
* Append a parameter of a RegExp opcode
|
|
*/
|
|
void
|
|
re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const uint32_t value) /**< input value */
|
|
{
|
|
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (uint32_t));
|
|
re_encode_u32 (dest_p, value);
|
|
} /* re_append_u32 */
|
|
|
|
/**
|
|
* Append a character to the RegExp bytecode
|
|
*/
|
|
void
|
|
re_append_char (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const ecma_char_t input_char) /**< input char */
|
|
{
|
|
uint8_t *dest_p = re_bytecode_reserve (bc_ctx_p, sizeof (ecma_char_t));
|
|
re_encode_char (dest_p, input_char);
|
|
} /* re_append_char */
|
|
|
|
/**
|
|
* Append a jump offset parameter of a RegExp opcode
|
|
*/
|
|
void
|
|
re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
uint32_t value) /**< input value */
|
|
{
|
|
value += (uint32_t) (sizeof (uint32_t));
|
|
re_append_u32 (bc_ctx_p, value);
|
|
} /* re_append_jump_offset */
|
|
|
|
/**
|
|
* Insert a RegExp opcode
|
|
*/
|
|
void
|
|
re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
const uint32_t offset, /**< distance from the start of the container */
|
|
const re_opcode_t opcode) /**< input opcode */
|
|
{
|
|
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint8_t));
|
|
*(bc_ctx_p->block_start_p + offset) = (uint8_t) opcode;
|
|
} /* re_insert_opcode */
|
|
|
|
/**
|
|
* Insert a parameter of a RegExp opcode
|
|
*/
|
|
void
|
|
re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, /**< RegExp bytecode context */
|
|
uint32_t offset, /**< distance from the start of the container */
|
|
uint32_t value) /**< input value */
|
|
{
|
|
re_bytecode_insert (bc_ctx_p, offset, sizeof (uint32_t));
|
|
re_encode_u32 (bc_ctx_p->block_start_p + offset, value);
|
|
} /* re_insert_u32 */
|
|
|
|
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
|
|
/**
|
|
* RegExp bytecode dumper
|
|
*/
|
|
void
|
|
re_dump_bytecode (re_bytecode_ctx_t *bc_ctx_p) /**< RegExp bytecode context */
|
|
{
|
|
re_compiled_code_t *compiled_code_p = (re_compiled_code_t *) bc_ctx_p->block_start_p;
|
|
JERRY_DEBUG_MSG ("%d ", compiled_code_p->header.status_flags);
|
|
JERRY_DEBUG_MSG ("%d ", compiled_code_p->captures_count);
|
|
JERRY_DEBUG_MSG ("%d | ", compiled_code_p->non_captures_count);
|
|
|
|
const uint8_t *bytecode_p = (const uint8_t *) (compiled_code_p + 1);
|
|
|
|
re_opcode_t op;
|
|
while ((op = re_get_opcode (&bytecode_p)))
|
|
{
|
|
switch (op)
|
|
{
|
|
case RE_OP_MATCH:
|
|
{
|
|
JERRY_DEBUG_MSG ("MATCH, ");
|
|
break;
|
|
}
|
|
case RE_OP_CHAR:
|
|
{
|
|
JERRY_DEBUG_MSG ("CHAR ");
|
|
JERRY_DEBUG_MSG ("%c, ", (char) re_get_char (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("N");
|
|
/* FALLTHRU */
|
|
}
|
|
case RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("GZ_START ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_CAPTURE_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("START ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_CAPTURE_NON_GREEDY_GROUP_END:
|
|
{
|
|
JERRY_DEBUG_MSG ("N");
|
|
/* FALLTHRU */
|
|
}
|
|
case RE_OP_CAPTURE_GREEDY_GROUP_END:
|
|
{
|
|
JERRY_DEBUG_MSG ("G_END ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("N");
|
|
/* FALLTHRU */
|
|
}
|
|
case RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("GZ_NC_START ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_NON_CAPTURE_GROUP_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("NC_START ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END:
|
|
{
|
|
JERRY_DEBUG_MSG ("N");
|
|
/* FALLTHRU */
|
|
}
|
|
case RE_OP_NON_CAPTURE_GREEDY_GROUP_END:
|
|
{
|
|
JERRY_DEBUG_MSG ("G_NC_END ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_SAVE_AT_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("RE_START ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_SAVE_AND_MATCH:
|
|
{
|
|
JERRY_DEBUG_MSG ("RE_END, ");
|
|
break;
|
|
}
|
|
case RE_OP_GREEDY_ITERATOR:
|
|
{
|
|
JERRY_DEBUG_MSG ("GREEDY_ITERATOR ");
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_NON_GREEDY_ITERATOR:
|
|
{
|
|
JERRY_DEBUG_MSG ("NON_GREEDY_ITERATOR ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_PERIOD:
|
|
{
|
|
JERRY_DEBUG_MSG ("PERIOD ");
|
|
break;
|
|
}
|
|
case RE_OP_ALTERNATIVE:
|
|
{
|
|
JERRY_DEBUG_MSG ("ALTERNATIVE ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_ASSERT_START:
|
|
{
|
|
JERRY_DEBUG_MSG ("ASSERT_START ");
|
|
break;
|
|
}
|
|
case RE_OP_ASSERT_END:
|
|
{
|
|
JERRY_DEBUG_MSG ("ASSERT_END ");
|
|
break;
|
|
}
|
|
case RE_OP_ASSERT_WORD_BOUNDARY:
|
|
{
|
|
JERRY_DEBUG_MSG ("ASSERT_WORD_BOUNDARY ");
|
|
break;
|
|
}
|
|
case RE_OP_ASSERT_NOT_WORD_BOUNDARY:
|
|
{
|
|
JERRY_DEBUG_MSG ("ASSERT_NOT_WORD_BOUNDARY ");
|
|
break;
|
|
}
|
|
case RE_OP_LOOKAHEAD_POS:
|
|
{
|
|
JERRY_DEBUG_MSG ("LOOKAHEAD_POS ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_LOOKAHEAD_NEG:
|
|
{
|
|
JERRY_DEBUG_MSG ("LOOKAHEAD_NEG ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_BACKREFERENCE:
|
|
{
|
|
JERRY_DEBUG_MSG ("BACKREFERENCE ");
|
|
JERRY_DEBUG_MSG ("%d, ", re_get_value (&bytecode_p));
|
|
break;
|
|
}
|
|
case RE_OP_INV_CHAR_CLASS:
|
|
{
|
|
JERRY_DEBUG_MSG ("INV_");
|
|
/* FALLTHRU */
|
|
}
|
|
case RE_OP_CHAR_CLASS:
|
|
{
|
|
JERRY_DEBUG_MSG ("CHAR_CLASS ");
|
|
uint32_t num_of_class = re_get_value (&bytecode_p);
|
|
JERRY_DEBUG_MSG ("%d", num_of_class);
|
|
while (num_of_class)
|
|
{
|
|
if ((compiled_code_p->header.status_flags & RE_FLAG_UNICODE) != 0)
|
|
{
|
|
JERRY_DEBUG_MSG (" %u", re_get_value (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("-%u", re_get_value (&bytecode_p));
|
|
}
|
|
else
|
|
{
|
|
JERRY_DEBUG_MSG (" %u", re_get_char (&bytecode_p));
|
|
JERRY_DEBUG_MSG ("-%u", re_get_char (&bytecode_p));
|
|
}
|
|
num_of_class--;
|
|
}
|
|
JERRY_DEBUG_MSG (", ");
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
JERRY_DEBUG_MSG ("UNKNOWN(%d), ", (uint32_t) op);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
JERRY_DEBUG_MSG ("EOF\n");
|
|
} /* re_dump_bytecode */
|
|
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */
|
|
|
|
/**
|
|
* @}
|
|
* @}
|
|
* @}
|
|
*/
|
|
|
|
#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
|