jerryscript/jerry-core/parser/js/js-parser-line-info-create.c
Zoltan Herczeg 998e49a969
Rework storing the line/column/bytecode info (#4707)
This information is stored in a separate memory block instead
of being part of the byte code. Snapshot does not supported.

JerryScript-DCO-1.0-Signed-off-by: Zoltan Herczeg zherczeg.u-szeged@partner.samsung.com
2021-07-15 13:45:10 +02:00

578 lines
18 KiB
C

/* Copyright JS Foundation and other contributors, http://js.foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ecma-line-info.h"
#include "js-parser-internal.h"
#if JERRY_PARSER
#if JERRY_LINE_INFO
/* \addtogroup parser Parser
* @{
*
* \addtogroup jsparser JavaScript
* @{
*
* \addtogroup jsparser_line_info_create Create line info data
* @{
*/
/*
* The line-info data structure uses two number encodings:
*
* Vlq (variable length quantity):
* Each byte has 7 bit data and the highest bit is set for continuation.
* The format is big endian.
*
* Small:
* First byte can encode signed values between 127 and -125 in 1 byte.
* Large values requires more bytes than vlq.
*
* The line-info data structure is a sequence of chunks:
*
* +------+--------------+------------+----------------+
* | Line | StreamLength | StreamData | [ByteCodeSize] |
* +------+--------------+------------+----------------+
*
* Line [Vlq encoding]:
* Specifies the start line of this chunk, relative to its previous value.
* The starting column is always ECMA_LINE_INFO_COLUMN_DEFAULT
*
* StreamLength [uint8_t]:
* Length of the StreamData in bytes minus ECMA_LINE_INFO_STREAM_SIZE_MIN.
* The 0 value represents the last chunk, which size is not specified
* (Can be less than ECMA_LINE_INFO_STREAM_SIZE_MIN).
*
* StreamData [sequence of bytes]:
* Sequence of the following items:
*
* +-----------+--------+--------+
* | EndOffset | [Line] | Column |
* +-----------+--------+--------+
*
* EndOffset [Small encoding]:
* Specifies the EndOffset in the byte code, relative to the previous EndOffset.
* The range of byte codes corresponding to the line/column position of this item
* is between the EndOffset of the previous item (inclusive) and the EndOffset
* of this item (exclusive). The last end offset of a stream is always 0, which
* represents an unterminated range.
*
* Line [Small encoding] [Optional]:
* If bit 1 of end offset is set, this specifies the line position of this item,
* relative to the previous line position, and the column position is set to
* ECMA_LINE_INFO_COLUMN_DEFAULT.
*
* Column [Small encoding]:
* Specifies the current column position relative to the previous column position.
*
* ByteCodeSize [Vlq encoding] [Optional]:
* If StreamLength is not 0, this specifies the byte code size of the whole range.
* This value can be used to skip the byte codes which line info is stored
* in this chunk. This information is not available for the last chunk.
*/
/**
* Maximum number of bytes requires to encode a number.
*/
#define PARSER_LINE_INFO_BUFFER_MAX_SIZE 5
/**
* Stream generation ends after this size is reached,
* since there might be not enough place for the next item.
*/
#define PARSER_LINE_INFO_STREAM_SIZE_LIMIT \
(ECMA_LINE_INFO_STREAM_SIZE_MIN + UINT8_MAX - ((2 * PARSER_LINE_INFO_BUFFER_MAX_SIZE) + 1))
/**
* Free line info temporary data collected during parsing.
*/
void
parser_line_info_free (parser_line_info_data_t *line_info_p)
{
parser_mem_page_t *current_page_p = line_info_p->first_page_p;
while (current_page_p != NULL)
{
parser_mem_page_t *next_p = current_page_p->next_p;
parser_free (current_page_p, sizeof (parser_mem_page_t *) + PARSER_STACK_PAGE_SIZE);
current_page_p = next_p;
}
} /* parser_line_info_free */
/**
* Encodes an uint32_t number into a buffer. Numbers expected to be larger values.
*
* @return the number of bytes written to the buffer
*/
static uint32_t
parser_line_info_encode_vlq (uint8_t *buffer_p, /**< target buffer */
uint32_t value) /**< encoded value */
{
if (value <= ECMA_LINE_INFO_VLQ_MASK)
{
*buffer_p = (uint8_t) value;
return 1;
}
uint32_t length = 0;
uint32_t current_value = value;
do
{
current_value >>= ECMA_LINE_INFO_VLQ_SHIFT;
length++;
}
while (current_value > 0);
buffer_p += length;
do
{
*(--buffer_p) = (uint8_t) (value | ECMA_LINE_INFO_VLQ_CONTINUE);
value >>= ECMA_LINE_INFO_VLQ_SHIFT;
}
while (value > 0);
buffer_p[length - 1] &= ECMA_LINE_INFO_VLQ_MASK;
return length;
} /* parser_line_info_encode_vlq */
/**
* Encodes an uint32_t number into a buffer. Numbers expected to be smaller values.
*
* @return the number of bytes written to the buffer
*/
static uint32_t
parser_line_info_encode_small (uint8_t *buffer_p, /**< target buffer */
uint32_t value) /**< encoded value */
{
if (value < ECMA_LINE_INFO_ENCODE_TWO_BYTE_MIN)
{
buffer_p[0] = (uint8_t) value;
return 1;
}
uint32_t length;
if (JERRY_LIKELY (value < ECMA_LINE_INFO_ENCODE_THREE_BYTE_MIN))
{
value -= ECMA_LINE_INFO_ENCODE_TWO_BYTE_MIN;
buffer_p[0] = ECMA_LINE_INFO_ENCODE_TWO_BYTE;
length = 2;
}
else
{
if (value <= (ECMA_LINE_INFO_ENCODE_THREE_BYTE_MIN + UINT16_MAX))
{
value -= ECMA_LINE_INFO_ENCODE_THREE_BYTE_MIN;
buffer_p[0] = ECMA_LINE_INFO_ENCODE_THREE_BYTE;
length = 3;
}
else
{
buffer_p[0] = ECMA_LINE_INFO_ENCODE_FIVE_BYTE;
buffer_p[3] = (uint8_t) (value >> 16);
buffer_p[4] = (uint8_t) (value >> 24);
length = 5;
}
buffer_p[2] = (uint8_t) (value >> 8);
}
buffer_p[1] = (uint8_t) value;
return length;
} /* parser_line_info_encode_small */
/**
* Encodes the difference between two values.
*
* @return encoded difference
*/
static inline uint32_t JERRY_ATTR_ALWAYS_INLINE
parser_line_info_difference_get (uint32_t current_value, /**< current value */
uint32_t prev_value) /**< previous value */
{
uint32_t result = current_value - prev_value - 1;
if (result <= (UINT32_MAX >> 1))
{
return (result << 1) | ECMA_LINE_INFO_INCREASE;
}
return ((UINT32_MAX - result) << 1) | ECMA_LINE_INFO_DECREASE;
} /* parser_line_info_difference_get */
/**
* Appends a value at the end of the line info stream.
*/
static void
parser_line_info_append_number (parser_context_t *context_p, /**< context */
uint32_t value) /**< value to be encoded */
{
uint8_t buffer[PARSER_LINE_INFO_BUFFER_MAX_SIZE];
uint32_t length = parser_line_info_encode_vlq (buffer, value);
if (context_p->line_info.last_page_p != NULL
&& context_p->line_info.last_page_p->bytes[0] + length <= PARSER_STACK_PAGE_SIZE)
{
memcpy (context_p->line_info.last_page_p->bytes + context_p->line_info.last_page_p->bytes[0],
buffer,
length);
length += context_p->line_info.last_page_p->bytes[0];
context_p->line_info.last_page_p->bytes[0] = (uint8_t) length;
return;
}
size_t size = sizeof (parser_mem_page_t *) + PARSER_STACK_PAGE_SIZE;
parser_mem_page_t *new_page_p = (parser_mem_page_t *) parser_malloc (context_p, size);
new_page_p->next_p = NULL;
if (context_p->line_info.first_page_p == NULL)
{
context_p->line_info.first_page_p = new_page_p;
context_p->line_info.last_page_p = new_page_p;
}
else
{
context_p->line_info.last_page_p->next_p = new_page_p;
context_p->line_info.last_page_p = new_page_p;
}
context_p->line_info.last_page_p->bytes[0] = (uint8_t) (length + 1);
memcpy (new_page_p->bytes + 1, buffer, length);
} /* parser_line_info_append_number */
/**
* Updates the current line information data.
*/
void
parser_line_info_append (parser_context_t *context_p, /**< context */
parser_line_counter_t line, /**< line */
parser_line_counter_t column) /**< column */
{
if (context_p->line_info.first_page_p != NULL
&& (context_p->byte_code_size == context_p->line_info.byte_code_position
|| (context_p->line_info.line == line
&& context_p->line_info.column == column)))
{
return;
}
/* Sets ECMA_LINE_INFO_HAS_LINE bit. */
uint32_t value = (((context_p->byte_code_size - context_p->line_info.byte_code_position) << 1)
| (uint32_t) (context_p->line_info.line != line));
parser_line_info_append_number (context_p, value);
context_p->line_info.byte_code_position = context_p->byte_code_size;
if (value & ECMA_LINE_INFO_HAS_LINE)
{
value = parser_line_info_difference_get (line, context_p->line_info.line);
parser_line_info_append_number (context_p, value);
context_p->line_info.line = line;
}
value = parser_line_info_difference_get (column, context_p->line_info.column);
parser_line_info_append_number (context_p, value);
context_p->line_info.column = column;
} /* parser_line_info_append */
/**
* Line info iterator structure
*/
typedef struct
{
parser_mem_page_t *current_page_p; /**< current page */
uint32_t offset; /**< current offset */
} parser_line_info_iterator_t;
/**
* Decodes the next value from the iterator stream
*/
static uint32_t
parser_line_info_iterator_get (parser_line_info_iterator_t *iterator_p) /**< iterator */
{
uint8_t *source_p = iterator_p->current_page_p->bytes + iterator_p->offset;
uint32_t result = ecma_line_info_decode_vlq (&source_p);
iterator_p->offset = (uint32_t) (source_p - iterator_p->current_page_p->bytes);
JERRY_ASSERT (iterator_p->offset <= iterator_p->current_page_p->bytes[0]);
if (iterator_p->offset < iterator_p->current_page_p->bytes[0])
{
return result;
}
iterator_p->current_page_p = iterator_p->current_page_p->next_p;
iterator_p->offset = 1;
return result;
} /* parser_line_info_iterator_get */
/**
* Generate line info data
*
* @return generated line info data
*/
uint8_t *
parser_line_info_generate (parser_context_t *context_p) /**< context */
{
parser_line_info_iterator_t iterator;
uint8_t *line_info_p = NULL;
uint8_t *dst_p = NULL;
uint32_t total_length = 0;
uint32_t total_length_size = 0;
while (true)
{
/* The following code runs twice: first the size of the data,
* is computed and the data is generated during the second run.
* Note: line_info_p is NULL during the first run. */
parser_mem_page_t *iterator_byte_code_page_p = context_p->byte_code.first_p;
uint32_t iterator_byte_code_page_offset = 0;
uint32_t iterator_byte_code_base = 0;
uint32_t iterator_last_byte_code_offset = UINT32_MAX;
uint32_t iterator_prev_line = 0;
uint32_t iterator_prev_column = 0;
uint32_t iterator_line = 1;
uint32_t iterator_column = 1;
uint8_t block_buffer[PARSER_LINE_INFO_BUFFER_MAX_SIZE];
uint8_t line_column_buffer[PARSER_LINE_INFO_BUFFER_MAX_SIZE * 2];
uint8_t *block_size_p = NULL;
uint32_t block_byte_code_offset = 0;
uint32_t block_prev_line = 1;
uint32_t stream_byte_code_offset = 0;
uint32_t stream_current_line = 1;
uint32_t stream_current_column = ECMA_LINE_INFO_COLUMN_DEFAULT;
uint32_t stream_prev_line = 1;
uint32_t stream_prev_column = ECMA_LINE_INFO_COLUMN_DEFAULT;
uint32_t stream_size = 0;
uint32_t stream_value_count = 0;
uint32_t value;
iterator.current_page_p = context_p->line_info.first_page_p;
iterator.offset = 1;
do
{
/* Decode line information generated during parsing. */
value = parser_line_info_iterator_get (&iterator);
iterator_byte_code_page_offset += (value >> 1);
if (value & 0x1)
{
value = parser_line_info_iterator_get (&iterator);
JERRY_ASSERT (value != ((0 << 1) | ECMA_LINE_INFO_DECREASE));
iterator_line = ecma_line_info_difference_update (iterator_line, value);
}
value = parser_line_info_iterator_get (&iterator);
iterator_column = ecma_line_info_difference_update (iterator_column, value);
while (iterator_byte_code_page_offset >= PARSER_CBC_STREAM_PAGE_SIZE)
{
iterator_byte_code_base += iterator_byte_code_page_p->bytes[PARSER_CBC_STREAM_PAGE_SIZE - 1];
iterator_byte_code_page_offset -= PARSER_CBC_STREAM_PAGE_SIZE;
iterator_byte_code_page_p = iterator_byte_code_page_p->next_p;
}
uint32_t iterator_byte_code_offset = iterator_byte_code_base;
if (iterator_byte_code_page_offset > 0)
{
uint8_t relative_offset = iterator_byte_code_page_p->bytes[iterator_byte_code_page_offset - 1];
iterator_byte_code_offset += relative_offset & CBC_LOWER_SEVEN_BIT_MASK;
}
/* Skip those line/column pairs which byte code was discarded during post processing
* or does not change line/column (this is possible when multiple skips occures). */
if (iterator_byte_code_offset == iterator_last_byte_code_offset
|| (iterator_line == iterator_prev_line && iterator_column == iterator_prev_column))
{
continue;
}
iterator_prev_line = iterator_line;
iterator_prev_column = iterator_column;
iterator_last_byte_code_offset = iterator_byte_code_offset;
if (block_size_p != NULL)
{
/* Sets ECMA_LINE_INFO_HAS_LINE bit. */
value = (((iterator_byte_code_offset - stream_byte_code_offset) << 1)
| (uint32_t) (stream_prev_line != stream_current_line));
uint32_t line_column_size = 0;
uint32_t offset_size = parser_line_info_encode_small (block_buffer, value);
stream_byte_code_offset = iterator_byte_code_offset;
if (value & ECMA_LINE_INFO_HAS_LINE)
{
value = parser_line_info_difference_get (stream_current_line, stream_prev_line);
line_column_size = parser_line_info_encode_small (line_column_buffer, value);
stream_prev_line = stream_current_line;
stream_prev_column = ECMA_LINE_INFO_COLUMN_DEFAULT;
}
value = parser_line_info_difference_get (stream_current_column, stream_prev_column);
line_column_size += parser_line_info_encode_small (line_column_buffer + line_column_size, value);
stream_prev_column = stream_current_column;
stream_current_line = iterator_line;
stream_current_column = iterator_column;
stream_value_count++;
if (stream_value_count < ECMA_LINE_INFO_STREAM_VALUE_COUNT_MAX
&& (stream_size + offset_size + line_column_size <= PARSER_LINE_INFO_STREAM_SIZE_LIMIT))
{
stream_size += offset_size + line_column_size;
if (line_info_p != NULL)
{
memcpy (dst_p, block_buffer, offset_size);
dst_p += offset_size;
memcpy (dst_p, line_column_buffer, line_column_size);
dst_p += line_column_size;
}
continue;
}
/* Finalize the current chunk. The size of EndOffset is always 1. */
stream_size += 1 + line_column_size;
JERRY_ASSERT (stream_size > ECMA_LINE_INFO_STREAM_SIZE_MIN
&& (stream_size - ECMA_LINE_INFO_STREAM_SIZE_MIN) <= UINT8_MAX);
if (line_info_p != NULL)
{
*block_size_p = (uint8_t) (stream_size - ECMA_LINE_INFO_STREAM_SIZE_MIN);
/* Set EndOffset to 0 and copy the has_line bit. */
*dst_p++ = (uint8_t) (block_buffer[0] & ECMA_LINE_INFO_HAS_LINE);
memcpy (dst_p, line_column_buffer, line_column_size);
dst_p += line_column_size;
}
else
{
total_length += stream_size;
dst_p = block_buffer;
}
uint32_t byte_code_diff = iterator_last_byte_code_offset - block_byte_code_offset;
dst_p += parser_line_info_encode_vlq (dst_p, byte_code_diff);
block_byte_code_offset = iterator_last_byte_code_offset;
if (line_info_p == NULL)
{
total_length += (uint32_t) (dst_p - block_buffer);
}
}
/* Start a new chunk. */
if (line_info_p == NULL)
{
dst_p = block_buffer;
}
value = parser_line_info_difference_get (iterator_line, block_prev_line);
dst_p += parser_line_info_encode_vlq (dst_p, value);
block_size_p = dst_p;
dst_p++;
if (line_info_p == NULL)
{
total_length += (uint32_t) (dst_p - block_buffer);
}
block_prev_line = iterator_line;
stream_current_line = iterator_line;
stream_current_column = iterator_column;
stream_prev_line = iterator_line;
stream_prev_column = ECMA_LINE_INFO_COLUMN_DEFAULT;
stream_size = 0;
stream_value_count = 0;
}
while (iterator.current_page_p != NULL);
value = (stream_prev_line != stream_current_line);
/* Finalize the last stream */
if (line_info_p == NULL)
{
dst_p = line_column_buffer;
total_length += stream_size + 1;
}
else
{
*block_size_p = 0;
/* Small encoded value of has_line bit. */
*dst_p++ = (uint8_t) value;
}
if (value)
{
value = parser_line_info_difference_get (stream_current_line, stream_prev_line);
dst_p += parser_line_info_encode_small (dst_p, value);
stream_prev_column = ECMA_LINE_INFO_COLUMN_DEFAULT;
}
value = parser_line_info_difference_get (stream_current_column, stream_prev_column);
dst_p += parser_line_info_encode_small (dst_p, value);
if (line_info_p == NULL)
{
total_length += (uint32_t) (dst_p - line_column_buffer);
}
if (line_info_p != NULL)
{
break;
}
total_length_size = parser_line_info_encode_vlq (block_buffer, total_length);
/* TODO: Support allocation fail. */
line_info_p = (uint8_t *) jmem_heap_alloc_block (total_length + total_length_size);
dst_p = line_info_p + parser_line_info_encode_vlq (line_info_p, total_length);
}
JERRY_ASSERT (line_info_p + total_length_size + total_length == dst_p);
#if JERRY_PARSER_DUMP_BYTE_CODE
if (context_p->is_show_opcodes)
{
ecma_line_info_dump (line_info_p);
}
#endif /* JERRY_PARSER_DUMP_BYTE_CODE */
return line_info_p;
} /* parser_line_info_generate */
#endif /* JERRY_LINE_INFO */
#endif /* JERRY_PARSER */
/**
* @}
* @}
* @}
*/