Implement RegExp octal support

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai.u-szeged@partner.samsung.com
JerryScript-DCO-1.0-Signed-off-by: Szilard Ledan szledan.u-szeged@partner.samsung.com
This commit is contained in:
Szilard Ledan 2015-07-31 12:33:23 +02:00 committed by Dániel Bátyai
parent cf1eda0981
commit e22a085fc1
2 changed files with 76 additions and 19 deletions

View File

@ -70,6 +70,27 @@ re_parse_non_greedy_char (lit_utf8_iterator_t *iter_p) /**< RegExp pattern */
return false;
} /* re_parse_non_greedy_char */
/**
 * Parse an octal escape value from the input string iterator.
 *
 * Consumes at most three consecutive octal digits. Parsing stops early at the
 * end of the input, at the first character that is not an octal digit, or
 * before a digit that would push the value above 0377 (255). The last rule
 * follows the legacy octal escape grammar of ECMAScript (Annex B), where a
 * three digit escape must start with 0-3: e.g. "\400" is the escape "\40"
 * followed by a literal '0'.
 *
 * The iterator is only advanced past the digits that were consumed. If the
 * next character is not an octal digit (or the input is exhausted), nothing
 * is consumed and 0 is returned.
 *
 * @return uint32_t - parsed octal number, never greater than 0377
 */
static uint32_t
re_parse_octal (lit_utf8_iterator_t *iter) /**< input string iterator */
{
  const uint32_t max_octal_value = 0377;
  uint32_t number = 0;

  for (int index = 0; index < 3; index++)
  {
    if (lit_utf8_iterator_is_eos (iter)
        || !lit_char_is_octal_digit (lit_utf8_iterator_peek_next (iter)))
    {
      break;
    }

    /* Compute the value from a peeked digit so it can still be rejected. */
    uint32_t candidate = number * 8 + lit_char_hex_to_int (lit_utf8_iterator_peek_next (iter));

    if (candidate > max_octal_value)
    {
      /* The digit is not part of the escape; leave it in the input. */
      break;
    }

    (void) lit_utf8_iterator_read_next (iter);
    number = candidate;
  }

  return number;
} /* re_parse_octal */
/**
* Parse RegExp iterators
*
@ -465,26 +486,13 @@ re_parse_char_class (re_parser_ctx_t *parser_ctx_p, /**< number of classes */
ch = RE_CHAR_UNDEF;
}
else if (ch <= LIT_UTF16_CODE_UNIT_MAX
&& lit_char_is_decimal_digit ((ecma_char_t) ch))
&& lit_char_is_octal_digit ((ecma_char_t) ch)
&& ch != LIT_CHAR_0)
{
if (lit_utf8_iterator_is_eos (iter_p))
{
return ecma_raise_syntax_error ("invalid character class, end of string after '\\<digits>'");
}
if (ch != LIT_CHAR_0
|| lit_char_is_decimal_digit (lit_utf8_iterator_peek_next (iter_p)))
{
/* FIXME: octal support */
}
lit_utf8_iterator_decr (iter_p);
ch = re_parse_octal (iter_p);
}
/* FIXME: depends on the unicode support
else if (!jerry_unicode_identifier (ch))
{
JERRY_ERROR_MSG ("RegExp escape pattern error. (Char class)");
}
*/
}
} /* ch == LIT_CHAR_BACKSLASH */
if (ch == RE_CHAR_UNDEF)
{
@ -745,12 +753,43 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
{
out_token_p->type = RE_TOK_BACKREFERENCE;
}
else
/* Invalid backreference, fallback to octal */
{
/* Rewind to start of number. */
while (index-- > 0)
{
lit_utf8_iterator_decr (iter_p);
}
/* Try to reparse as octal. */
ecma_char_t digit = lit_utf8_iterator_peek_next (iter_p);
if (!lit_char_is_octal_digit (digit))
{
/* Not octal, keep digit character value. */
number = lit_utf8_iterator_read_next (iter_p);
}
else
{
number = re_parse_octal (iter_p);
}
}
out_token_p->value = number;
}
else
/* Invalid backreference, fallback to octal if possible */
{
out_token_p->value = ch;
if (!lit_char_is_octal_digit (ch))
{
/* Not octal, keep character value. */
out_token_p->value = ch;
}
else
{
lit_utf8_iterator_decr (iter_p);
out_token_p->value = re_parse_octal (iter_p);
}
}
}
}

View File

@ -56,3 +56,21 @@ assert (r.exec ("abcdefghijkl") == "abcdefghijkl");
// Control escape: \n in the pattern matches a newline character.
r = /\n/;
assert (r.exec ("\n") == "\n");

// Octal escapes inside a character class.
// \12 is octal 012, i.e. the newline character.
assert (/[\12]+/.exec ("1\n\n\n\n\n2") == "\n\n\n\n\n");
// '8' is not an octal digit, so the escape ends after "\12"; the class
// contains newline, '8' and '4'.
assert (/[\1284]+/.exec ("1\n\n8\n4\n\n2") == "\n\n8\n4\n\n");
// \8 cannot start an octal escape, so it stands for the character '8';
// the class contains '8' and '9'.
// NOTE(review): the expected result implies "\9" in the string literal is
// plain '9' - confirm for the targeted language mode.
assert (/[\89]12/.exec ("1\9128123") == "912");
// \11 is octal 011, i.e. the tab character.
assert (/[\11]/.exec ("1\n\n\t\n\n2") == "\t");
// Three digit escapes: \142, \143, \144 are 'b', 'c', 'd'.
assert (/[\142][\143][\144]/.exec ("abcde") == "bcd");

// The same escapes outside of a character class. There are no capture groups
// in these patterns, so \<digits> cannot be a backreference and falls back to
// an octal escape (or to the digit itself when it is not an octal digit).
assert (/\12+/.exec ("1\n\n\n\n\n2") == "\n\n\n\n\n");
assert (/\11/.exec ("1\n\n\t\n\n2") == "\t");
assert (/\142\143\144/.exec ("abcde") == "bcd");
// \9 is neither a valid backreference nor octal: it matches '9', followed by
// the literal "42".
assert (/\942\143\144/.exec ("a942cde") == "942cd");
// At most three octal digits are consumed: \142 is 'b', "34" is literal.
assert (/\14234/.exec ("b34") == "b34");

// Mixing backreferences and octal escapes.
// Two capture groups exist, so \1 and \2 are backreferences here.
assert (/(\d+)\2([abc]+)\1\2/.exec("123abc123abc") == "123abc123abc,123,abc");
// With only two groups, \40 and \12 are not valid backreferences and are
// parsed as octal (space and newline); \1 is still a backreference.
assert (/([abc]+)\40([d-f]+)\12\1/.exec("abc def\nabc") == "abc def\nabc,abc,def");
// Nine capture groups exist, so \9 is a valid backreference to the last one.
var expected = "8765432911,8,7,6,5,4,3,2,9,1";
assert (/(\d)(\d)(\d)(\d)(\d)(\d)(\d)(\d)\9(\d)\9/.exec("8765432911") == expected);