Improve RegExp compatibility with web browsers (#3339)

This modification adds support for parsing RegExps such as /A{/.
That is, if an iterator is invalid, it is treated as a normal
character.

This behaviour is defined in Annex B.1.4 of the ES2015 standard.

This only works if `JERRY_REGEXP_STRICT_MODE` is disabled
(set to zero).

JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
This commit is contained in:
Péter Gál 2019-11-21 10:59:21 +01:00 committed by Robert Fancsik
parent 7f6f562adb
commit aadfa13c38
2 changed files with 222 additions and 14 deletions

View File

@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
{ {
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token.")); return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
} }
case LIT_CHAR_NULL:
{
out_token_p->type = RE_TOK_EOF;
break;
}
case LIT_CHAR_LEFT_BRACE: case LIT_CHAR_LEFT_BRACE:
{ {
#if ENABLED (JERRY_REGEXP_STRICT_MODE) #if ENABLED (JERRY_REGEXP_STRICT_MODE)
return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token.")); return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
#else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */ #else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */
/* Make sure that the current '{' does not start an iterator.
*
* E.g: /\s+{3,4}/ should fail as there is nothing to iterate.
* However /\s+{3,4/ should be valid in web compatibility mode.
*/
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p; const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
lit_utf8_decr (&parser_ctx_p->input_curr_p); lit_utf8_decr (&parser_ctx_p->input_curr_p);
@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
ecma_free_value (JERRY_CONTEXT (error_value)); ecma_free_value (JERRY_CONTEXT (error_value));
parser_ctx_p->input_curr_p = input_curr_p; parser_ctx_p->input_curr_p = input_curr_p;
/* It was not an iterator, continue the parsing. */
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
/* FALLTHRU */
}
default:
{
out_token_p->type = RE_TOK_CHAR; out_token_p->type = RE_TOK_CHAR;
out_token_p->value = ch; out_token_p->value = ch;
#if ENABLED (JERRY_REGEXP_STRICT_MODE)
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
#else
/* In case of compatiblity mode try the following:
* 1. Try parsing an iterator after the character.
* 2.a. If no error is reported: it was an iterator so return an empty value.
* 2.b. If there was an error: it was not an iterator thus return the current position
* to the start of the iterator parsing and set the return value to the empty value.
* 3. The next 're_parse_next_token' call will handle the further parsing of characters.
*/
const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
ret_value = re_parse_iterator (parser_ctx_p, out_token_p); ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
if (!ecma_is_value_empty (ret_value)) if (!ecma_is_value_empty (ret_value))
@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
parser_ctx_p->input_curr_p = input_curr_p; parser_ctx_p->input_curr_p = input_curr_p;
ret_value = ECMA_VALUE_EMPTY; ret_value = ECMA_VALUE_EMPTY;
} }
#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */ #endif
break;
}
case LIT_CHAR_NULL:
{
out_token_p->type = RE_TOK_EOF;
break;
}
default:
{
out_token_p->type = RE_TOK_CHAR;
out_token_p->value = ch;
ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
break; break;
} }
} }

View File

@ -0,0 +1,193 @@
// Copyright JS Foundation and other contributors, http://js.foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * Execute `re` against `input` and check the outcome of `exec`.
 *
 * When `expected` is null the pattern must not match. Otherwise the match
 * array must have the same length as `expected` and be strictly equal to it
 * element by element.
 */
function test_match(re, input, expected)
{
  var actual = re.exec(input);

  if (expected !== null)
  {
    assert (actual !== null);
    assert (actual.length === expected.length);

    var position = 0;
    while (position < actual.length)
    {
      assert (actual[position] === expected[position]);
      position++;
    }
    return;
  }

  assert (actual === null);
}
/* Well-formed {min,max} and {min,} quantifiers keep their usual meaning. */
(function ()
{
  /* Each row: pattern source, input, expected exec result. */
  var rows = [
    ["A{1,2}", "B", null],
    ["A{1,2}", "", null],
    ["A{1,2}", "A", ["A"]],
    ["A{1,2}", "AA", ["AA"]],
    ["A{1,2}", "AAA", ["AA"]],
    ["A{1,}", "B", null],
    ["A{1,}", "GA", ["A"]],
    ["A{1,}", "FAAAW", ["AAA"]],
    ["A{1,}", "FAdAAW", ["A"]]
  ];

  for (var row = 0; row < rows.length; row++)
  {
    /* A fresh RegExp object is built for every check. */
    test_match (new RegExp (rows[row][0]), rows[row][1], rows[row][2]);
  }
}) ();
/* Test web compatibility (ES2015 Annex B 1.4) through the RegExp constructor:
 * a '{' that does not start a complete quantifier is matched literally. */
(function ()
{
  /* Checks are grouped by pattern source; each check is [input, expected]. */
  var groups = [
    {
      source: "A{1,2",
      checks: [
        ["A", null],
        ["AA", null],
        ["A{1,2", ["A{1,2"]],
        ["AA{1,2", ["A{1,2"]]
      ]
    },
    {
      source: "A{1,",
      checks: [
        ["A", null],
        ["AA", null],
        ["A{1,", ["A{1,"]],
        ["A{1,2", ["A{1,"]],
        ["AA{1,2", ["A{1,"]]
      ]
    },
    {
      source: "A{1",
      checks: [
        ["A", null],
        ["AA", null],
        ["A{1,", ["A{1"]],
        ["A{1,2", ["A{1"]],
        ["AA{1,2", ["A{1"]]
      ]
    },
    {
      source: "A{",
      checks: [
        ["A", null],
        ["AA", null],
        ["A{,", ["A{"]],
        ["A{1,", ["A{"]],
        ["A{1,2", ["A{"]],
        ["AA{1,2", ["A{"]]
      ]
    },
    {
      source: "{",
      checks: [
        ["", null],
        ["AA", null],
        ["{,", ["{"]],
        ["{1,", ["{"]],
        ["{1,2", ["{"]],
        ["A{1,2", ["{"]]
      ]
    },
    {
      /* The first '{' is a literal character that the {2,3} quantifier repeats. */
      source: "{{2,3}",
      checks: [
        ["", null],
        ["AA", null],
        ["{{,", ["{{"]],
        ["{{{,", ["{{{"]],
        ["{{{{,", ["{{{"]]
      ]
    },
    {
      source: "{{2,3",
      checks: [
        ["{{{{,", null],
        ["{{2,3,", ["{{2,3"]]
      ]
    }
  ];

  for (var g = 0; g < groups.length; g++)
  {
    var group = groups[g];

    for (var c = 0; c < group.checks.length; c++)
    {
      /* A fresh RegExp object is built for every check. */
      test_match (new RegExp (group.source), group.checks[c][0], group.checks[c][1]);
    }
  }
}) ();
/* The same incomplete-quantifier patterns, written as RegExp literals. */
(function ()
{
  /* Each row: RegExp literal, input, expected exec result. */
  var rows = [
    [/A{1,2/, "A", null],
    [/A{1,2/, "AA", null],
    [/A{1,2/, "A{1,2", ["A{1,2"]],
    [/A{1,2/, "AA{1,2", ["A{1,2"]],

    [/A{1,/, "A", null],
    [/A{1,/, "AA", null],
    [/A{1,/, "A{1,", ["A{1,"]],
    [/A{1,/, "A{1,2", ["A{1,"]],
    [/A{1,/, "AA{1,2", ["A{1,"]],

    [/A{1/, "A", null],
    [/A{1/, "AA", null],
    [/A{1/, "A{1,", ["A{1"]],
    [/A{1/, "A{1,2", ["A{1"]],
    [/A{1/, "AA{1,2", ["A{1"]],

    [/A{/, "A", null],
    [/A{/, "AA", null],
    [/A{/, "A{,", ["A{"]],
    [/A{/, "A{1,", ["A{"]],
    [/A{/, "A{1,2", ["A{"]],
    [/A{/, "AA{1,2", ["A{"]],

    [/{/, "", null],
    [/{/, "AA", null],
    [/{/, "{,", ["{"]],
    [/{/, "{1,", ["{"]],
    [/{/, "{1,2", ["{"]],
    [/{/, "A{1,2", ["{"]],

    [/{{2,3}/, "", null],
    [/{{2,3}/, "AA", null],
    [/{{2,3}/, "{{,", ["{{"]],
    [/{{2,3}/, "{{{,", ["{{{"]],
    [/{{2,3}/, "{{{{,", ["{{{"]],

    [/{{2,3/, "{{{{,", null],
    [/{{2,3/, "{{2,3,", ["{{2,3"]]
  ];

  for (var row = 0; row < rows.length; row++)
  {
    test_match (rows[row][0], rows[row][1], rows[row][2]);
  }
}) ();
/* An unterminated character class or group must raise a SyntaxError, both
 * through the RegExp constructor and through a RegExp literal. */
(function ()
{
  /* Run `action`; it has to throw a SyntaxError. */
  function expect_syntax_error (action)
  {
    try {
      action ();
      assert (false);
    } catch (ex) {
      assert (ex instanceof SyntaxError);
    }
  }

  var unterminated = ["[", "("];

  for (var i = 0; i < unterminated.length; i++)
  {
    var source = unterminated[i];

    expect_syntax_error (function () { new RegExp (source); });
    expect_syntax_error (function () { eval ("/" + source + "/"); });
  }
}) ();
/* A '+' iterator followed by an incomplete '{3,4': the brace part is matched
 * literally after the repeated character. */
(function ()
{
  /* NOTE: inside a string literal "\s" is plain "s"; only "\\s" reaches the
   * RegExp constructor as the \s escape. */
  var from_sources = [
    {
      source: "\s+{3,4",
      checks: [
        ["s+{3,4", null],
        ["s{3,4", ["s{3,4"]],
        ["ss{3,4", ["ss{3,4"]]
      ]
    },
    {
      source: "\\s+{3,4",
      checks: [
        [" {3,4", [" {3,4"]],
        [" d{3,4", null]
      ]
    }
  ];

  for (var s = 0; s < from_sources.length; s++)
  {
    var entry = from_sources[s];

    for (var c = 0; c < entry.checks.length; c++)
    {
      /* A fresh RegExp object is built for every check. */
      test_match (new RegExp(entry.source), entry.checks[c][0], entry.checks[c][1]);
    }
  }

  /* The same checks with RegExp literals: [literal, input, expected]. */
  var from_literals = [
    [/s+{3,4/, "s+{3,4", null],
    [/s+{3,4/, "s{3,4", ["s{3,4"]],
    [/s+{3,4/, "ss{3,4", ["ss{3,4"]],
    [/\s+{3,4/, " {3,4", [" {3,4"]],
    [/\s+{3,4/, " d{3,4", null]
  ];

  for (var row = 0; row < from_literals.length; row++)
  {
    test_match (from_literals[row][0], from_literals[row][1], from_literals[row][2]);
  }
}) ();
/* A complete {min,max} quantifier placed directly after another iterator has
 * nothing to iterate and must raise a SyntaxError. */
(function ()
{
  /* Run `build`; it has to throw a SyntaxError. */
  function must_throw_syntax_error (build)
  {
    try {
      build ();
      assert (false);
    } catch (ex) {
      assert (ex instanceof SyntaxError);
    }
  }

  /* NOTE: "\s" in the string literal below is plain "s", so this pattern is s+{3,4}. */
  must_throw_syntax_error (function () { new RegExp ("\s+{3,4}"); });
  must_throw_syntax_error (function () { eval ("/\\s+{3,4}/"); });

  must_throw_syntax_error (function () { new RegExp ("a{2,3}{2,3}"); });
  must_throw_syntax_error (function () { eval ("/a{2,3}{2,3}/"); });
}) ();