From aadfa13c3877366636d00531b873e4d0ef149ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20G=C3=A1l?= Date: Thu, 21 Nov 2019 10:59:21 +0100 Subject: [PATCH] Improve RegExp compatibility with web browsers (#3339) The modification adds support to parse /A{/ like RegExps. That is: if the iterator is invalid it should be treated as a normal character. This behaviour is defined in the ES2015 standard, Annex B.1.4. This only works if the `JERRY_REGEXP_STRICT_MODE` is disabled (set to zero). JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com --- jerry-core/parser/regexp/re-parser.c | 43 ++++-- tests/jerry/regexp-web-compatibility.js | 193 ++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 14 deletions(-) create mode 100644 tests/jerry/regexp-web-compatibility.js diff --git a/jerry-core/parser/regexp/re-parser.c b/jerry-core/parser/regexp/re-parser.c index c766fd450..32a2fd2a2 100644 --- a/jerry-core/parser/regexp/re-parser.c +++ b/jerry-core/parser/regexp/re-parser.c @@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context * { return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token.")); } + case LIT_CHAR_NULL: + { + out_token_p->type = RE_TOK_EOF; + break; + } case LIT_CHAR_LEFT_BRACE: { #if ENABLED (JERRY_REGEXP_STRICT_MODE) return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token.")); #else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */ + + /* Make sure that the current '{' does not start an iterator. + * + * E.g: /\s+{3,4}/ should fail as there is nothing to iterate. + * However /\s+{3,4/ should be valid in web compatibility mode. 
+ */ const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p; lit_utf8_decr (&parser_ctx_p->input_curr_p); @@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context * ecma_free_value (JERRY_CONTEXT (error_value)); parser_ctx_p->input_curr_p = input_curr_p; - + /* It was not an iterator, continue the parsing. */ +#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */ + /* FALLTHRU */ + } + default: + { out_token_p->type = RE_TOK_CHAR; out_token_p->value = ch; +#if ENABLED (JERRY_REGEXP_STRICT_MODE) + ret_value = re_parse_iterator (parser_ctx_p, out_token_p); +#else + /* In case of compatibility mode try the following: + * 1. Try parsing an iterator after the character. + * 2.a. If no error is reported: it was an iterator so return an empty value. + * 2.b. If there was an error: it was not an iterator thus return the current position + * to the start of the iterator parsing and set the return value to the empty value. + * 3. The next 're_parse_next_token' call will handle the further parsing of characters. 
+ */ + const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p; ret_value = re_parse_iterator (parser_ctx_p, out_token_p); if (!ecma_is_value_empty (ret_value)) @@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context * parser_ctx_p->input_curr_p = input_curr_p; ret_value = ECMA_VALUE_EMPTY; } -#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */ - break; - } - case LIT_CHAR_NULL: - { - out_token_p->type = RE_TOK_EOF; - break; - } - default: - { - out_token_p->type = RE_TOK_CHAR; - out_token_p->value = ch; - ret_value = re_parse_iterator (parser_ctx_p, out_token_p); +#endif break; } } diff --git a/tests/jerry/regexp-web-compatibility.js b/tests/jerry/regexp-web-compatibility.js new file mode 100644 index 000000000..ffe0a97db --- /dev/null +++ b/tests/jerry/regexp-web-compatibility.js @@ -0,0 +1,193 @@ +// Copyright JS Foundation and other contributors, http://js.foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +function test_match(re, input, expected) +{ + var result = re.exec(input); + + if (expected === null) + { + assert (result === null); + return; + } + + assert (result !== null); + assert (result.length === expected.length); + + for (var idx = 0; idx < result.length; idx++) + { + assert (result[idx] === expected[idx]); + } +} + +test_match (new RegExp ("A{1,2}"), "B", null); +test_match (new RegExp ("A{1,2}"), "", null); +test_match (new RegExp ("A{1,2}"), "A", ["A"]); +test_match (new RegExp ("A{1,2}"), "AA", ["AA"]); +test_match (new RegExp ("A{1,2}"), "AAA", ["AA"]); + +test_match (new RegExp ("A{1,}"), "B", null); +test_match (new RegExp ("A{1,}"), "GA", ["A"]); +test_match (new RegExp ("A{1,}"), "FAAAW", ["AAA"]); +test_match (new RegExp ("A{1,}"), "FAdAAW", ["A"]); + +/* Test web compatibility (ES2015 Annex B 1.4) */ + +test_match (new RegExp ("A{1,2"), "A", null); +test_match (new RegExp ("A{1,2"), "AA", null); +test_match (new RegExp ("A{1,2"), "A{1,2", ["A{1,2"]); +test_match (new RegExp ("A{1,2"), "AA{1,2", ["A{1,2"]); + +test_match (new RegExp ("A{1,"), "A", null); +test_match (new RegExp ("A{1,"), "AA", null); +test_match (new RegExp ("A{1,"), "A{1,", ["A{1,"]); +test_match (new RegExp ("A{1,"), "A{1,2", ["A{1,"]); +test_match (new RegExp ("A{1,"), "AA{1,2", ["A{1,"]); + +test_match (new RegExp ("A{1"), "A", null); +test_match (new RegExp ("A{1"), "AA", null); +test_match (new RegExp ("A{1"), "A{1,", ["A{1"]); +test_match (new RegExp ("A{1"), "A{1,2", ["A{1"]); +test_match (new RegExp ("A{1"), "AA{1,2", ["A{1"]); + +test_match (new RegExp ("A{"), "A", null); +test_match (new RegExp ("A{"), "AA", null); +test_match (new RegExp ("A{"), "A{,", ["A{"]); +test_match (new RegExp ("A{"), "A{1,", ["A{"]); +test_match (new RegExp ("A{"), "A{1,2", ["A{"]); +test_match (new RegExp ("A{"), "AA{1,2", ["A{"]); + +test_match (new RegExp ("{"), "", null); +test_match (new RegExp ("{"), "AA", null); +test_match (new RegExp ("{"), "{,", ["{"]); +test_match (new RegExp 
("{"), "{1,", ["{"]); +test_match (new RegExp ("{"), "{1,2", ["{"]); +test_match (new RegExp ("{"), "A{1,2", ["{"]); + +test_match (new RegExp ("{{2,3}"), "", null); +test_match (new RegExp ("{{2,3}"), "AA", null); +test_match (new RegExp ("{{2,3}"), "{{,", ["{{"]); +test_match (new RegExp ("{{2,3}"), "{{{,", ["{{{"]); +test_match (new RegExp ("{{2,3}"), "{{{{,", ["{{{"]); + +test_match (new RegExp ("{{2,3"), "{{{{,", null); +test_match (new RegExp ("{{2,3"), "{{2,3,", ["{{2,3"]); + +test_match (/A{1,2/, "A", null); +test_match (/A{1,2/, "AA", null); +test_match (/A{1,2/, "A{1,2", ["A{1,2"]); +test_match (/A{1,2/, "AA{1,2", ["A{1,2"]); + +test_match (/A{1,/, "A", null); +test_match (/A{1,/, "AA", null); +test_match (/A{1,/, "A{1,", ["A{1,"]); +test_match (/A{1,/, "A{1,2", ["A{1,"]); +test_match (/A{1,/, "AA{1,2", ["A{1,"]); + +test_match (/A{1/, "A", null); +test_match (/A{1/, "AA", null); +test_match (/A{1/, "A{1,", ["A{1"]); +test_match (/A{1/, "A{1,2", ["A{1"]); +test_match (/A{1/, "AA{1,2", ["A{1"]); + +test_match (/A{/, "A", null); +test_match (/A{/, "AA", null); +test_match (/A{/, "A{,", ["A{"]); +test_match (/A{/, "A{1,", ["A{"]); +test_match (/A{/, "A{1,2", ["A{"]); +test_match (/A{/, "AA{1,2", ["A{"]); + +test_match (/{/, "", null); +test_match (/{/, "AA", null); +test_match (/{/, "{,", ["{"]); +test_match (/{/, "{1,", ["{"]); +test_match (/{/, "{1,2", ["{"]); +test_match (/{/, "A{1,2", ["{"]); + +test_match (/{{2,3}/, "", null); +test_match (/{{2,3}/, "AA", null); +test_match (/{{2,3}/, "{{,", ["{{"]); +test_match (/{{2,3}/, "{{{,", ["{{{"]); +test_match (/{{2,3}/, "{{{{,", ["{{{"]); + +test_match (/{{2,3/, "{{{{,", null); +test_match (/{{2,3/, "{{2,3,", ["{{2,3"]); + +try { + new RegExp ("["); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +try { + eval ("/[/"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +try { + new RegExp ("("); + assert (false); +} catch (ex) { + assert (ex instanceof 
SyntaxError); +} + +try { + eval ("/(/"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +test_match (new RegExp("\s+{3,4"), "s+{3,4", null); +test_match (new RegExp("\s+{3,4"), "s{3,4", ["s{3,4"]); +test_match (new RegExp("\s+{3,4"), "ss{3,4", ["ss{3,4"]); +test_match (new RegExp("\\s+{3,4"), " {3,4", [" {3,4"]); +test_match (new RegExp("\\s+{3,4"), " d{3,4", null); + +test_match (/s+{3,4/, "s+{3,4", null); +test_match (/s+{3,4/, "s{3,4", ["s{3,4"]); +test_match (/s+{3,4/, "ss{3,4", ["ss{3,4"]); +test_match (/\s+{3,4/, " {3,4", [" {3,4"]); +test_match (/\s+{3,4/, " d{3,4", null); + +try { + new RegExp ("\s+{3,4}"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +try { + eval ("/\\s+{3,4}/"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +try { + new RegExp ("a{2,3}{2,3}"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +} + +try { + eval ("/a{2,3}{2,3}/"); + assert (false); +} catch (ex) { + assert (ex instanceof SyntaxError); +}