Improve RegExp compatibility with web browsers (#3339)

This modification adds support for parsing RegExps like /A{/. That is: if an iterator is invalid, it should be treated as a normal character. This behaviour is defined in the ES2015 standard, Annex B.1.4. It only works if `JERRY_REGEXP_STRICT_MODE` is disabled (set to zero). JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
2025-12-15 16:29:21 +00:00 · 2019-11-21 10:59:21 +01:00 · 2019-11-21 10:59:21 +01:00 · aadfa13c38
commit aadfa13c38
parent 7f6f562adb
2 changed files with 222 additions and 14 deletions
--- a/jerry-core/parser/regexp/re-parser.c
+++ b/jerry-core/parser/regexp/re-parser.c
@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
    {
      return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
    }
    case LIT_CHAR_NULL:
    {
      out_token_p->type = RE_TOK_EOF;
      break;
    }
    case LIT_CHAR_LEFT_BRACE:
    {
 #if ENABLED (JERRY_REGEXP_STRICT_MODE)
      return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
 #else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */
      /* Make sure that the current '{' does not start an iterator.
       *
       * E.g: /\s+{3,4}/ should fail as there is nothing to iterate.
       *     However /\s+{3,4/ should be valid in web compatibility mode.
       */
      const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
      lit_utf8_decr (&parser_ctx_p->input_curr_p);
@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
      ecma_free_value (JERRY_CONTEXT (error_value));
      parser_ctx_p->input_curr_p = input_curr_p;
-
+      /* It was not an iterator, continue the parsing. */
 #endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
      /* FALLTHRU */
    }
    default:
    {
      out_token_p->type = RE_TOK_CHAR;
      out_token_p->value = ch;
 #if ENABLED (JERRY_REGEXP_STRICT_MODE)
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
 #else
      /* In case of compatibility mode try the following:
       * 1. Try parsing an iterator after the character.
       * 2.a. If no error is reported: it was an iterator so return an empty value.
       * 2.b. If there was an error: it was not an iterator thus return the current position
       *      to the start of the iterator parsing and set the return value to the empty value.
       * 3. The next 're_parse_next_token' call will handle the further parsing of characters.
       */
      const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      if (!ecma_is_value_empty (ret_value))
@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
        parser_ctx_p->input_curr_p = input_curr_p;
        ret_value = ECMA_VALUE_EMPTY;
      }
-#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
+#endif
      break;
    }
    case LIT_CHAR_NULL:
    {
      out_token_p->type = RE_TOK_EOF;
      break;
    }
    default:
    {
      out_token_p->type = RE_TOK_CHAR;
      out_token_p->value = ch;
      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
      break;
    }
  }
--- a/tests/jerry/regexp-web-compatibility.js
+++ b/tests/jerry/regexp-web-compatibility.js
@ -0,0 +1,193 @@
 // Copyright JS Foundation and other contributors, http://js.foundation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 function test_match(re, input, expected)
 {
  var result = re.exec(input);
  if (expected === null)
  {
    assert (result === null);
    return;
  }
  assert (result !== null);
  assert (result.length === expected.length);
  for (var idx = 0; idx < result.length; idx++)
  {
    assert (result[idx] === expected[idx]);
  }
 }
 /* Well-formed iterators: "{1,2}" and "{1,}" must keep acting as quantifiers
  * on the preceding 'A' (bounded to two repetitions, or unbounded). */
 test_match (new RegExp ("A{1,2}"), "B", null);
 test_match (new RegExp ("A{1,2}"), "", null);
 test_match (new RegExp ("A{1,2}"), "A", ["A"]);
 test_match (new RegExp ("A{1,2}"), "AA", ["AA"]);
 test_match (new RegExp ("A{1,2}"), "AAA", ["AA"]);
 test_match (new RegExp ("A{1,}"), "B", null);
 test_match (new RegExp ("A{1,}"), "GA", ["A"]);
 test_match (new RegExp ("A{1,}"), "FAAAW", ["AAA"]);
 test_match (new RegExp ("A{1,}"), "FAdAAW", ["A"]);
 /* Test web compatiblity (ES2015 Annex B 1.4) */
 test_match (new RegExp ("A{1,2"), "A", null);
 test_match (new RegExp ("A{1,2"), "AA", null);
 test_match (new RegExp ("A{1,2"), "A{1,2", ["A{1,2"]);
 test_match (new RegExp ("A{1,2"), "AA{1,2", ["A{1,2"]);
 test_match (new RegExp ("A{1,"), "A", null);
 test_match (new RegExp ("A{1,"), "AA", null);
 test_match (new RegExp ("A{1,"), "A{1,", ["A{1,"]);
 test_match (new RegExp ("A{1,"), "A{1,2", ["A{1,"]);
 test_match (new RegExp ("A{1,"), "AA{1,2", ["A{1,"]);
 test_match (new RegExp ("A{1"), "A", null);
 test_match (new RegExp ("A{1"), "AA", null);
 test_match (new RegExp ("A{1"), "A{1,", ["A{1"]);
 test_match (new RegExp ("A{1"), "A{1,2", ["A{1"]);
 test_match (new RegExp ("A{1"), "AA{1,2", ["A{1"]);
 test_match (new RegExp ("A{"), "A", null);
 test_match (new RegExp ("A{"), "AA", null);
 test_match (new RegExp ("A{"), "A{,", ["A{"]);
 test_match (new RegExp ("A{"), "A{1,", ["A{"]);
 test_match (new RegExp ("A{"), "A{1,2", ["A{"]);
 test_match (new RegExp ("A{"), "AA{1,2", ["A{"]);
 test_match (new RegExp ("{"), "", null);
 test_match (new RegExp ("{"), "AA", null);
 test_match (new RegExp ("{"), "{,", ["{"]);
 test_match (new RegExp ("{"), "{1,", ["{"]);
 test_match (new RegExp ("{"), "{1,2", ["{"]);
 test_match (new RegExp ("{"), "A{1,2", ["{"]);
 test_match (new RegExp ("{{2,3}"), "", null);
 test_match (new RegExp ("{{2,3}"), "AA", null);
 test_match (new RegExp ("{{2,3}"), "{{,", ["{{"]);
 test_match (new RegExp ("{{2,3}"), "{{{,", ["{{{"]);
 test_match (new RegExp ("{{2,3}"), "{{{{,", ["{{{"]);
 test_match (new RegExp ("{{2,3"), "{{{{,", null);
 test_match (new RegExp ("{{2,3"), "{{2,3,", ["{{2,3"]);
 test_match (/A{1,2/, "A", null);
 test_match (/A{1,2/, "AA", null);
 test_match (/A{1,2/, "A{1,2", ["A{1,2"]);
 test_match (/A{1,2/, "AA{1,2", ["A{1,2"]);
 test_match (/A{1,/, "A", null);
 test_match (/A{1,/, "AA", null);
 test_match (/A{1,/, "A{1,", ["A{1,"]);
 test_match (/A{1,/, "A{1,2", ["A{1,"]);
 test_match (/A{1,/, "AA{1,2", ["A{1,"]);
 test_match (/A{1/, "A", null);
 test_match (/A{1/, "AA", null);
 test_match (/A{1/, "A{1,", ["A{1"]);
 test_match (/A{1/, "A{1,2", ["A{1"]);
 test_match (/A{1/, "AA{1,2", ["A{1"]);
 test_match (/A{/, "A", null);
 test_match (/A{/, "AA", null);
 test_match (/A{/, "A{,", ["A{"]);
 test_match (/A{/, "A{1,", ["A{"]);
 test_match (/A{/, "A{1,2", ["A{"]);
 test_match (/A{/, "AA{1,2", ["A{"]);
 test_match (/{/, "", null);
 test_match (/{/, "AA", null);
 test_match (/{/, "{,", ["{"]);
 test_match (/{/, "{1,", ["{"]);
 test_match (/{/, "{1,2", ["{"]);
 test_match (/{/, "A{1,2", ["{"]);
 test_match (/{{2,3}/, "", null);
 test_match (/{{2,3}/, "AA", null);
 test_match (/{{2,3}/, "{{,", ["{{"]);
 test_match (/{{2,3}/, "{{{,", ["{{{"]);
 test_match (/{{2,3}/, "{{{{,", ["{{{"]);
 test_match (/{{2,3/, "{{{{,", null);
 test_match (/{{2,3/, "{{2,3,", ["{{2,3"]);
 /* An unterminated character class '[' or group '(' is still expected to
  * raise a SyntaxError. Each case is checked through the RegExp constructor
  * and as a literal; the literal is wrapped in eval so that the error is
  * thrown when eval runs and can be caught here. */
 try {
    new RegExp ("[");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    eval ("/[/");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    new RegExp ("(");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    eval ("/(/");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 /* A '+' quantifier followed by an incomplete iterator: the "{3,4" tail is
  * expected to match literally after the repeated atom.
  * Note: inside a string literal "\s" is just the letter "s", so the first
  * three constructor cases test the pattern s+{3,4 while "\\s" produces the
  * whitespace class. The literal cases below mirror both variants. */
 test_match (new RegExp("\s+{3,4"), "s+{3,4", null);
 test_match (new RegExp("\s+{3,4"), "s{3,4", ["s{3,4"]);
 test_match (new RegExp("\s+{3,4"), "ss{3,4", ["ss{3,4"]);
 test_match (new RegExp("\\s+{3,4"), "    {3,4", ["    {3,4"]);
 test_match (new RegExp("\\s+{3,4"), "   d{3,4", null);
 test_match (/s+{3,4/, "s+{3,4", null);
 test_match (/s+{3,4/, "s{3,4", ["s{3,4"]);
 test_match (/s+{3,4/, "ss{3,4", ["ss{3,4"]);
 test_match (/\s+{3,4/, "    {3,4", ["    {3,4"]);
 test_match (/\s+{3,4/, "   d{3,4", null);
 /* A complete iterator placed directly after another quantifier has nothing
  * to iterate, so a SyntaxError is expected in every case below. */
 try {
    /* NOTE(review): "\s" in a string literal is just "s", so this checks the
     * pattern s+{3,4} while the eval case below checks the whitespace class
     * variant. Confirm whether "\\s" was intended here. */
    new RegExp ("\s+{3,4}");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    eval ("/\\s+{3,4}/");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    new RegExp ("a{2,3}{2,3}");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }
 try {
    eval ("/a{2,3}{2,3}/");
    assert (false);
 } catch (ex) {
    assert (ex instanceof SyntaxError);
 }