From aadfa13c3877366636d00531b873e4d0ef149ec2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20G=C3=A1l?= <pgal.u-szeged@partner.samsung.com>
Date: Thu, 21 Nov 2019 10:59:21 +0100
Subject: [PATCH] Improve RegExp compatibility with web browsers (#3339)

This modification adds support for parsing RegExps such as /A{/.
That is: if the iterator is invalid, it should be treated as a normal
character.

This behaviour is defined in Annex B.1.4 of the ES2015 standard.

This only works if `JERRY_REGEXP_STRICT_MODE` is disabled
(set to zero).

JerryScript-DCO-1.0-Signed-off-by: Peter Gal pgal.u-szeged@partner.samsung.com
---
 jerry-core/parser/regexp/re-parser.c    |  43 ++++--
 tests/jerry/regexp-web-compatibility.js | 193 ++++++++++++++++++++++++
 2 files changed, 222 insertions(+), 14 deletions(-)
 create mode 100644 tests/jerry/regexp-web-compatibility.js

diff --git a/jerry-core/parser/regexp/re-parser.c b/jerry-core/parser/regexp/re-parser.c
index c766fd450..32a2fd2a2 100644
--- a/jerry-core/parser/regexp/re-parser.c
+++ b/jerry-core/parser/regexp/re-parser.c
@@ -630,11 +630,22 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
     {
       return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
     }
+    case LIT_CHAR_NULL:
+    {
+      out_token_p->type = RE_TOK_EOF;
+      break;
+    }
     case LIT_CHAR_LEFT_BRACE:
     {
 #if ENABLED (JERRY_REGEXP_STRICT_MODE)
       return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid RegExp token."));
 #else /* !ENABLED (JERRY_REGEXP_STRICT_MODE) */
+
+      /* Make sure that the current '{' does not start an iterator.
+       *
+       * E.g: /\s+{3,4}/ should fail as there is nothing to iterate.
+       *     However /\s+{3,4/ should be valid in web compatibility mode.
+       */
       const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
 
       lit_utf8_decr (&parser_ctx_p->input_curr_p);
@@ -648,9 +659,25 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
       ecma_free_value (JERRY_CONTEXT (error_value));
 
       parser_ctx_p->input_curr_p = input_curr_p;
-
+      /* It was not an iterator, continue the parsing. */
+#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
+      /* FALLTHRU */
+    }
+    default:
+    {
       out_token_p->type = RE_TOK_CHAR;
       out_token_p->value = ch;
+#if ENABLED (JERRY_REGEXP_STRICT_MODE)
+      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
+#else
+      /* In case of compatibility mode try the following:
+       * 1. Try parsing an iterator after the character.
+       * 2.a. If no error is reported: it was an iterator so return an empty value.
+       * 2.b. If there was an error: it was not an iterator thus return the current position
+       *      to the start of the iterator parsing and set the return value to the empty value.
+       * 3. The next 're_parse_next_token' call will handle the further parsing of characters.
+       */
+      const lit_utf8_byte_t *input_curr_p = parser_ctx_p->input_curr_p;
       ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
 
       if (!ecma_is_value_empty (ret_value))
@@ -659,19 +686,7 @@ re_parse_next_token (re_parser_ctx_t *parser_ctx_p, /**< RegExp parser context *
         parser_ctx_p->input_curr_p = input_curr_p;
         ret_value = ECMA_VALUE_EMPTY;
       }
-#endif /* ENABLED (JERRY_REGEXP_STRICT_MODE) */
-      break;
-    }
-    case LIT_CHAR_NULL:
-    {
-      out_token_p->type = RE_TOK_EOF;
-      break;
-    }
-    default:
-    {
-      out_token_p->type = RE_TOK_CHAR;
-      out_token_p->value = ch;
-      ret_value = re_parse_iterator (parser_ctx_p, out_token_p);
+#endif
       break;
     }
   }
diff --git a/tests/jerry/regexp-web-compatibility.js b/tests/jerry/regexp-web-compatibility.js
new file mode 100644
index 000000000..ffe0a97db
--- /dev/null
+++ b/tests/jerry/regexp-web-compatibility.js
@@ -0,0 +1,193 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+function test_match(re, input, expected) /* Assert that re.exec (input) yields 'expected'. */
+{
+  var result = re.exec(input);
+
+  if (expected === null) /* A null expectation means the pattern must not match. */
+  {
+    assert (result === null);
+    return;
+  }
+
+  assert (result !== null);
+  assert (result.length === expected.length); /* Same number of entries in both arrays. */
+
+  for (var idx = 0; idx < result.length; idx++) /* Compare every entry with strict equality. */
+  {
+    assert (result[idx] === expected[idx]);
+  }
+}
+
+test_match (new RegExp ("A{1,2}"), "B", null);
+test_match (new RegExp ("A{1,2}"), "", null);
+test_match (new RegExp ("A{1,2}"), "A", ["A"]);
+test_match (new RegExp ("A{1,2}"), "AA", ["AA"]);
+test_match (new RegExp ("A{1,2}"), "AAA", ["AA"]);
+
+test_match (new RegExp ("A{1,}"), "B", null);
+test_match (new RegExp ("A{1,}"), "GA", ["A"]);
+test_match (new RegExp ("A{1,}"), "FAAAW", ["AAA"]);
+test_match (new RegExp ("A{1,}"), "FAdAAW", ["A"]);
+
+/* Test web compatibility (ES2015 Annex B.1.4) */
+
+test_match (new RegExp ("A{1,2"), "A", null);
+test_match (new RegExp ("A{1,2"), "AA", null);
+test_match (new RegExp ("A{1,2"), "A{1,2", ["A{1,2"]);
+test_match (new RegExp ("A{1,2"), "AA{1,2", ["A{1,2"]);
+
+test_match (new RegExp ("A{1,"), "A", null);
+test_match (new RegExp ("A{1,"), "AA", null);
+test_match (new RegExp ("A{1,"), "A{1,", ["A{1,"]);
+test_match (new RegExp ("A{1,"), "A{1,2", ["A{1,"]);
+test_match (new RegExp ("A{1,"), "AA{1,2", ["A{1,"]);
+
+test_match (new RegExp ("A{1"), "A", null);
+test_match (new RegExp ("A{1"), "AA", null);
+test_match (new RegExp ("A{1"), "A{1,", ["A{1"]);
+test_match (new RegExp ("A{1"), "A{1,2", ["A{1"]);
+test_match (new RegExp ("A{1"), "AA{1,2", ["A{1"]);
+
+test_match (new RegExp ("A{"), "A", null);
+test_match (new RegExp ("A{"), "AA", null);
+test_match (new RegExp ("A{"), "A{,", ["A{"]);
+test_match (new RegExp ("A{"), "A{1,", ["A{"]);
+test_match (new RegExp ("A{"), "A{1,2", ["A{"]);
+test_match (new RegExp ("A{"), "AA{1,2", ["A{"]);
+
+test_match (new RegExp ("{"), "", null);
+test_match (new RegExp ("{"), "AA", null);
+test_match (new RegExp ("{"), "{,", ["{"]);
+test_match (new RegExp ("{"), "{1,", ["{"]);
+test_match (new RegExp ("{"), "{1,2", ["{"]);
+test_match (new RegExp ("{"), "A{1,2", ["{"]);
+
+test_match (new RegExp ("{{2,3}"), "", null);
+test_match (new RegExp ("{{2,3}"), "AA", null);
+test_match (new RegExp ("{{2,3}"), "{{,", ["{{"]);
+test_match (new RegExp ("{{2,3}"), "{{{,", ["{{{"]);
+test_match (new RegExp ("{{2,3}"), "{{{{,", ["{{{"]);
+
+test_match (new RegExp ("{{2,3"), "{{{{,", null);
+test_match (new RegExp ("{{2,3"), "{{2,3,", ["{{2,3"]);
+
+test_match (/A{1,2/, "A", null);
+test_match (/A{1,2/, "AA", null);
+test_match (/A{1,2/, "A{1,2", ["A{1,2"]);
+test_match (/A{1,2/, "AA{1,2", ["A{1,2"]);
+
+test_match (/A{1,/, "A", null);
+test_match (/A{1,/, "AA", null);
+test_match (/A{1,/, "A{1,", ["A{1,"]);
+test_match (/A{1,/, "A{1,2", ["A{1,"]);
+test_match (/A{1,/, "AA{1,2", ["A{1,"]);
+
+test_match (/A{1/, "A", null);
+test_match (/A{1/, "AA", null);
+test_match (/A{1/, "A{1,", ["A{1"]);
+test_match (/A{1/, "A{1,2", ["A{1"]);
+test_match (/A{1/, "AA{1,2", ["A{1"]);
+
+test_match (/A{/, "A", null);
+test_match (/A{/, "AA", null);
+test_match (/A{/, "A{,", ["A{"]);
+test_match (/A{/, "A{1,", ["A{"]);
+test_match (/A{/, "A{1,2", ["A{"]);
+test_match (/A{/, "AA{1,2", ["A{"]);
+
+test_match (/{/, "", null);
+test_match (/{/, "AA", null);
+test_match (/{/, "{,", ["{"]);
+test_match (/{/, "{1,", ["{"]);
+test_match (/{/, "{1,2", ["{"]);
+test_match (/{/, "A{1,2", ["{"]);
+
+test_match (/{{2,3}/, "", null);
+test_match (/{{2,3}/, "AA", null);
+test_match (/{{2,3}/, "{{,", ["{{"]);
+test_match (/{{2,3}/, "{{{,", ["{{{"]);
+test_match (/{{2,3}/, "{{{{,", ["{{{"]);
+
+test_match (/{{2,3/, "{{{{,", null);
+test_match (/{{2,3/, "{{2,3,", ["{{2,3"]);
+
+try {
+    new RegExp ("[");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    eval ("/[/");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    new RegExp ("(");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    eval ("/(/");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+test_match (new RegExp("\s+{3,4"), "s+{3,4", null);
+test_match (new RegExp("\s+{3,4"), "s{3,4", ["s{3,4"]);
+test_match (new RegExp("\s+{3,4"), "ss{3,4", ["ss{3,4"]);
+test_match (new RegExp("\\s+{3,4"), "    {3,4", ["    {3,4"]);
+test_match (new RegExp("\\s+{3,4"), "   d{3,4", null);
+
+test_match (/s+{3,4/, "s+{3,4", null);
+test_match (/s+{3,4/, "s{3,4", ["s{3,4"]);
+test_match (/s+{3,4/, "ss{3,4", ["ss{3,4"]);
+test_match (/\s+{3,4/, "    {3,4", ["    {3,4"]);
+test_match (/\s+{3,4/, "   d{3,4", null);
+
+try {
+    new RegExp ("\s+{3,4}");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    eval ("/\\s+{3,4}/");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    new RegExp ("a{2,3}{2,3}");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}
+
+try {
+    eval ("/a{2,3}{2,3}/");
+    assert (false);
+} catch (ex) {
+    assert (ex instanceof SyntaxError);
+}