Add missing end-of-string checks to RegExp parser in unicode mode (#3875)

Fixes #3870.
Fixes #3871.

JerryScript-DCO-1.0-Signed-off-by: Dániel Bátyai dbatyai@inf.u-szeged.hu
2025-12-15 16:29:21 +00:00 · 2020-06-08 11:09:08 +02:00 · 2020-06-08 11:09:08 +02:00 · fed1b0c827
commit fed1b0c827
parent b7a641c124
3 changed files with 52 additions and 18 deletions
--- a/jerry-core/parser/regexp/re-parser.c
+++ b/jerry-core/parser/regexp/re-parser.c
@@ -612,30 +612,28 @@ re_parse_char_escape (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context
 #if ENABLED (JERRY_ES2015)
      if (re_ctx_p->flags & RE_FLAG_UNICODE)
      {
-        if (*re_ctx_p->input_curr_p == LIT_CHAR_LEFT_BRACE)
+        if (re_ctx_p->input_curr_p + 1 < re_ctx_p->input_end_p
+            && re_ctx_p->input_curr_p[0] == LIT_CHAR_LEFT_BRACE
+            && lit_char_is_hex_digit (re_ctx_p->input_curr_p[1]))
        {
-          re_ctx_p->input_curr_p++;
+          lit_code_point_t cp = lit_char_hex_to_int (re_ctx_p->input_curr_p[1]);
+          re_ctx_p->input_curr_p += 2;

-          if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
+          while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
          {
-            lit_code_point_t cp = lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
+            cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);

-            while (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && lit_char_is_hex_digit (*re_ctx_p->input_curr_p))
+            if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
            {
-              cp = cp * 16 + lit_char_hex_to_int (*re_ctx_p->input_curr_p++);
-
-              if (JERRY_UNLIKELY (cp > LIT_UNICODE_CODE_POINT_MAX))
-              {
-                return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
-              }
+              return ecma_raise_syntax_error (ECMA_ERR_MSG ("Invalid unicode escape sequence"));
            }
+          }

-            if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
-            {
-              re_ctx_p->input_curr_p++;
-              re_ctx_p->token.value = cp;
-              break;
-            }
+          if (re_ctx_p->input_curr_p < re_ctx_p->input_end_p && *re_ctx_p->input_curr_p == LIT_CHAR_RIGHT_BRACE)
+          {
+            re_ctx_p->input_curr_p++;
+            re_ctx_p->token.value = cp;
+            break;
          }
        }

@@ -867,7 +865,8 @@ re_parse_next_token (re_compiler_ctx_t *re_ctx_p) /**< RegExp compiler context *

 #if ENABLED (JERRY_ES2015)
      if (re_ctx_p->flags & RE_FLAG_UNICODE
-          && lit_is_code_point_utf16_high_surrogate (ch))
+          && lit_is_code_point_utf16_high_surrogate (ch)
+          && re_ctx_p->input_curr_p < re_ctx_p->input_end_p)
      {
        const ecma_char_t next = lit_cesu8_peek_next (re_ctx_p->input_curr_p);
        if (lit_is_code_point_utf16_low_surrogate (next))
--- a/tests/jerry/es2015/regression-test-issue-3870.js
+++ b/tests/jerry/es2015/regression-test-issue-3870.js
@@ -0,0 +1,15 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+assert (new RegExp("\ud800", "u").exec("\ud800")[0] === "\ud800");
--- a/tests/jerry/es2015/regression-test-issue-3871.js
+++ b/tests/jerry/es2015/regression-test-issue-3871.js
@@ -0,0 +1,20 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+try {
+  new RegExp('"\\u', 'u');
+  assert (false);
+} catch (e) {
+  assert (e instanceof SyntaxError);
+}