mirror of
https://github.com/jerryscript-project/jerryscript.git
synced 2026-01-27 04:38:04 +00:00
Support \u200C \u200D unicode characters (#3266)
JerryScript-DCO-1.0-Signed-off-by: Robert Fancsik frobert@inf.u-szeged.hu
This commit is contained in:
parent
6a342fcdd6
commit
eee41ec734
@ -171,11 +171,11 @@ static const uint16_t lit_unicode_non_letter_ident_part_interval_sps[] JERRY_ATT
|
||||
0x17e0, 0x180b, 0x1810, 0x1885, 0x1920, 0x1930, 0x1946, 0x19d0, 0x1a17, 0x1a55,
|
||||
0x1a60, 0x1a7f, 0x1a90, 0x1ab0, 0x1b00, 0x1b34, 0x1b50, 0x1b6b, 0x1b80, 0x1ba1,
|
||||
0x1bb0, 0x1be6, 0x1c24, 0x1c40, 0x1c50, 0x1cd0, 0x1cd4, 0x1cf2, 0x1cf8, 0x1dc0,
|
||||
0x1dfb, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099, 0xa620, 0xa674,
|
||||
0xa69e, 0xa6f0, 0xa823, 0xa880, 0xa8b4, 0xa8d0, 0xa8e0, 0xa900, 0xa926, 0xa947,
|
||||
0xa980, 0xa9b3, 0xa9d0, 0xa9f0, 0xaa29, 0xaa4c, 0xaa50, 0xaa7b, 0xaab2, 0xaab7,
|
||||
0xaabe, 0xaaeb, 0xaaf5, 0xabe3, 0xabec, 0xabf0, 0xfe00, 0xfe20, 0xfe33, 0xfe4d,
|
||||
0xff10
|
||||
0x1dfb, 0x200c, 0x203f, 0x20d0, 0x20e5, 0x2cef, 0x2de0, 0x302a, 0x3099, 0xa620,
|
||||
0xa674, 0xa69e, 0xa6f0, 0xa823, 0xa880, 0xa8b4, 0xa8d0, 0xa8e0, 0xa900, 0xa926,
|
||||
0xa947, 0xa980, 0xa9b3, 0xa9d0, 0xa9f0, 0xaa29, 0xaa4c, 0xaa50, 0xaa7b, 0xaab2,
|
||||
0xaab7, 0xaabe, 0xaaeb, 0xaaf5, 0xabe3, 0xabec, 0xabf0, 0xfe00, 0xfe20, 0xfe33,
|
||||
0xfe4d, 0xff10
|
||||
};
|
||||
|
||||
/**
|
||||
@ -201,11 +201,11 @@ static const uint8_t lit_unicode_non_letter_ident_part_interval_lengths[] JERRY_
|
||||
0x0009, 0x0002, 0x0009, 0x0001, 0x000b, 0x000b, 0x0009, 0x0009, 0x0004, 0x0009,
|
||||
0x001c, 0x000a, 0x0009, 0x000d, 0x0004, 0x0010, 0x0009, 0x0008, 0x0002, 0x000c,
|
||||
0x0009, 0x000d, 0x0013, 0x0009, 0x0009, 0x0002, 0x0014, 0x0002, 0x0001, 0x0035,
|
||||
0x0004, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001, 0x0009, 0x0009,
|
||||
0x0001, 0x0001, 0x0004, 0x0001, 0x0011, 0x0009, 0x0011, 0x0009, 0x0007, 0x000c,
|
||||
0x0003, 0x000d, 0x0009, 0x0009, 0x000d, 0x0001, 0x0009, 0x0002, 0x0002, 0x0001,
|
||||
0x0001, 0x0004, 0x0001, 0x0007, 0x0001, 0x0009, 0x000f, 0x000f, 0x0001, 0x0002,
|
||||
0x0009
|
||||
0x0004, 0x0001, 0x0001, 0x000c, 0x000b, 0x0002, 0x001f, 0x0005, 0x0001, 0x0009,
|
||||
0x0009, 0x0001, 0x0001, 0x0004, 0x0001, 0x0011, 0x0009, 0x0011, 0x0009, 0x0007,
|
||||
0x000c, 0x0003, 0x000d, 0x0009, 0x0009, 0x000d, 0x0001, 0x0009, 0x0002, 0x0002,
|
||||
0x0001, 0x0001, 0x0004, 0x0001, 0x0007, 0x0001, 0x0009, 0x000f, 0x000f, 0x0001,
|
||||
0x0002, 0x0009
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
31
tests/jerry/unicode-format-control-characters.js
Normal file
31
tests/jerry/unicode-format-control-characters.js
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright JS Foundation and other contributors, http://js.foundation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/**
 * Check that evaluating the given source text is rejected with a SyntaxError.
 *
 * The text is passed to eval on purpose: the point of this helper is to
 * observe how the parser reacts to source code that is expected to be invalid.
 * `assert` is not defined here; it is expected to be supplied by the test
 * environment.
 *
 * @param {string} str - source text that is expected to be syntactically invalid
 */
function checkSyntax (str) {
  var error = null;

  try {
    eval (str);
  } catch (e) {
    error = e;
  }

  // The check is done outside of the try block, so a failing assertion can
  // never be intercepted by the catch clause above. If eval completed without
  // throwing, error is still null and the assertion fails as well.
  assert (error instanceof SyntaxError);
}
|
||||
|
||||
// Of the format-control characters, only \u200C (zero width non-joiner) and \u200D (zero width joiner) are allowed inside identifiers
|
||||
|
||||
checkSyntax ("_\u200b\u200d");
|
||||
checkSyntax ("_\u200c\u200e");
|
||||
|
||||
var _\u200c\u200d = 5;
|
||||
|
||||
assert (_\u200c\u200d === 5);
|
||||
@ -137,6 +137,14 @@ class UnicodeCategorizer(object):
|
||||
if zero_width_space not in separators:
|
||||
bisect.insort(separators, int(zero_width_space))
|
||||
|
||||
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.1 format-control characters
|
||||
non_letters = self._categories['non_letters']
|
||||
zero_width_non_joiner = 0x200C
|
||||
zero_width_joiner = 0x200D
|
||||
|
||||
bisect.insort(non_letters, int(zero_width_non_joiner))
|
||||
bisect.insort(non_letters, int(zero_width_joiner))
|
||||
|
||||
return self._categories['letters'], self._categories['non_letters'], self._categories['separators']
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user