Update parser to support editions syntax

This commit is contained in:
dcode 2024-06-03 20:12:44 +02:00
parent 722b6359ad
commit ffe8bbfdbf
12 changed files with 167 additions and 74 deletions

2
cli/package-lock.json generated
View File

@ -36,7 +36,7 @@
},
"..": {
"name": "protobufjs",
"version": "7.1.2",
"version": "7.3.0",
"dev": true,
"hasInstallScript": true,
"license": "BSD-3-Clause",

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
syntax = "proto3";

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
syntax = "proto3";

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
syntax = "proto3";

View File

@ -44,7 +44,7 @@
"prof": "node bench/prof",
"test": "npm run test:sources && npm run test:types",
"test:sources": "tape -r ./lib/tape-adapter tests/*.js tests/node/*.js",
"test:types": "tsc tests/comp_typescript.ts --lib es2015 --esModuleInterop --strictNullChecks --experimentalDecorators --emitDecoratorMetadata && tsc tests/data/test.js.ts --lib es2015 --esModuleInterop --noEmit --strictNullChecks && tsc tests/data/*.ts --lib es2015 --esModuleInterop --noEmit --strictNullChecks",
"test:types": "tsc tests/comp_typescript.ts --lib es2015 --esModuleInterop --strictNullChecks --experimentalDecorators --emitDecoratorMetadata && tsc tests/data/test.js.ts --lib es2015 --esModuleInterop --noEmit --strictNullChecks && tsc -p tests/data/tsconfig.json --lib es2015 --esModuleInterop --noEmit --strictNullChecks",
"make": "npm run lint:sources && npm run build && npm run lint:types && node ./scripts/gentests.js && npm test"
},
"dependencies": {

View File

@ -24,8 +24,7 @@ var base10Re = /^[1-9][0-9]*$/,
base8NegRe = /^-?0[0-7]+$/,
numberRe = /^(?![eE])[0-9]*(?:\.[0-9]*)?(?:[eE][+-]?[0-9]+)?$/,
nameRe = /^[a-zA-Z_][a-zA-Z_0-9]*$/,
typeRefRe = /^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/,
fqTypeRefRe = /^(?:\.[a-zA-Z_][a-zA-Z_0-9]*)+$/;
typeRefRe = /^(?:\.?[a-zA-Z_][a-zA-Z_0-9]*)(?:\.[a-zA-Z_][a-zA-Z_0-9]*)*$/;
/**
* Result object returned from {@link parse}.
@ -82,6 +81,7 @@ function parse(source, root, options) {
imports,
weakImports,
syntax,
edition,
isProto3 = false;
var ptr = root;
@ -111,7 +111,59 @@ function parse(source, root, options) {
return values.join("");
}
function readValue(acceptTypeRef) {
/**
 * Reads a (possibly dotted, possibly fully qualified) identifier such as
 * `foo`, `foo.bar` or `.foo.bar` from the token stream.
 *
 * A trailing `.(` sequence is not part of the identifier: both tokens are
 * pushed back so the caller can handle the parenthesized part that follows
 * (for example `features.(pb.cpp)`).
 *
 * @param {string} [optionalFirstToken] Already consumed first token, if any
 * @returns {string} The identifier, including any leading and separating dots
 * @throws {Error} If a name token is malformed or the input ends prematurely
 * @inner
 * @ignore
 */
function readIdentifier(optionalFirstToken) {
    var identifier = optionalFirstToken || next();

    // `next()` yields `null` once the input is exhausted. That must be checked
    // explicitly: `nameRe.test(null)` coerces to the string "null" and would
    // otherwise accept the end of input as a valid name.
    /* istanbul ignore if */
    if (identifier === null)
        throw illegal(identifier, "end of input");

    // Consumes the next token as a name segment and appends it to `identifier`.
    function appendName() {
        var token = next();
        /* istanbul ignore if */
        if (token === null)
            throw illegal(token, "end of input");
        identifier += token;
        /* istanbul ignore if */
        if (!nameRe.test(token))
            throw illegal(identifier, "identifier");
    }

    if (identifier === ".") { // fully qualified name
        appendName();
    } else {
        /* istanbul ignore if */
        if (!nameRe.test(identifier))
            throw illegal(identifier, "identifier");
    }

    while (skip(".", true)) {
        if (skip("(", true)) {
            // `.(` starts a parenthesized part: restore both tokens for the caller
            push(".");
            push("(");
            break;
        }
        identifier += ".";
        appendName();
    }
    return identifier;
}
/**
 * Reads a complete option name, which is a dot-separated sequence of plain
 * identifiers and parenthesized (extension) identifiers, for example
 * `features.(pb.cpp).legacy_closed_enum`.
 *
 * @returns {string} The option name exactly as written, parentheses included
 * @throws {Error} If a parenthesized part is not closed by `)`
 * @inner
 * @ignore
 */
function readOptionIdentifier() {
    var path = "";
    for (;;) {
        var parenthesized = skip("(", true);
        if (parenthesized) {
            // `(` name `)`: the token following the name must be the closing paren
            path = path + "(" + readIdentifier() + next();
            /* istanbul ignore if */
            if (path.charAt(path.length - 1) !== ")")
                throw illegal(path, "identifier");
        } else {
            path = path + readIdentifier();
        }
        // another segment follows only if a dot comes next
        if (skip(".", true)) {
            path += ".";
            continue;
        }
        return path;
    }
}
function readValue(acceptIdentifier) {
var token = next();
switch (token) {
case "'":
@ -128,8 +180,8 @@ function parse(source, root, options) {
} catch (e) {
/* istanbul ignore else */
if (acceptTypeRef && typeRefRe.test(token))
return token;
if (acceptIdentifier && nameRe.test(token))
return readIdentifier(token); // `ENUM_VALUE`
/* istanbul ignore next */
throw illegal(token, "value");
@ -170,6 +222,9 @@ function parse(source, root, options) {
sign = -1;
token = token.substring(1);
}
if (skip(".", true)) {
token += "." + next();
}
switch (token) {
case "inf": case "INF": case "Inf":
return sign * Infinity;
@ -224,12 +279,7 @@ function parse(source, root, options) {
if (pkg !== undefined)
throw illegal("package");
pkg = next();
/* istanbul ignore if */
if (!typeRefRe.test(pkg))
throw illegal(pkg, "name");
pkg = readIdentifier();
ptr = ptr.define(pkg);
skip(";");
}
@ -260,12 +310,26 @@ function parse(source, root, options) {
isProto3 = syntax === "proto3";
/* istanbul ignore if */
if (!isProto3 && syntax !== "proto2")
if (!isProto3 && syntax !== "proto2" || edition)
throw illegal(syntax, "syntax");
skip(";");
}
/**
 * Parses an `edition = "...";` statement.
 *
 * Records the edition string and, for now, treats the file as proto3 by
 * setting `syntax` to "proto3" and `isProto3` to true. An edition statement
 * is rejected when a syntax has already been set.
 *
 * Parser state (`edition`, `syntax`, `isProto3`) is only updated after the
 * statement has been validated, so a rejected statement leaves it untouched.
 *
 * @returns {undefined}
 * @throws {Error} If a syntax has already been set
 * @inner
 * @ignore
 */
function parseEdition() {
    skip("=");
    var value = readString();

    /* istanbul ignore if */
    if (syntax)
        throw illegal(syntax, "edition");

    edition = value;
    syntax = "proto3";
    isProto3 = true;

    skip(";");
}
function parseCommon(parent, token) {
switch (token) {
@ -360,7 +424,7 @@ function parse(source, root, options) {
default:
/* istanbul ignore if */
if (!isProto3 || !typeRefRe.test(token))
if (!isProto3 || !nameRe.test(token))
throw illegal(token);
push(token);
@ -372,26 +436,12 @@ function parse(source, root, options) {
}
function parseField(parent, rule, extend) {
var type = next();
if (type === "group") {
if (skip("group", true)) {
parseGroup(parent, rule);
return;
}
// Type names can consume multiple tokens, in multiple variants:
// package.subpackage field tokens: "package.subpackage" [TYPE NAME ENDS HERE] "field"
// package . subpackage field tokens: "package" "." "subpackage" [TYPE NAME ENDS HERE] "field"
// package. subpackage field tokens: "package." "subpackage" [TYPE NAME ENDS HERE] "field"
// package .subpackage field tokens: "package" ".subpackage" [TYPE NAME ENDS HERE] "field"
// Keep reading tokens until we get a type name with no period at the end,
// and the next token does not start with a period.
while (type.endsWith(".") || peek().startsWith(".")) {
type += next();
}
/* istanbul ignore if */
if (!typeRefRe.test(type))
throw illegal(type, "type");
var type = readIdentifier();
var name = next();
/* istanbul ignore if */
@ -489,19 +539,14 @@ function parse(source, root, options) {
function parseMapField(parent) {
skip("<");
var keyType = next();
var keyType = readIdentifier();
/* istanbul ignore if */
if (types.mapKey[keyType] === undefined)
throw illegal(keyType, "type");
skip(",");
var valueType = next();
/* istanbul ignore if */
if (!typeRefRe.test(valueType))
throw illegal(valueType, "type");
var valueType = readIdentifier();
skip(">");
var name = next();
@ -602,30 +647,26 @@ function parse(source, root, options) {
}
function parseOption(parent, token) {
var isCustom = skip("(", true);
var identifier = readOptionIdentifier();
/* istanbul ignore if */
if (!typeRefRe.test(token = next()))
throw illegal(token, "name");
var name = token;
var option = name;
// Historically, `(some.option).prop` has been interpreted as a property
// assignment on `some.option`. While the parser understands additional
// option syntax nowadays, there's still no structural knowledge of the
// respective extension. Backwards compatibility can be retained, though:
var optionName = identifier;
var propStart = identifier.lastIndexOf(").");
var propName;
if (isCustom) {
skip(")");
name = "(" + name + ")";
option = name;
token = peek();
if (fqTypeRefRe.test(token)) {
propName = token.slice(1); //remove '.' before property name
name += token;
next();
if (~propStart) {
token = identifier.substring(propStart + 2);
if (typeRefRe.test(token)) {
propName = token;
optionName = identifier.substring(0, propStart + 1);
}
}
skip("=");
var optionValue = parseOptionValue(parent, name);
setParsedOption(parent, option, optionValue, propName);
var optionValue = parseOptionValue(parent, identifier);
setParsedOption(parent, optionName, optionValue, propName);
}
function parseOptionValue(parent, name) {
@ -638,6 +679,7 @@ function parse(source, root, options) {
if (!nameRe.test(token = next())) {
throw illegal(token, "name");
}
/* istanbul ignore if */
if (token === null) {
throw illegal(token, "end of input");
}
@ -750,20 +792,12 @@ function parse(source, root, options) {
if (skip("stream", true))
requestStream = true;
/* istanbul ignore if */
if (!typeRefRe.test(token = next()))
throw illegal(token);
requestType = token;
requestType = readIdentifier();
skip(")"); skip("returns"); skip("(");
if (skip("stream", true))
responseStream = true;
/* istanbul ignore if */
if (!typeRefRe.test(token = next()))
throw illegal(token);
responseType = token;
responseType = readIdentifier();
skip(")");
var method = new Method(name, type, requestType, responseType, requestStream, responseStream);
@ -782,11 +816,7 @@ function parse(source, root, options) {
}
function parseExtension(parent, token) {
/* istanbul ignore if */
if (!typeRefRe.test(token = next()))
throw illegal(token, "reference");
token = readIdentifier();
var reference = token;
ifBlock(null, function parseExtension_block(token) {
switch (token) {
@ -807,7 +837,7 @@ function parse(source, root, options) {
default:
/* istanbul ignore if */
if (!isProto3 || !typeRefRe.test(token))
if (!isProto3 || !nameRe.test(token))
throw illegal(token);
push(token);
parseField(parent, "optional", reference);
@ -847,6 +877,15 @@ function parse(source, root, options) {
parseSyntax();
break;
case "edition":
/* istanbul ignore if */
if (!head)
throw illegal(token);
parseEdition();
break;
case "option":
parseOption(ptr, token);
@ -872,6 +911,7 @@ function parse(source, root, options) {
"imports" : imports,
weakImports : weakImports,
syntax : syntax,
edition : edition,
root : root
};
}

View File

@ -1,7 +1,7 @@
"use strict";
module.exports = tokenize;
var delimRe = /[\s{}=;:[\],'"()<>]/g,
var delimRe = /[\s{}=;:[\],'"()<>.]/g,
stringDoubleRe = /(?:"([^"\\]*(?:\\.[^"\\]*)*)")/g,
stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g;

42
tests/comp_edition.js Normal file
View File

@ -0,0 +1,42 @@
// Tests that the parser accepts `edition = "..."` files and keeps
// `features.*` options (including parenthesized extension parts) verbatim.
var tape = require("tape");
var protobuf = require("..");
// Fixture: an edition 2023 file with file-level `features.*` options, three of
// them using a parenthesized extension name, plus one field-level option.
var proto = "edition = \"2023\";\
import \"google/protobuf/cpp_features.proto\";\
import \"google/protobuf/go_features.proto\";\
import \"google/protobuf/java_features.proto\";\
option features.field_presence = EXPLICIT;\
option features.enum_type = CLOSED;\
option features.repeated_field_encoding = EXPANDED;\
option features.json_format = LEGACY_BEST_EFFORT;\
option features.utf8_validation = NONE;\
option features.(pb.cpp).legacy_closed_enum = true;\
option features.(pb.go).legacy_unmarshal_json_enum = true;\
option features.(pb.java).legacy_closed_enum = true;\
message A {\
repeated int32 b = 1 [features.repeated_field_encoding = EXPANDED];\
}";
tape.test("edition", function(test) {
// Parse the fixture and check the reported edition and the syntax fallback.
var result = protobuf.parse(proto);
test.equal(result.edition, "2023", "should parse edition");
test.equal(result.syntax, "proto3", "should fall back to proto3 for now");
var root = result.root;
// Reaching the `pass` below means `resolveAll` did not throw.
root.resolveAll();
test.pass("should resolve without errors");
// File-level options are expected under their full dotted names, with the
// parenthesized parts preserved as written in the fixture.
test.same(root.options, {
'features.field_presence': 'EXPLICIT',
'features.enum_type': 'CLOSED',
'features.repeated_field_encoding': 'EXPANDED',
'features.json_format': 'LEGACY_BEST_EFFORT',
'features.utf8_validation': 'NONE',
'features.(pb.cpp).legacy_closed_enum': true,
'features.(pb.go).legacy_unmarshal_json_enum': true,
'features.(pb.java).legacy_closed_enum': true
}, "should parse file-level edition options");
// The bracketed option on field `b` of message `A` is expected on that field.
test.same(root.lookup("A.b").options, {
'features.repeated_field_encoding': 'EXPANDED'
}, "should parse field-level edition options");
test.end();
});

5
tests/data/tsconfig.json Normal file
View File

@ -0,0 +1,5 @@
{
"include": [
"*.ts"
]
}