mirror of
https://github.com/mwilliamson/mammoth.js.git
synced 2024-12-08 15:14:29 +00:00
242 lines
7.2 KiB
JavaScript
242 lines
7.2 KiB
JavaScript
var _ = require("underscore");
|
|
var lop = require("lop");
|
|
var RegexTokeniser = lop.RegexTokeniser;
|
|
|
|
var documentMatchers = require("./document-matchers");
|
|
var htmlPaths = require("./html-paths");
|
|
var results = require("../lib/results");
|
|
|
|
exports.readHtmlPath = readHtmlPath;
|
|
exports.readDocumentMatcher = readDocumentMatcher;
|
|
exports.readStyle = readStyle;
|
|
|
|
|
|
function readStyle(string) {
|
|
return parseString(styleRule, string);
|
|
}
|
|
|
|
function createStyleRule() {
|
|
return lop.rules.sequence(
|
|
lop.rules.sequence.capture(documentMatcherRule()),
|
|
lop.rules.tokenOfType("whitespace"),
|
|
lop.rules.tokenOfType("arrow"),
|
|
lop.rules.sequence.capture(lop.rules.optional(lop.rules.sequence(
|
|
lop.rules.tokenOfType("whitespace"),
|
|
lop.rules.sequence.capture(htmlPathRule())
|
|
).head()))
|
|
).map(function(documentMatcher, htmlPath) {
|
|
return {
|
|
from: documentMatcher,
|
|
to: htmlPath.valueOrElse(htmlPaths.empty)
|
|
};
|
|
});
|
|
}
|
|
|
|
function readDocumentMatcher(string) {
|
|
return parseString(documentMatcherRule(), string);
|
|
}
|
|
|
|
function documentMatcherRule() {
|
|
var sequence = lop.rules.sequence;
|
|
|
|
var identifierToConstant = function(identifier, constant) {
|
|
return lop.rules.then(
|
|
lop.rules.token("identifier", identifier),
|
|
function() {
|
|
return constant;
|
|
}
|
|
);
|
|
};
|
|
|
|
var paragraphRule = identifierToConstant("p", documentMatchers.paragraph);
|
|
var runRule = identifierToConstant("r", documentMatchers.run);
|
|
|
|
var elementTypeRule = lop.rules.firstOf("p or r",
|
|
paragraphRule,
|
|
runRule
|
|
);
|
|
|
|
var styleIdRule = lop.rules.then(
|
|
classRule,
|
|
function(styleId) {
|
|
return {styleId: styleId};
|
|
}
|
|
);
|
|
|
|
var stringRule = lop.rules.then(
|
|
lop.rules.tokenOfType("string"),
|
|
function(value) {
|
|
return value;
|
|
}
|
|
);
|
|
|
|
var styleNameRule = lop.rules.then(
|
|
lop.rules.sequence(
|
|
lop.rules.tokenOfType("open-square-bracket"),
|
|
lop.rules.token("identifier", "style-name"),
|
|
lop.rules.tokenOfType("equals"),
|
|
lop.rules.sequence.capture(stringRule),
|
|
lop.rules.tokenOfType("close-square-bracket")
|
|
).head(),
|
|
function(styleName) {
|
|
return {styleName: styleName};
|
|
}
|
|
);
|
|
|
|
|
|
var listTypeRule = lop.rules.firstOf("list type",
|
|
identifierToConstant("ordered-list", {isOrdered: true}),
|
|
identifierToConstant("unordered-list", {isOrdered: false})
|
|
);
|
|
var listRule = sequence(
|
|
lop.rules.tokenOfType("colon"),
|
|
sequence.capture(listTypeRule),
|
|
sequence.cut(),
|
|
lop.rules.tokenOfType("open-paren"),
|
|
sequence.capture(integerRule),
|
|
lop.rules.tokenOfType("close-paren")
|
|
).map(function(listType, levelNumber) {
|
|
return {
|
|
list: {
|
|
isOrdered: listType.isOrdered,
|
|
levelIndex: levelNumber - 1
|
|
}
|
|
};
|
|
});
|
|
var matcherSuffix = lop.rules.firstOf("matcher suffix",
|
|
styleIdRule,
|
|
styleNameRule,
|
|
listRule
|
|
);
|
|
var matcherSuffixes = lop.rules.zeroOrMore(matcherSuffix);
|
|
|
|
var paragraphOrRun = sequence(
|
|
sequence.capture(elementTypeRule),
|
|
sequence.capture(matcherSuffixes)
|
|
).map(function(createMatcher, suffixes) {
|
|
var matcherOptions = {};
|
|
suffixes.forEach(function(suffix) {
|
|
_.extend(matcherOptions, suffix);
|
|
});
|
|
return createMatcher(matcherOptions);
|
|
});
|
|
|
|
var bold = identifierToConstant("b", documentMatchers.bold);
|
|
var italic = identifierToConstant("i", documentMatchers.italic);
|
|
var underline = identifierToConstant("u", documentMatchers.underline);
|
|
var strikethrough = identifierToConstant("strike", documentMatchers.strikethrough);
|
|
|
|
return lop.rules.firstOf("element type",
|
|
paragraphOrRun,
|
|
bold,
|
|
italic,
|
|
underline,
|
|
strikethrough
|
|
);
|
|
}
|
|
|
|
function readHtmlPath(string) {
|
|
return parseString(htmlPathRule(), string);
|
|
}
|
|
|
|
function htmlPathRule() {
|
|
var capture = lop.rules.sequence.capture;
|
|
var whitespaceRule = lop.rules.tokenOfType("whitespace");
|
|
var freshRule = lop.rules.then(
|
|
lop.rules.optional(lop.rules.sequence(
|
|
lop.rules.tokenOfType("colon"),
|
|
lop.rules.token("identifier", "fresh")
|
|
)),
|
|
function(option) {
|
|
return option.map(function() {
|
|
return true;
|
|
}).valueOrElse(false);
|
|
}
|
|
);
|
|
|
|
var tagNamesRule = lop.rules.oneOrMoreWithSeparator(
|
|
identifierRule,
|
|
lop.rules.tokenOfType("choice")
|
|
);
|
|
|
|
var styleElementRule = lop.rules.sequence(
|
|
capture(tagNamesRule),
|
|
capture(lop.rules.zeroOrMore(classRule)),
|
|
capture(freshRule)
|
|
).map(function(tagName, classNames, fresh) {
|
|
var attributes = {};
|
|
var options = {};
|
|
if (classNames.length > 0) {
|
|
attributes["class"] = classNames.join(" ");
|
|
}
|
|
if (fresh) {
|
|
options.fresh = true;
|
|
}
|
|
return htmlPaths.element(tagName, attributes, options);
|
|
});
|
|
|
|
return lop.rules.then(
|
|
lop.rules.zeroOrMoreWithSeparator(
|
|
styleElementRule,
|
|
lop.rules.sequence(
|
|
whitespaceRule,
|
|
lop.rules.tokenOfType("gt"),
|
|
whitespaceRule
|
|
)
|
|
),
|
|
htmlPaths.elements
|
|
);
|
|
}
|
|
|
|
var identifierRule = lop.rules.tokenOfType("identifier");
|
|
var integerRule = lop.rules.tokenOfType("integer");
|
|
|
|
var classRule = lop.rules.sequence(
|
|
lop.rules.tokenOfType("dot"),
|
|
lop.rules.sequence.capture(identifierRule)
|
|
).head();
|
|
|
|
function parseString(rule, string) {
|
|
var tokens = tokenise(string);
|
|
var parser = lop.Parser();
|
|
var parseResult = parser.parseTokens(rule, tokens);
|
|
if (parseResult.isSuccess()) {
|
|
return results.success(parseResult.value());
|
|
} else {
|
|
return new results.Result(null, [results.warning(describeFailure(string, parseResult))]);
|
|
}
|
|
}
|
|
|
|
function describeFailure(input, parseResult) {
|
|
return "Did not understand this style mapping, so ignored it: " + input + "\n" +
|
|
parseResult.errors().map(describeError).join("\n");
|
|
}
|
|
|
|
function describeError(error) {
|
|
return "Error was at character number " + error.characterNumber() + ": " +
|
|
"Expected " + error.expected + " but got " + error.actual;
|
|
}
|
|
|
|
function tokenise(string) {
|
|
var tokeniser = new RegexTokeniser([
|
|
{name: "identifier", regex: /([a-zA-Z][a-zA-Z0-9\-]*)/},
|
|
{name: "dot", regex: /\./},
|
|
{name: "colon", regex: /:/},
|
|
{name: "gt", regex: />/},
|
|
{name: "whitespace", regex: /\s+/},
|
|
{name: "arrow", regex: /=>/},
|
|
{name: "equals", regex: /=/},
|
|
{name: "open-paren", regex: /\(/},
|
|
{name: "close-paren", regex: /\)/},
|
|
{name: "open-square-bracket", regex: /\[/},
|
|
{name: "close-square-bracket", regex: /\]/},
|
|
{name: "string", regex: /'([^']*)'/},
|
|
{name: "integer", regex: /([0-9]+)/},
|
|
{name: "choice", regex: /\|/}
|
|
]);
|
|
return tokeniser.tokenise(string);
|
|
}
|
|
|
|
|
|
var styleRule = createStyleRule();
|