mammoth.js/lib/style-reader.js
2016-02-25 19:20:16 +00:00

242 lines
7.2 KiB
JavaScript

var _ = require("underscore");
var lop = require("lop");
var RegexTokeniser = lop.RegexTokeniser;
var documentMatchers = require("./document-matchers");
var htmlPaths = require("./html-paths");
var results = require("../lib/results");
exports.readHtmlPath = readHtmlPath;
exports.readDocumentMatcher = readDocumentMatcher;
exports.readStyle = readStyle;
function readStyle(string) {
return parseString(styleRule, string);
}
function createStyleRule() {
return lop.rules.sequence(
lop.rules.sequence.capture(documentMatcherRule()),
lop.rules.tokenOfType("whitespace"),
lop.rules.tokenOfType("arrow"),
lop.rules.sequence.capture(lop.rules.optional(lop.rules.sequence(
lop.rules.tokenOfType("whitespace"),
lop.rules.sequence.capture(htmlPathRule())
).head()))
).map(function(documentMatcher, htmlPath) {
return {
from: documentMatcher,
to: htmlPath.valueOrElse(htmlPaths.empty)
};
});
}
function readDocumentMatcher(string) {
return parseString(documentMatcherRule(), string);
}
function documentMatcherRule() {
var sequence = lop.rules.sequence;
var identifierToConstant = function(identifier, constant) {
return lop.rules.then(
lop.rules.token("identifier", identifier),
function() {
return constant;
}
);
};
var paragraphRule = identifierToConstant("p", documentMatchers.paragraph);
var runRule = identifierToConstant("r", documentMatchers.run);
var elementTypeRule = lop.rules.firstOf("p or r",
paragraphRule,
runRule
);
var styleIdRule = lop.rules.then(
classRule,
function(styleId) {
return {styleId: styleId};
}
);
var stringRule = lop.rules.then(
lop.rules.tokenOfType("string"),
function(value) {
return value;
}
);
var styleNameRule = lop.rules.then(
lop.rules.sequence(
lop.rules.tokenOfType("open-square-bracket"),
lop.rules.token("identifier", "style-name"),
lop.rules.tokenOfType("equals"),
lop.rules.sequence.capture(stringRule),
lop.rules.tokenOfType("close-square-bracket")
).head(),
function(styleName) {
return {styleName: styleName};
}
);
var listTypeRule = lop.rules.firstOf("list type",
identifierToConstant("ordered-list", {isOrdered: true}),
identifierToConstant("unordered-list", {isOrdered: false})
);
var listRule = sequence(
lop.rules.tokenOfType("colon"),
sequence.capture(listTypeRule),
sequence.cut(),
lop.rules.tokenOfType("open-paren"),
sequence.capture(integerRule),
lop.rules.tokenOfType("close-paren")
).map(function(listType, levelNumber) {
return {
list: {
isOrdered: listType.isOrdered,
levelIndex: levelNumber - 1
}
};
});
var matcherSuffix = lop.rules.firstOf("matcher suffix",
styleIdRule,
styleNameRule,
listRule
);
var matcherSuffixes = lop.rules.zeroOrMore(matcherSuffix);
var paragraphOrRun = sequence(
sequence.capture(elementTypeRule),
sequence.capture(matcherSuffixes)
).map(function(createMatcher, suffixes) {
var matcherOptions = {};
suffixes.forEach(function(suffix) {
_.extend(matcherOptions, suffix);
});
return createMatcher(matcherOptions);
});
var bold = identifierToConstant("b", documentMatchers.bold);
var italic = identifierToConstant("i", documentMatchers.italic);
var underline = identifierToConstant("u", documentMatchers.underline);
var strikethrough = identifierToConstant("strike", documentMatchers.strikethrough);
return lop.rules.firstOf("element type",
paragraphOrRun,
bold,
italic,
underline,
strikethrough
);
}
function readHtmlPath(string) {
return parseString(htmlPathRule(), string);
}
function htmlPathRule() {
var capture = lop.rules.sequence.capture;
var whitespaceRule = lop.rules.tokenOfType("whitespace");
var freshRule = lop.rules.then(
lop.rules.optional(lop.rules.sequence(
lop.rules.tokenOfType("colon"),
lop.rules.token("identifier", "fresh")
)),
function(option) {
return option.map(function() {
return true;
}).valueOrElse(false);
}
);
var tagNamesRule = lop.rules.oneOrMoreWithSeparator(
identifierRule,
lop.rules.tokenOfType("choice")
);
var styleElementRule = lop.rules.sequence(
capture(tagNamesRule),
capture(lop.rules.zeroOrMore(classRule)),
capture(freshRule)
).map(function(tagName, classNames, fresh) {
var attributes = {};
var options = {};
if (classNames.length > 0) {
attributes["class"] = classNames.join(" ");
}
if (fresh) {
options.fresh = true;
}
return htmlPaths.element(tagName, attributes, options);
});
return lop.rules.then(
lop.rules.zeroOrMoreWithSeparator(
styleElementRule,
lop.rules.sequence(
whitespaceRule,
lop.rules.tokenOfType("gt"),
whitespaceRule
)
),
htmlPaths.elements
);
}
var identifierRule = lop.rules.tokenOfType("identifier");
var integerRule = lop.rules.tokenOfType("integer");
var classRule = lop.rules.sequence(
lop.rules.tokenOfType("dot"),
lop.rules.sequence.capture(identifierRule)
).head();
function parseString(rule, string) {
var tokens = tokenise(string);
var parser = lop.Parser();
var parseResult = parser.parseTokens(rule, tokens);
if (parseResult.isSuccess()) {
return results.success(parseResult.value());
} else {
return new results.Result(null, [results.warning(describeFailure(string, parseResult))]);
}
}
function describeFailure(input, parseResult) {
return "Did not understand this style mapping, so ignored it: " + input + "\n" +
parseResult.errors().map(describeError).join("\n");
}
function describeError(error) {
return "Error was at character number " + error.characterNumber() + ": " +
"Expected " + error.expected + " but got " + error.actual;
}
function tokenise(string) {
var tokeniser = new RegexTokeniser([
{name: "identifier", regex: /([a-zA-Z][a-zA-Z0-9\-]*)/},
{name: "dot", regex: /\./},
{name: "colon", regex: /:/},
{name: "gt", regex: />/},
{name: "whitespace", regex: /\s+/},
{name: "arrow", regex: /=>/},
{name: "equals", regex: /=/},
{name: "open-paren", regex: /\(/},
{name: "close-paren", regex: /\)/},
{name: "open-square-bracket", regex: /\[/},
{name: "close-square-bracket", regex: /\]/},
{name: "string", regex: /'([^']*)'/},
{name: "integer", regex: /([0-9]+)/},
{name: "choice", regex: /\|/}
]);
return tokeniser.tokenise(string);
}
var styleRule = createStyleRule();