mammoth.js/lib/document-to-html.js
2016-06-09 22:32:30 +01:00

306 lines
10 KiB
JavaScript

var async = require("async");
var _ = require("underscore");
var promises = require("./promises");
var documents = require("./documents");
var htmlPaths = require("./html-paths");
var results = require("./results");
var images = require("./images");
var Html = require("./html");
var writers = require("./writers");
// Public entry point of this module. DocumentConverter is a function
// declaration, so it is hoisted and can be exported before its definition.
exports.DocumentConverter = DocumentConverter;
// Scheduler used to defer work to a later turn of the event loop.
//
// The binding is deliberately named `setImmediate` so the rest of the module
// can call it unqualified. Because a `var` declaration is hoisted to the top of
// the module scope, a bare `typeof setImmediate` check in this file inspects
// the (not yet assigned) local binding rather than the platform function, so
// the platform function has to be looked up on the global object explicitly.
var setImmediate = (function() {
    var root = typeof global !== "undefined" ? global :
        typeof window !== "undefined" ? window :
        typeof self !== "undefined" ? self : {};

    if (typeof root.setImmediate === "function") {
        var nativeSetImmediate = root.setImmediate;
        return function(callback) {
            // Call with the global object as receiver in case the host
            // implementation does not accept a detached call.
            return nativeSetImmediate.call(root, callback);
        };
    }

    // Fallback for environments without a native setImmediate.
    return function(callback) {
        return setTimeout(callback, 0);
    };
})();
/**
 * Creates a converter object exposing a single `convertToHtml` method.
 *
 * Every call to `convertToHtml` builds a new DocumentConversion from the same
 * `options` and forwards the receiver and all arguments to that conversion's
 * own `convertToHtml`, returning its result.
 *
 * @param {Object} options Options handed unchanged to DocumentConversion.
 * @returns {{convertToHtml: Function}} The converter object.
 */
function DocumentConverter(options) {
    function convertToHtml() {
        var freshConversion = new DocumentConversion(options);
        return freshConversion.convertToHtml.apply(this, arguments);
    }

    return {convertToHtml: convertToHtml};
}
/**
 * Holds the state and element converters for one document conversion and
 * returns an object exposing `convertToHtml`.
 *
 * @param {Object} [options] Conversion options. This function reads
 *     `ignoreEmptyParagraphs`, `idPrefix`, `styleMap`, `prettyPrint`,
 *     `outputFormat` and `convertImage`.
 */
function DocumentConversion(options) {
// Number printed in the next note reference marker ("[1]", "[2]", ...);
// incremented each time a noteReference element is converted.
var noteNumber = 1;
// Note references in the order they were converted; the "document" converter
// resolves them to notes once the document's children have been converted.
var noteReferences = [];
// Apply defaults: empty paragraphs are ignored unless the caller says otherwise.
options = _.extend({ignoreEmptyParagraphs: true}, options);
// Prefix prepended to every generated HTML id (see htmlId); empty by default.
var idPrefix = options.idPrefix === undefined ? "" : options.idPrefix;
// HTML path used for paragraphs that match no entry in the style map.
var defaultParagraphStyle = htmlPaths.topLevelElement("p");
// Style mappings searched in order by findStyle; each entry is used as
// `entry.from.matches(element)` and `entry.to`.
var styleMap = options.styleMap || [];
/**
 * Converts a document element to HTML text.
 *
 * The element is converted to HTML nodes via elementToHtml; the nodes are then
 * simplified and written with a writer configured from `options.prettyPrint`
 * and `options.outputFormat`.
 *
 * @param {Object} document The root element to convert.
 * @returns {*} The value returned by `promises.nfcall(...).then(...)`; its
 *     fulfilment value is a `results.Result` holding the written string and
 *     the messages collected during conversion.
 */
function convertToHtml(document) {
    var messages = [];

    function renderNodes(html) {
        var writerOptions = {
            prettyPrint: options.prettyPrint,
            outputFormat: options.outputFormat
        };
        var writer = writers.writer(writerOptions);
        var simplifiedNodes = Html.simplify(html);
        Html.write(writer, simplifiedNodes);
        var output = writer.asString();
        return new results.Result(output, messages);
    }

    return promises.nfcall(elementToHtml, document, messages).then(renderNodes);
}
/**
 * Converts a list of elements to a single flat list of HTML nodes.
 *
 * Elements are passed one after another to elementToHtml through
 * `async.mapSeries`; the per-element results are flattened one level.
 *
 * @param {Array} elements Elements to convert.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Called as `callback(error)` on failure or
 *     `callback(null, nodes)` on success.
 */
function convertElements(elements, messages, callback) {
    function convertOne(element, next) {
        return elementToHtml(element, messages, next);
    }

    function onAllConverted(error, nodeLists) {
        if (error) {
            callback(error);
            return;
        }
        // Shallow flatten: each converter yields a list of nodes.
        callback(null, _.flatten(nodeLists, true));
    }

    async.mapSeries(elements, convertOne, onAllConverted);
}
/**
 * Converts one element by dispatching on `element.type` to the matching entry
 * of `elementConverters`.
 *
 * Elements whose type has no converter produce an empty node list, so callers
 * that flatten the results (see convertElements) never receive an `undefined`
 * node.
 *
 * @param {Object} element Element with a `type` property.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Node-style callback: `callback(error, nodes)`.
 */
function elementToHtml(element, messages, callback) {
    var handler = elementConverters[element.type];
    if (handler) {
        handler(element, messages, callback);
    } else {
        callback(null, []);
    }
}
/**
 * Converts a paragraph element to HTML nodes wrapped in the paragraph's style
 * path.
 *
 * The style is resolved immediately (which may push a warning onto
 * `messages`); conversion of the children is deferred with setImmediate.
 * When `options.ignoreEmptyParagraphs` is falsy, `Html.forceWrite` is
 * prepended to the children so the paragraph is written even if empty.
 *
 * @param {Object} element Paragraph element with a `children` list.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Called as `callback(error)` if converting the
 *     children fails, otherwise `callback(null, nodes)`.
 */
function convertParagraph(element, messages, callback) {
    var style = styleForParagraph(element, messages);
    setImmediate(function() {
        convertElements(element.children, messages, function(error, children) {
            if (error) {
                // Surface the failure instead of reporting success with
                // undefined children.
                callback(error);
                return;
            }
            var content = options.ignoreEmptyParagraphs ?
                children :
                [Html.forceWrite].concat(children);
            callback(null, Html.pathToNodes(style, content));
        });
    });
}
/**
 * Picks the HTML path for a paragraph.
 *
 * Returns the `to` path of the first matching style mapping. When nothing
 * matches, falls back to `defaultParagraphStyle`, first recording an
 * "unrecognised style" warning if the paragraph carries a style ID.
 *
 * @param {Object} element Paragraph element.
 * @param {Array} messages Collector the warning is pushed onto.
 * @returns {*} The HTML path to wrap the paragraph's children in.
 */
function styleForParagraph(element, messages) {
    var mapping = findStyle(element);
    if (mapping) {
        return mapping.to;
    }

    var hasStyleId = Boolean(element.styleId);
    if (hasStyleId) {
        var warning = unrecognisedStyleWarning("paragraph", element);
        messages.push(warning);
    }
    return defaultParagraphStyle;
}
/**
 * Converts a run element to HTML nodes, applying its formatting.
 *
 * A style mapping is looked up only when the run has a style ID; an unmatched
 * style ID records a warning. Formatting wrappers are applied from innermost
 * to outermost in this order: strikethrough, underline, subscript,
 * superscript, italic, bold, and finally the run's mapped style.
 *
 * @param {Object} run Run element with `children` and formatting flags.
 * @param {Array} messages Collector that warnings are pushed onto.
 * @param {Function} callback Called as `callback(error)` if converting the
 *     children fails, otherwise `callback(null, nodes)`.
 */
function convertRun(run, messages, callback) {
    var style = run.styleId ? findStyle(run) : undefined;
    if (run.styleId && !style) {
        messages.push(unrecognisedStyleWarning("run", run));
    }

    convertElements(run.children, messages, function(error, children) {
        if (error) {
            // Surface the failure instead of wrapping undefined children.
            callback(error);
            return;
        }
        var result = children;
        if (run.isStrikethrough) {
            result = convertRunProperty(result, "strikethrough", "s");
        }
        if (run.isUnderline) {
            result = convertUnderline(result);
        }
        if (run.verticalAlignment === documents.verticalAlignment.subscript) {
            result = wrapInNonFreshElement(result, "sub");
        }
        if (run.verticalAlignment === documents.verticalAlignment.superscript) {
            result = wrapInNonFreshElement(result, "sup");
        }
        if (run.isItalic) {
            result = convertRunProperty(result, "italic", "em");
        }
        if (run.isBold) {
            result = convertRunProperty(result, "bold", "strong");
        }
        if (style) {
            result = Html.pathToNodes(style.to, result);
        }
        callback(null, result);
    });
}
/**
 * Wraps nodes in a single attribute-less element created with
 * `Html.nonFreshElement`.
 *
 * @param {Array} nodes Child nodes for the wrapper.
 * @param {string} tagName Tag name of the wrapper element.
 * @returns {Array} A one-element list containing the wrapper.
 */
function wrapInNonFreshElement(nodes, tagName) {
    var noAttributes = {};
    var wrapper = Html.nonFreshElement(tagName, noAttributes, nodes);
    return [wrapper];
}
/**
 * Applies the style mapping for underline, if one exists.
 *
 * Unlike the other run properties there is no default tag: without a mapping
 * matching `{type: "underline"}` the nodes are returned unchanged.
 *
 * @param {Array} runHtml Nodes of the run converted so far.
 * @returns {Array} The wrapped nodes, or `runHtml` itself.
 */
function convertUnderline(runHtml) {
    var mapping = findStyle({type: "underline"});
    return mapping ? Html.pathToNodes(mapping.to, runHtml) : runHtml;
}
/**
 * Wraps run nodes for a formatting property such as bold or italic.
 *
 * Uses the style mapping matching `{type: elementType}` when there is one;
 * otherwise wraps the nodes in a non-fresh element named `defaultTagName`.
 *
 * @param {Array} result Nodes of the run converted so far.
 * @param {string} elementType Type used to look up a style mapping.
 * @param {string} defaultTagName Tag used when no mapping matches.
 * @returns {Array} The wrapped nodes.
 */
function convertRunProperty(result, elementType, defaultTagName) {
    var mapping = findStyle({type: elementType});
    if (!mapping) {
        return wrapInNonFreshElement(result, defaultTagName);
    }
    return Html.pathToNodes(mapping.to, result);
}
/**
 * Finds the first style mapping whose `from` matcher accepts the element.
 *
 * @param {Object} element Element (or element-like description) to match.
 * @returns {Object|undefined} The matching entry of `styleMap`, or undefined
 *     when no entry matches.
 */
function findStyle(element) {
    var index = 0;
    while (index < styleMap.length) {
        var mapping = styleMap[index];
        if (mapping.from.matches(element)) {
            return mapping;
        }
        index += 1;
    }
    return undefined;
}
// Default image converter, used when options.convertImage is not supplied.
// The image is read as base64 and supplied to images.imgElement as a `src`
// attribute holding a data URI built from the image's content type.
var defaultConvertImage = images.imgElement(function(element) {
    function toDataUriAttributes(imageBuffer) {
        var dataUri = "data:" + element.contentType + ";base64," + imageBuffer;
        return {src: dataUri};
    }

    return element.read("base64").then(toDataUriAttributes);
});
/**
 * Wraps an image converter so that a failure becomes a warning.
 *
 * If the wrapped converter reports an error through its callback, the error's
 * message is pushed onto `messages` as a warning and the image converts to an
 * empty node list; otherwise the converter's result is passed through.
 *
 * @param {Function} convertImage Converter called as
 *     `convertImage(image, messages, callback)`.
 * @returns {Function} A converter with the same signature whose callback is
 *     never given an error.
 */
function recoveringConvertImage(convertImage) {
    function convertWithRecovery(image, messages, callback) {
        function onConverted(error, result) {
            if (!error) {
                callback(null, result);
                return;
            }
            messages.push(results.warning(error.message));
            callback(null, []);
        }

        convertImage(image, messages, onConverted);
    }

    return convertWithRecovery;
}
/**
 * Builds the HTML id of a note's list item from its type and id.
 *
 * @param {Object} note Object with `noteType` and `noteId`.
 * @returns {*} The result of htmlId for "<noteType>-<noteId>".
 */
function noteHtmlId(note) {
    var suffix = note.noteType + "-" + note.noteId;
    return htmlId(suffix);
}
/**
 * Builds the HTML id of the in-text reference to a note.
 *
 * @param {Object} note Object with `noteType` and `noteId`.
 * @returns {*} The result of htmlId for "<noteType>-ref-<noteId>".
 */
function noteRefHtmlId(note) {
    var suffix = note.noteType + "-ref-" + note.noteId;
    return htmlId(suffix);
}
/**
 * Prepends the configured id prefix to a suffix.
 *
 * @param {string} suffix The unprefixed id.
 * @returns {*} `idPrefix + suffix`.
 */
function htmlId(suffix) {
    var prefixedId = idPrefix + suffix;
    return prefixedId;
}
/**
 * Converts a table element: its converted children are wrapped in a fresh
 * "table" element.
 *
 * @param {Object} element Table element.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Node-style callback receiving the nodes.
 */
function convertTable(element, messages, callback) {
    var tagName = "table";
    wrapChildrenInFreshElement(element, tagName, messages, callback);
}
/**
 * Converts a table row element: its converted children are wrapped in a fresh
 * "tr" element.
 *
 * @param {Object} element Table row element.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Node-style callback receiving the nodes.
 */
function convertTableRow(element, messages, callback) {
    var tagName = "tr";
    wrapChildrenInFreshElement(element, tagName, messages, callback);
}
/**
 * Converts a table cell element to a fresh "td" element.
 *
 * `colspan` / `rowspan` attributes are emitted only when the corresponding
 * span differs from 1. `Html.forceWrite` is prepended to the children so the
 * cell is written even when it has no content.
 *
 * @param {Object} element Cell element with `children`, `colSpan`, `rowSpan`.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Called as `callback(error)` if converting the
 *     children fails, otherwise `callback(null, [td])`.
 */
function convertTableCell(element, messages, callback) {
    convertElements(element.children, messages, function(error, children) {
        if (error) {
            // Surface the failure instead of emitting a cell with undefined children.
            callback(error);
            return;
        }
        var attributes = {};
        if (element.colSpan !== 1) {
            attributes.colspan = element.colSpan.toString();
        }
        if (element.rowSpan !== 1) {
            attributes.rowspan = element.rowSpan.toString();
        }
        callback(null, [
            Html.freshElement("td", attributes, [Html.forceWrite].concat(children))
        ]);
    });
}
/**
 * Converts an element's children and wraps them in one fresh, attribute-less
 * element.
 *
 * `Html.forceWrite` is prepended to the children so the wrapper is written
 * even when it has no content.
 *
 * @param {Object} element Element with a `children` list.
 * @param {string} wrapElementName Tag name of the wrapper element.
 * @param {Array} messages Collector that converters push warnings onto.
 * @param {Function} callback Called as `callback(error)` if converting the
 *     children fails, otherwise `callback(null, [wrapper])`.
 */
function wrapChildrenInFreshElement(element, wrapElementName, messages, callback) {
    convertElements(element.children, messages, function(error, children) {
        if (error) {
            // Surface the failure instead of wrapping undefined children.
            callback(error);
            return;
        }
        callback(null, [
            Html.freshElement(wrapElementName, {}, [Html.forceWrite].concat(children))
        ]);
    });
}
/**
 * Dispatch table from element type to converter, used by elementToHtml.
 *
 * Every converter has the signature `(element, messages, callback)` and
 * reports its outcome as `callback(error)` or `callback(null, nodes)`, where
 * `nodes` is always an array of HTML nodes. Errors from converting children
 * are passed on to the callback rather than ignored.
 */
var elementConverters = {
    "document": function(document, messages, callback) {
        convertElements(document.children, messages, function(error, children) {
            if (error) {
                callback(error);
                return;
            }
            // Notes are resolved only after the body has been converted,
            // because noteReferences is filled in as references are converted.
            var notes = noteReferences.map(function(noteReference) {
                return document.notes.resolve(noteReference);
            });
            convertElements(notes, messages, function(notesError, notesNodes) {
                if (notesError) {
                    callback(notesError);
                    return;
                }
                // The notes are appended after the body as an ordered list.
                callback(null, children.concat([
                    Html.freshElement("ol", {}, notesNodes)
                ]));
            });
        });
    },
    "paragraph": convertParagraph,
    "run": convertRun,
    "text": function(element, messages, callback) {
        callback(null, [Html.text(element.value)]);
    },
    "tab": function(element, messages, callback) {
        callback(null, [Html.text("\t")]);
    },
    "hyperlink": function(element, messages, callback) {
        // Links to an anchor in this document use the same prefixed id that
        // the "bookmarkStart" converter emits; other links use href as given.
        var href = element.anchor ? "#" + htmlId(element.anchor) : element.href;
        convertElements(element.children, messages, function(error, children) {
            if (error) {
                callback(error);
                return;
            }
            callback(null, [Html.freshElement("a", {href: href}, children)]);
        });
    },
    "bookmarkStart": function(element, messages, callback) {
        // forceWrite keeps the otherwise empty anchor in the output.
        var anchor = Html.freshElement("a", {
            id: htmlId(element.name)
        }, [Html.forceWrite]);
        callback(null, [anchor]);
    },
    "noteReference": function(element, messages, callback) {
        // Remember the reference so the "document" converter can emit the note.
        noteReferences.push(element);
        var anchor = Html.freshElement("a", {
            href: "#" + noteHtmlId(element),
            id: noteRefHtmlId(element)
        }, [Html.text("[" + (noteNumber++) + "]")]);
        callback(null, [Html.freshElement("sup", {}, [anchor])]);
    },
    "note": function(element, messages, callback) {
        convertElements(element.body, messages, function(error, children) {
            if (error) {
                callback(error);
                return;
            }
            // Link back to the in-text reference, placed in a non-fresh
            // paragraph after the note body.
            var backLink = Html.elementWithTag(htmlPaths.element("p", {}, {fresh: false}), [
                Html.text(" "),
                Html.freshElement("a", {href: "#" + noteRefHtmlId(element)}, [Html.text("↑")])
            ]);
            var body = children.concat([backLink]);
            // Wrapped in an array like every other converter's result; the
            // shallow flatten in convertElements gives the same final nodes.
            callback(null, [Html.freshElement("li", {id: noteHtmlId(element)}, body)]);
        });
    },
    // A failing image converter yields a warning instead of an error.
    "image": recoveringConvertImage(options.convertImage || defaultConvertImage),
    "table": convertTable,
    "tableRow": convertTableRow,
    "tableCell": convertTableCell,
    "lineBreak": function(element, messages, callback) {
        callback(null, [Html.selfClosingElement("br")]);
    }
};
return {
convertToHtml: convertToHtml
};
}
/**
 * Builds the warning recorded when an element's style has no mapping.
 *
 * @param {string} type Kind of element, e.g. "paragraph" or "run".
 * @param {Object} element Element whose `styleName` and `styleId` are reported.
 * @returns {*} The value of `results.warning` for the formatted message.
 */
function unrecognisedStyleWarning(type, element) {
    var description = "Unrecognised " + type + " style: '" + element.styleName + "'";
    var styleIdNote = " (Style ID: " + element.styleId + ")";
    return results.warning(description + styleIdNote);
}