mammoth.js/lib/document-xml-reader.js
2013-04-09 21:36:35 +01:00

157 lines
5.0 KiB
JavaScript

exports.DocumentXmlReader = DocumentXmlReader;
var q = require("q");
var _ = require("underscore");
var path = require("path");
var documents = require("./documents");
var Result = require("./results").Result;
var warning = require("./results").warning;
/**
 * Creates a reader that turns the parsed XML of a document part into the
 * object model provided by the `documents` module.
 *
 * @param {Object} relationships - lookup from relationship ID to an entry
 *     with a `target` property (used for hyperlink and image targets).
 * @param {Object} docxFile - object exposing `read(path)`; it is only bound
 *     here (not called), so images are read when the caller asks for them.
 * @returns {{convertXmlToDocument: Function, readXmlElement: Function}}
 */
function DocumentXmlReader(relationships, docxFile) {
    var hasOwn = Object.prototype.hasOwnProperty;

    /**
     * Reads the children of `w:body` into a `documents.Document`.
     *
     * @param {Object} documentXml - parsed XML exposing `root.first(name)`.
     * @returns {Result} result whose `value` (also exposed as `document`)
     *     is the converted document.
     */
    function convertXmlToDocument(documentXml) {
        var body = documentXml.root.first("w:body");
        var result = readXmlElements(body.children)
            .map(function(children) {
                return new documents.Document(children);
            });
        result.document = result.value;
        return result;
    }

    /**
     * Reads a list of XML nodes, dropping those that produced no value and
     * collecting the messages of every node into a single result.
     */
    function readXmlElements(elements) {
        var results = elements.map(readXmlElement);
        var values = _.pluck(results, "value").filter(notNull);
        var messages = _.flatten(_.pluck(results, "messages"), true);
        return new Result(values, messages);
    }

    /**
     * Reads a single XML node. Nodes that are not elements, and elements
     * listed in `ignoreElements`, yield an empty result; elements without a
     * registered reader yield a warning.
     */
    function readXmlElement(element) {
        if (element.type === "element") {
            // Own-property check so that element names such as "toString" or
            // "constructor" never resolve to members of Object.prototype.
            if (hasOwn.call(xmlElementReaders, element.name)) {
                var handler = xmlElementReaders[element.name];
                return handler(element);
            }
            if (!hasOwn.call(ignoreElements, element.name)) {
                return skipWithWarning(
                    "An unrecognised element was ignored: " + element.name
                );
            }
        }
        return new Result(null);
    }

    // Builds a result that contributes no value, only a warning message.
    function skipWithWarning(message) {
        return new Result(null, [warning(message)]);
    }

    // Returns the target of the given relationship, or null when the ID is
    // missing or does not refer to a relationship with a target.
    function findRelationshipTarget(relationshipId) {
        if (!relationshipId || !hasOwn.call(relationships, relationshipId)) {
            return null;
        }
        var relationship = relationships[relationshipId];
        return relationship && relationship.target ? relationship.target : null;
    }

    // Follows a chain of `first(name)` lookups, returning null as soon as one
    // of the steps finds nothing.
    function firstDescendant(element, names) {
        var current = element;
        for (var i = 0; current && i < names.length; i++) {
            current = current.first(names[i]);
        }
        return current || null;
    }

    var xmlElementReaders = {
        "w:p": function(element) {
            return readXmlElements(element.children)
                .map(function(children) {
                    // The properties object is passed separately rather than
                    // being kept among the paragraph's children.
                    var properties = _.find(children, isParagraphProperties);
                    return new documents.Paragraph(
                        children.filter(negate(isParagraphProperties)),
                        properties
                    );
                });
        },
        "w:pPr": function(element) {
            var properties = {
                type: "paragraphProperties"
            };
            var styleElement = element.first("w:pStyle");
            if (styleElement) {
                properties.styleName = styleElement.attributes["w:val"];
            }
            return new Result(properties);
        },
        "w:r": function(element) {
            return readXmlElements(element.children)
                .map(function(children) {
                    var properties = _.find(children, isRunProperties);
                    return new documents.Run(
                        children.filter(negate(isRunProperties)),
                        properties
                    );
                });
        },
        "w:rPr": function(element) {
            var properties = {
                type: "runProperties"
            };
            var styleElement = element.first("w:rStyle");
            if (styleElement) {
                properties.styleName = styleElement.attributes["w:val"];
            }
            properties.isBold = !!element.first("w:b");
            properties.isItalic = !!element.first("w:i");
            return new Result(properties);
        },
        "w:t": function(element) {
            // An empty text element has no child node to read a value from.
            var textNode = element.children[0];
            var text = textNode ? textNode.value : "";
            return new Result(new documents.Text(text));
        },
        "w:hyperlink": function(element) {
            var relationshipId = element.attributes["r:id"];
            var href = findRelationshipTarget(relationshipId);
            if (href === null) {
                // Hyperlinks without a resolvable r:id would otherwise abort
                // the whole conversion with a TypeError.
                return skipWithWarning(
                    "A hyperlink without a resolvable relationship was ignored: " +
                    relationshipId
                );
            }
            return readXmlElements(element.children)
                .map(function(children) {
                    return new documents.Hyperlink(children, {href: href});
                });
        },
        "w:drawing": function(element) {
            var inline = element.first("wp:inline");
            if (!inline) {
                return skipWithWarning(
                    "A drawing without a wp:inline element was ignored"
                );
            }
            return readXmlElement(inline);
        },
        "wp:inline": function(element) {
            var blip = firstDescendant(element, [
                "a:graphic",
                "a:graphicData",
                "pic:pic",
                "pic:blipFill",
                "a:blip"
            ]);
            if (!blip) {
                return skipWithWarning(
                    "An inline drawing without a picture was ignored"
                );
            }
            var relationshipId = blip.attributes["r:embed"];
            var imagePath = findRelationshipTarget(relationshipId);
            if (imagePath === null) {
                return skipWithWarning(
                    "An image without a resolvable relationship was ignored: " +
                    relationshipId
                );
            }
            // Image targets are joined onto the "word" directory.
            var readImage = docxFile.read.bind(docxFile, path.join("word", imagePath));
            var docProperties = element.first("wp:docPr");
            var altText = docProperties ? docProperties.attributes.descr : undefined;
            // NOTE(review): called without `new`, unlike the other document
            // types - confirm against the definition in ./documents.
            var image = documents.Image(readImage, altText);
            return new Result(image);
        }
    };

    return {
        convertXmlToDocument: convertXmlToDocument,
        readXmlElement: readXmlElement
    };
}
// Names of elements that are skipped without producing a warning.
// Stored as an object keyed by element name so lookups can use an
// own-property check.
var ignoreElements = [
    "w:bookmarkStart",
    "w:bookmarkEnd",
    "w:sectPr",
    "w:proofErr",
    "w:lastRenderedPageBreak"
].reduce(function(table, elementName) {
    table[elementName] = true;
    return table;
}, {});
/**
 * Predicate for filtering out `null` entries. Only `null` itself is
 * rejected; `undefined` and other falsy values pass.
 */
function notNull(value) {
    var isNull = value === null;
    return !isNull;
}
/**
 * Tells whether a value is tagged with the type "paragraphProperties".
 */
function isParagraphProperties(element) {
    var expectedType = "paragraphProperties";
    return expectedType === element.type;
}
/**
 * Tells whether a value is tagged with the type "runProperties".
 */
function isRunProperties(element) {
    var expectedType = "runProperties";
    return expectedType === element.type;
}
/**
 * Wraps a one-argument predicate so that the wrapper returns `true`
 * exactly when the predicate returns a falsy value. Only the first
 * argument is forwarded to the predicate.
 */
function negate(predicate) {
    function negated(value) {
        if (predicate(value)) {
            return false;
        }
        return true;
    }
    return negated;
}