// Mirror of https://github.com/mwilliamson/mammoth.js.git
// (synced 2024-12-08 15:14:29 +00:00; 157 lines, 5.0 KiB, JavaScript)
// Public entry point of this module. DocumentXmlReader is a function
// declaration further down the file, so it is hoisted and already defined here.
exports.DocumentXmlReader = DocumentXmlReader;

// Third-party and Node.js core dependencies.
var q = require("q");
var _ = require("underscore");
var path = require("path");

// Project-local modules: the document node constructors and the
// Result/warning helpers used to carry values together with messages.
var documents = require("./documents");
var Result = require("./results").Result;
var warning = require("./results").warning;

/**
 * Creates a reader that turns parsed document XML into `documents.*` nodes.
 *
 * Every read returns a `Result`: `value` holds the converted node (or `null`
 * when the element produces nothing) and `messages` holds any warnings.
 *
 * @param {Object} relationships - Lookup keyed by relationship ID; each entry
 *     is read for its `target` property (hyperlink href / image path).
 * @param {Object} docxFile - Object whose `read(path)` method is bound to the
 *     image path (under "word/") and handed to `documents.Image`.
 * @returns {{convertXmlToDocument: Function, readXmlElement: Function}}
 */
function DocumentXmlReader(relationships, docxFile) {
    /**
     * Converts a whole document: reads the children of the root's `w:body`
     * into a `documents.Document`.
     *
     * @param {Object} documentXml - Parsed XML exposing `root.first(name)`.
     * @returns {Result} Result whose `value` (also exposed as `document`)
     *     is the Document.
     */
    function convertXmlToDocument(documentXml) {
        var body = documentXml.root.first("w:body");

        var result = readXmlElements(body.children)
            .map(function(children) {
                return new documents.Document(children);
            });
        result.document = result.value;
        return result;
    }

    /**
     * Reads a list of XML nodes, dropping `null` values and collecting the
     * messages of every individual read into a single Result.
     */
    function readXmlElements(elements) {
        var results = elements.map(readXmlElement);
        var values = _.pluck(results, "value").filter(notNull);
        var messages = _.flatten(_.pluck(results, "messages"), true);
        return new Result(values, messages);
    }

    /**
     * Reads a single XML node.
     *
     * Elements with a registered reader are converted by it. Elements listed
     * in `ignoreElements` and non-element nodes yield an empty Result. Any
     * other element yields a `null` value with a warning.
     *
     * @param {Object} element - XML node with `type` and, for elements, `name`.
     * @returns {Result}
     */
    function readXmlElement(element) {
        if (element.type === "element") {
            // Own-property check: element names come from the input document,
            // so a name such as "constructor" or "toString" must not resolve
            // to a member inherited from Object.prototype.
            var handler = Object.prototype.hasOwnProperty.call(xmlElementReaders, element.name) ?
                xmlElementReaders[element.name] :
                null;
            if (handler) {
                return handler(element);
            } else if (!Object.prototype.hasOwnProperty.call(ignoreElements, element.name)) {
                return new Result(
                    null,
                    [warning("An unrecognised element was ignored: " + element.name)]
                );
            }
        }
        return new Result(null);
    }

    // Readers keyed by qualified element name. Each takes the XML element and
    // returns a Result.
    var xmlElementReaders = {
        // Paragraph: the properties child (if any) is split out from the
        // remaining children.
        "w:p": function(element) {
            return readXmlElements(element.children)
                .map(function(children) {
                    var properties = _.find(children, isParagraphProperties);

                    return new documents.Paragraph(
                        children.filter(negate(isParagraphProperties)),
                        properties
                    );
                });
        },
        // Paragraph properties: only the style name (w:pStyle/@w:val) is read.
        "w:pPr": function(element) {
            var properties = {
                type: "paragraphProperties"
            };

            var styleElement = element.first("w:pStyle");
            if (styleElement) {
                properties.styleName = styleElement.attributes["w:val"];
            }

            return new Result(properties);
        },
        // Run: the properties child (if any) is split out from the remaining
        // children.
        "w:r": function(element) {
            return readXmlElements(element.children)
                .map(function(children) {
                    var properties = _.find(children, isRunProperties);

                    return new documents.Run(
                        children.filter(negate(isRunProperties)),
                        properties
                    );
                });
        },
        // Run properties: style name plus bold/italic, which are decided
        // purely by the presence of a w:b / w:i child.
        "w:rPr": function(element) {
            var properties = {
                type: "runProperties"
            };

            var styleElement = element.first("w:rStyle");
            if (styleElement) {
                properties.styleName = styleElement.attributes["w:val"];
            }
            properties.isBold = !!element.first("w:b");
            properties.isItalic = !!element.first("w:i");

            return new Result(properties);
        },
        // Text: the value of the first child node. An element with no
        // children (e.g. <w:t/>) is read as the empty string rather than
        // throwing on children[0].
        "w:t": function(element) {
            var firstChild = element.children[0];
            var value = firstChild ? firstChild.value : "";
            return new Result(new documents.Text(value));
        },
        // Hyperlink: href is the target of the relationship named by r:id.
        // NOTE(review): this throws if `relationships` has no entry for the
        // element's r:id (including when the attribute is absent) - confirm
        // whether such hyperlinks need to be supported.
        "w:hyperlink": function(element) {
            var relationshipId = element.attributes["r:id"];
            var href = relationships[relationshipId].target;
            return readXmlElements(element.children)
                .map(function(children) {
                    return new documents.Hyperlink(children, {href: href});
                });
        },

        // Drawing: delegates to its wp:inline child. A drawing without one is
        // reported as a warning instead of passing a missing element to
        // readXmlElement, which would throw.
        "w:drawing": function(element) {
            var inline = element.first("wp:inline");
            if (!inline) {
                return new Result(
                    null,
                    [warning("A w:drawing element without a wp:inline child was ignored")]
                );
            }
            return readXmlElement(inline);
        },
        // Inline image: resolves the embedded relationship to a path under
        // "word/" and wraps a lazy reader for it in a documents.Image.
        // NOTE(review): every element in the lookup chain, wp:docPr, and the
        // relationship entry are assumed to be present; a missing one throws.
        "wp:inline": function(element) {
            var relationshipId = element
                .first("a:graphic")
                .first("a:graphicData")
                .first("pic:pic")
                .first("pic:blipFill")
                .first("a:blip")
                .attributes["r:embed"];
            var imagePath = relationships[relationshipId].target;
            var readImage = docxFile.read.bind(docxFile, path.join("word", imagePath));
            var altText = element.first("wp:docPr").attributes.descr;

            // Constructed with `new`, like every other documents.* node here.
            var image = new documents.Image(readImage, altText);
            return new Result(image);
        }
    };
    return {
        convertXmlToDocument: convertXmlToDocument,
        readXmlElement: readXmlElement
    };
}

// Element names that are skipped silently: readXmlElement checks this table
// (by own property) before emitting its "unrecognised element" warning.
var ignoreElements = {
    "w:bookmarkStart": true,
    "w:bookmarkEnd": true,
    "w:sectPr": true,
    "w:proofErr": true,
    "w:lastRenderedPageBreak": true
};

/**
 * Tells whether a value is anything other than `null`.
 *
 * The comparison is strict, so `undefined` counts as "not null".
 *
 * @param {*} value
 * @returns {boolean}
 */
function notNull(value) {
    return value !== null;
}

/**
 * Tells whether a converted node is a paragraph-properties record, i.e. its
 * `type` is exactly "paragraphProperties".
 *
 * @param {Object} element - Node with a `type` property.
 * @returns {boolean}
 */
function isParagraphProperties(element) {
    return element.type === "paragraphProperties";
}

/**
 * Tells whether a converted node is a run-properties record, i.e. its `type`
 * is exactly "runProperties".
 *
 * @param {Object} element - Node with a `type` property.
 * @returns {boolean}
 */
function isRunProperties(element) {
    return element.type === "runProperties";
}

/**
 * Builds the logical complement of a one-argument predicate.
 *
 * @param {function(*): *} predicate
 * @returns {function(*): boolean} Function returning `true` exactly when
 *     `predicate(value)` is falsy.
 */
function negate(predicate) {
    return function(value) {
        return !predicate(value);
    };
}