diff --git a/docs/accessibility.md b/docs/accessibility.md index e2ceab7..e42c3a1 100644 --- a/docs/accessibility.md +++ b/docs/accessibility.md @@ -200,6 +200,43 @@ Example of a structure tree with options specified: ]) ]); +### Automatic Marking and Structure Construction for Text + +The `text()` method accepts a `structParent` option which you can use to specify a structure +element to add each paragraph to. It will mark each paragraph of content, create a structure +element for it, and then add it to the parent element you provided. It will use the `P` type, +unless you specify a different type with a `structType` option. + +Example of creating structure automatically with `text()`: + + // Create a section, add it to the document structure, then add paragraphs to it + const section = doc.struct('Sect'); + doc.addStructure(section); + doc.text("Foo. \nBar. ", { structParent: section }); + + // Equivalent code if performed manually + const section = doc.struct('Sect'); + doc.addStructure(section); + section.add(doc.struct('P', [ doc.markStructureContent('P') ])); + doc.text("Foo. "); + section.add(doc.struct('P', [ doc.markStructureContent('P') ])); + doc.text("Bar. "); + +The `list()` method also accepts a `structParent` option. By default, it adds list items +(type `LI`) to the parent, each of which contains a label (type `Lbl`, which holds the bullet, +number, or letter) and a body (type `LBody`, which holds the actual item content). You can +override the default types with a `structTypes` option, which is a list: +`[ itemType, labelType, bodyType ]`. You can make any of the types `null` to omit that +part of the structure (i.e. to add labels and bodies directly to the parent, and/or to collapse +the label and body into a single element). + +Example of creating structure automatically with `list()`: + + // Create a list, add it to the structure tree, then add items to it + const list = doc.struct('List'); + someElement.add(list); + doc.list(["Foo. ", "Bar. 
"], { structParent: list }); + ## Tags and Structure Element Types Here are the tags and structure element types which are defined in Tagged PDF. You must diff --git a/lib/line_wrapper.js b/lib/line_wrapper.js index 5eb4cec..b1e05a3 100644 --- a/lib/line_wrapper.js +++ b/lib/line_wrapper.js @@ -222,7 +222,7 @@ class LineWrapper extends EventEmitter { textWidth = this.wordWidth(buffer + this.ellipsis); // remove characters from the buffer until the ellipsis fits - // to avoid inifinite loop need to stop while-loop if buffer is empty string + // to avoid infinite loop need to stop while-loop if buffer is empty string while (buffer && textWidth > this.lineWidth) { buffer = buffer.slice(0, -1).replace(/\s+$/, ''); textWidth = this.wordWidth(buffer + this.ellipsis); diff --git a/lib/mixins/text.js b/lib/mixins/text.js index 43903ab..94e711c 100644 --- a/lib/mixins/text.js +++ b/lib/mixins/text.js @@ -44,12 +44,20 @@ export default { text = text.replace(/\s{2,}/g, ' '); } + const addStructure = () => { + if (options.structParent) { + options.structParent.add(this.struct(options.structType || 'P', + [ this.markStructureContent(options.structType || 'P') ])); + } + }; + // word wrapping if (options.width) { let wrapper = this._wrapper; if (!wrapper) { wrapper = new LineWrapper(this, options); wrapper.on('line', lineCallback); + wrapper.on('firstLine', addStructure); } this._wrapper = options.continued ? 
wrapper : null; @@ -59,6 +67,7 @@ export default { // render paragraphs as single lines } else { for (let line of text.split('\n')) { + addStructure(); lineCallback(line, options); } } @@ -150,6 +159,22 @@ export default { level = 1; let i = 0; wrapper.on('firstLine', () => { + let item, itemType, labelType, bodyType; + if (options.structParent) { + if (options.structTypes) { + [ itemType, labelType, bodyType ] = options.structTypes; + } else { + [ itemType, labelType, bodyType ] = [ 'LI', 'Lbl', 'LBody' ]; + } + } + + if (itemType) { + item = this.struct(itemType); + options.structParent.add(item); + } else if (options.structParent) { + item = options.structParent; + } + let l; if ((l = levels[i++]) !== level) { const diff = itemIndent * (l - level); @@ -158,14 +183,27 @@ export default { level = l; } + if (item && (labelType || bodyType)) { + item.add(this.struct(labelType || bodyType, + [ this.markStructureContent(labelType || bodyType) ])); + } switch (listType) { case 'bullet': this.circle(this.x - indent + r, this.y + midLine, r); - return this.fill(); + this.fill(); + break; case 'numbered': case 'lettered': var text = label(numbers[i - 1]); - return this._fragment(text, this.x - indent, this.y, options); + this._fragment(text, this.x - indent, this.y, options); + break; + } + + if (item && labelType && bodyType) { + item.add(this.struct(bodyType, [ this.markStructureContent(bodyType) ])); + } + if (item && item !== options.structParent) { + item.end(); } }); diff --git a/tests/unit/markings.spec.js b/tests/unit/markings.spec.js index 282b1dc..315fbf3 100644 --- a/tests/unit/markings.spec.js +++ b/tests/unit/markings.spec.js @@ -275,56 +275,32 @@ EMC ]); expect(docData).toContainChunk([ `12 0 obj`, - `<< -/S /Section -/K [11 0 R] -/P 8 0 R ->>`, + `<<\n/S /Section\n/K [11 0 R]\n/P 8 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `11 0 obj`, - `<< -/S /P -/K [0 10 0 R 2] -/Pg 7 0 R -/P 12 0 R ->>`, + `<<\n/S /P\n/K [0 10 0 R 2]\n/Pg 7 0 R\n/P 12 0 
R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `10 0 obj`, - `<< -/S /Link -/K [1] -/Pg 7 0 R -/P 11 0 R ->>`, + `<<\n/S /Link\n/K [1]\n/Pg 7 0 R\n/P 11 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `14 0 obj`, - `<< -/S /Section -/K [13 0 R] -/P 8 0 R ->>`, + `<<\n/S /Section\n/K [13 0 R]\n/P 8 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `13 0 obj`, - `<< -/S /P -/K [3] -/Pg 7 0 R -/P 14 0 R ->>`, + `<<\n/S /P\n/K [3]\n/Pg 7 0 R\n/P 14 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `9 0 obj`, - `<< ->>`, + `<<\n>>`, `endobj` ]); }); @@ -379,56 +355,32 @@ EMC ]); expect(docData).toContainChunk([ `10 0 obj`, - `<< -/S /Section -/P 8 0 R -/K [13 0 R] ->>`, + `<<\n/S /Section\n/P 8 0 R\n/K [13 0 R]\n>>`, `endobj` ]); expect(docData).toContainChunk([ `13 0 obj`, - `<< -/S /P -/P 10 0 R -/K [0 12 0 R 2] -/Pg 7 0 R ->>`, + `<<\n/S /P\n/P 10 0 R\n/K [0 12 0 R 2]\n/Pg 7 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `12 0 obj`, - `<< -/S /Link -/K [1] -/Pg 7 0 R -/P 13 0 R ->>`, + `<<\n/S /Link\n/K [1]\n/Pg 7 0 R\n/P 13 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `11 0 obj`, - `<< -/S /Section -/P 8 0 R -/K [14 0 R] ->>`, + `<<\n/S /Section\n/P 8 0 R\n/K [14 0 R]\n>>`, `endobj` ]); expect(docData).toContainChunk([ `14 0 obj`, - `<< -/S /P -/K [3] -/Pg 7 0 R -/P 11 0 R ->>`, + `<<\n/S /P\n/K [3]\n/Pg 7 0 R\n/P 11 0 R\n>>`, `endobj` ]); expect(docData).toContainChunk([ `9 0 obj`, - `<< ->>`, + `<<\n>>`, `endobj` ]); }); @@ -460,7 +412,9 @@ EMC `endobj` ]); }); + }); + describe('accessible document', () => { test('identified as accessible', () => { document = new PDFDocument({ info: { @@ -490,9 +444,7 @@ EMC ]); expect(docData).toContainChunk([ `5 0 obj`, - `<< -/Marked true ->>`, + `<<\n/Marked true\n>>`, `endobj` ]); expect(docData).toContainChunk([ @@ -522,4 +474,178 @@ EMC ]); }); }); + + describe('text integration', () => { + test('adds paragraphs to structure', () => { + const docData = logData(document); + + 
const stream = Buffer.from( + `1 0 0 -1 0 792 cm +/P << +/MCID 0 +>> BDC +q +1 0 0 -1 0 792 cm +BT +1 0 0 1 72 711.384 Tm +/F1 12 Tf +[<50> 40 <6172> 10 <6167> 10 <72> 10 <6170682031> 0] TJ +ET +Q +EMC +/P << +/MCID 1 +>> BDC +q +1 0 0 -1 0 792 cm +BT +1 0 0 1 72 697.512 Tm +/F1 12 Tf +[<50> 40 <6172> 10 <6167> 10 <72> 10 <6170682032> 0] TJ +ET +Q +EMC +`, + 'binary' + ); + + const section = document.struct('Sect'); + document.addStructure(section); + document.text("Paragraph 1\nParagraph 2", { structParent: section }); + document.end(); + + expect(docData).toContainChunk([ + `5 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + expect(docData).toContainChunk([ + '11 0 obj', + '<<\n/S /P\n/K [0]\n/Pg 7 0 R\n/P 8 0 R\n>>', + 'endobj', + ]); + expect(docData).toContainChunk([ + '13 0 obj', + '<<\n/S /P\n/K [1]\n/Pg 7 0 R\n/P 8 0 R\n>>', + 'endobj', + ]); + expect(docData).toContainChunk([ + '8 0 obj', + '<<\n/S /Sect\n/P 9 0 R\n/K [11 0 R 13 0 R]\n>>', + 'endobj', + ]); + }); + + test('adds list items to structure', () => { + const docData = logData(document); + + const stream = Buffer.from( + `1 0 0 -1 0 792 cm +/Lbl << +/MCID 0 +>> BDC +72 76.5 m +72 74.843146 73.343146 73.5 75 73.5 c +76.656854 73.5 78 74.843146 78 76.5 c +78 78.156854 76.656854 79.5 75 79.5 c +73.343146 79.5 72 78.156854 72 76.5 c +h +f +EMC +/LBody << +/MCID 1 +>> BDC +q +1 0 0 -1 0 792 cm +BT +1 0 0 1 87 711.384 Tm +/F1 12 Tf +[<4974656d2031> 0] TJ +ET +Q +EMC +/Lbl << +/MCID 2 +>> BDC +72 90.372 m +72 88.715146 73.343146 87.372 75 87.372 c +76.656854 87.372 78 88.715146 78 90.372 c +78 92.028854 76.656854 93.372 75 93.372 c +73.343146 93.372 72 92.028854 72 90.372 c +h +f +EMC +/LBody << +/MCID 3 +>> BDC +q +1 0 0 -1 0 792 cm +BT +1 0 0 1 87 697.512 Tm +/F1 12 Tf +[<4974656d2032> 0] TJ +ET +Q +EMC +`, + 'binary' + ); + + const list = document.struct('List'); + document.addStructure(list); + document.list(["Item 1","Item 2"], { structParent: 
list }); + document.end(); + + expect(docData).toContainChunk([ + `5 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + expect(docData).toContainChunk([ + '12 0 obj', + '<<\n/S /Lbl\n/K [0]\n/Pg 7 0 R\n/P 10 0 R\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '13 0 obj', + '<<\n/S /LBody\n/K [1]\n/Pg 7 0 R\n/P 10 0 R\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '16 0 obj', + '<<\n/S /Lbl\n/K [2]\n/Pg 7 0 R\n/P 15 0 R\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '17 0 obj', + '<<\n/S /LBody\n/K [3]\n/Pg 7 0 R\n/P 15 0 R\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '10 0 obj', + '<<\n/S /LI\n/P 8 0 R\n/K [12 0 R 13 0 R]\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '15 0 obj', + '<<\n/S /LI\n/P 8 0 R\n/K [16 0 R 17 0 R]\n>>', + 'endobj' + ]); + expect(docData).toContainChunk([ + '8 0 obj', + '<<\n/S /List\n/P 9 0 R\n/K [10 0 R 15 0 R]\n>>', + 'endobj' + ]); + }); + }); });