diff --git a/docs/accessibility.md b/docs/accessibility.md new file mode 100644 index 0000000..cf2259b --- /dev/null +++ b/docs/accessibility.md @@ -0,0 +1,16 @@ +# Accessibility + +Marked content sequences are foundational to creating accessible PDFs, which are usable by +visually impaired users who rely on screen readers/text-to-speech engines/vocalisation. + +All marked content sequences are associated with a registered tag, such as 'Span'. + +Example of marking content: + + // Mark some text as a "Span" + doc.markContent('Span'); + doc.text('Hello, world!'); + doc.endMarkedContent(); + +Marked content is automatically ended when a page is ended, and if a new page is automatically +added by text wrapping, marking is automatically begun again on the new page. diff --git a/docs/generate.js b/docs/generate.js index 59cbeb4..d676408 100644 --- a/docs/generate.js +++ b/docs/generate.js @@ -325,5 +325,6 @@ render(doc, 'images.md'); render(doc, 'outline.md'); render(doc, 'annotations.md'); render(doc, 'destinations.md'); +render(doc, 'accessibility.md'); render(doc, 'you_made_it.md'); doc.end(); diff --git a/docs/generate_website.js b/docs/generate_website.js index 8323dd0..36c4d52 100644 --- a/docs/generate_website.js +++ b/docs/generate_website.js @@ -21,6 +21,7 @@ const files = [ 'outline.md', 'annotations.md', 'destinations.md', + 'accessibility.md', 'you_made_it.md' ]; diff --git a/lib/document.js b/lib/document.js index d75e56a..c0d758c 100644 --- a/lib/document.js +++ b/lib/document.js @@ -17,6 +17,7 @@ import TextMixin from './mixins/text'; import ImagesMixin from './mixins/images'; import AnnotationsMixin from './mixins/annotations'; import OutlineMixin from './mixins/outline'; +import MarkingsMixin from './mixins/markings'; import AcroFormMixin from './mixins/acroform'; import LineWrapper from './line_wrapper'; @@ -118,10 +119,11 @@ class PDFDocument extends stream.Readable { } addPage(options) { - // end the current page if needed if (options == null) { ({ 
options } = this); } + + // end the current page if needed if (!this.options.bufferPages) { this.flushPages(); } @@ -149,6 +151,16 @@ class PDFDocument extends stream.Readable { return this; } + continueOnNewPage(options) { + const pageMarkings = this.endPageMarkings(this.page); + + this.addPage(options); + + this.initPageMarkings(pageMarkings); + + return this; + } + bufferedPageRange() { return { start: this._pageBufferStart, count: this._pageBuffer.length }; } @@ -173,6 +185,7 @@ class PDFDocument extends stream.Readable { this._pageBuffer = []; this._pageBufferStart += pages.length; for (let page of pages) { + this.endPageMarkings(page); page.end(); } } @@ -335,6 +348,7 @@ mixin(TextMixin); mixin(ImagesMixin); mixin(AnnotationsMixin); mixin(OutlineMixin); +mixin(MarkingsMixin); mixin(AcroFormMixin); PDFDocument.LineWrapper = LineWrapper; diff --git a/lib/line_wrapper.js b/lib/line_wrapper.js index a53f17a..5eb4cec 100644 --- a/lib/line_wrapper.js +++ b/lib/line_wrapper.js @@ -310,7 +310,7 @@ class LineWrapper extends EventEmitter { return false; } - this.document.addPage(); + this.document.continueOnNewPage(); this.column = 1; this.startY = this.document.page.margins.top; this.maxY = this.document.page.maxY(); diff --git a/lib/mixins/markings.js b/lib/mixins/markings.js new file mode 100644 index 0000000..58bfbeb --- /dev/null +++ b/lib/mixins/markings.js @@ -0,0 +1,46 @@ +/* +Markings mixin - support marked content sequences in content streams +By Ben Schmidt +*/ + +import PDFObject from "../object"; + +export default { + + markContent(tag, options = null) { + if (!options) { + this.page.markings.push({ tag }); + this.addContent(`/${tag} BMC`); + return this; + } + + this.page.markings.push({ tag, options }); + + const dictionary = {}; + + // TODO: add entries to dictionary based on options + + this.addContent(`/${tag} ${PDFObject.convert(dictionary)} BDC`); + return this; + }, + + endMarkedContent() { + this.page.markings.pop(); + this.addContent('EMC'); + 
return this; + }, + + initPageMarkings(pageMarkings) { + pageMarkings.forEach((marking) => { + this.markContent(marking.tag, marking.options); + }); + }, + + endPageMarkings(page) { + const pageMarkings = page.markings; + pageMarkings.forEach(() => page.write('EMC')); + page.markings = []; + return pageMarkings; + } + +}; diff --git a/lib/page.js b/lib/page.js index 9c2fe77..29d216e 100644 --- a/lib/page.js +++ b/lib/page.js @@ -105,6 +105,8 @@ class PDFPage { Contents: this.content, Resources: this.resources }); + + this.markings = []; } // Lazily create these dictionaries diff --git a/tests/unit/markings.spec.js b/tests/unit/markings.spec.js new file mode 100644 index 0000000..8f1e1c7 --- /dev/null +++ b/tests/unit/markings.spec.js @@ -0,0 +1,125 @@ +import PDFDocument from '../../lib/document'; +import { logData } from './helpers'; + +describe('Markings', () => { + let document; + + beforeEach(() => { + document = new PDFDocument({ + info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) }, + compress: false + }); + }); + + describe('marked content', () => { + test('with no dictionary', () => { + const docData = logData(document); + + const stream = Buffer.from( + `1 0 0 -1 0 792 cm +/Span BMC +EMC +`, + 'binary' + ); + + document.markContent("Span"); + document.endMarkedContent(); + document.end(); + + expect(docData).toContainChunk([ + `5 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + }); + + test('with empty dictionary', () => { + const docData = logData(document); + + const stream = Buffer.from( + `1 0 0 -1 0 792 cm +/Span << +>> BDC +EMC +`, + 'binary' + ); + + document.markContent("Span", {}); + document.endMarkedContent(); + document.end(); + + expect(docData).toContainChunk([ + `5 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + }); + + test('automatically closed', () => { + const docData = logData(document); + + const stream = Buffer.from( + `1 
0 0 -1 0 792 cm +/Span BMC +EMC +`, + 'binary' + ); + + document.markContent("Span"); + document.end(); + + expect(docData).toContainChunk([ + `5 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + }); + + test('continued on a new page', () => { + const docData = logData(document); + + const stream = Buffer.from( + `1 0 0 -1 0 792 cm +/Span BMC +EMC +`, + 'binary' + ); + + document.markContent("Span"); + document.text("on the first page"); + document.continueOnNewPage(); + document.end(); + + expect(docData).toContainChunk([ + `9 0 obj`, + `<< +/Length ${stream.length} +>>`, + `stream`, + stream, + `\nendstream`, + `endobj` + ]); + }); + + }); +}); diff --git a/tests/unit/text.spec.js b/tests/unit/text.spec.js index ade62b1..eb5c052 100644 --- a/tests/unit/text.spec.js +++ b/tests/unit/text.spec.js @@ -167,9 +167,9 @@ Q ); // before this test, this case used to make the code run into an infinite loop. - // To handle regresion gracefully and avoid stucking this test into an infinite loop, - // we look out for a side effect of this infinite loop, witch is adding and infinite number of pages. - // Nomaly, there should not be any page added to the document. + // To handle regression gracefully and avoid getting this test stuck in an infinite loop, + // we look out for a side effect of this infinite loop, which is adding an infinite number of pages. + // Normally, there should not be any page added to the document. document.on('pageAdded', () => { const pageRange = document.bufferedPageRange();