Support marking content.

This commit is contained in:
Ben Schmidt 2020-09-27 17:02:33 +10:00 committed by Luiz Américo
parent 5df96274a6
commit ef5e52b2ad
9 changed files with 210 additions and 5 deletions

16
docs/accessibility.md Normal file
View File

@ -0,0 +1,16 @@
# Accessibility
Marked content sequences are foundational to creating accessible PDFs, which are usable by
visually impaired users who rely on screen readers/text-to-speech engines/vocalisation.
All marked content sequences are associated with a registered tag, such as 'Span'.
Example of marking content:

    // Mark some text as a "Span"
    doc.markContent('Span');
    doc.text('Hello, world!');
    doc.endMarkedContent();

Marked content is ended automatically when its page is ended. If text wrapping adds a new
page, the marked content that was open is begun again on the new page.

View File

@ -325,5 +325,6 @@ render(doc, 'images.md');
render(doc, 'outline.md');
render(doc, 'annotations.md');
render(doc, 'destinations.md');
render(doc, 'accessibility.md');
render(doc, 'you_made_it.md');
doc.end();

View File

@ -21,6 +21,7 @@ const files = [
'outline.md',
'annotations.md',
'destinations.md',
'accessibility.md',
'you_made_it.md'
];

View File

@ -17,6 +17,7 @@ import TextMixin from './mixins/text';
import ImagesMixin from './mixins/images';
import AnnotationsMixin from './mixins/annotations';
import OutlineMixin from './mixins/outline';
import MarkingsMixin from './mixins/markings';
import AcroFormMixin from './mixins/acroform';
import LineWrapper from './line_wrapper';
@ -118,10 +119,11 @@ class PDFDocument extends stream.Readable {
}
addPage(options) {
// end the current page if needed
if (options == null) {
({ options } = this);
}
// end the current page if needed
if (!this.options.bufferPages) {
this.flushPages();
}
@ -149,6 +151,16 @@ class PDFDocument extends stream.Readable {
return this;
}
continueOnNewPage(options) {
const pageMarkings = this.endPageMarkings(this.page);
this.addPage(options);
this.initPageMarkings(pageMarkings);
return this;
}
bufferedPageRange() {
return { start: this._pageBufferStart, count: this._pageBuffer.length };
}
@ -173,6 +185,7 @@ class PDFDocument extends stream.Readable {
this._pageBuffer = [];
this._pageBufferStart += pages.length;
for (let page of pages) {
this.endPageMarkings(page);
page.end();
}
}
@ -335,6 +348,7 @@ mixin(TextMixin);
mixin(ImagesMixin);
mixin(AnnotationsMixin);
mixin(OutlineMixin);
mixin(MarkingsMixin);
mixin(AcroFormMixin);
PDFDocument.LineWrapper = LineWrapper;

View File

@ -310,7 +310,7 @@ class LineWrapper extends EventEmitter {
return false;
}
this.document.addPage();
this.document.continueOnNewPage();
this.column = 1;
this.startY = this.document.page.margins.top;
this.maxY = this.document.page.maxY();

46
lib/mixins/markings.js Normal file
View File

@ -0,0 +1,46 @@
/*
Markings mixin - support marked content sequences in content streams
By Ben Schmidt
*/
import PDFObject from "../object";
const MarkingsMixin = {
  /**
   * Begin a marked content sequence on the current page.
   *
   * Without options this emits `/<tag> BMC`; with an options object it emits
   * `/<tag> <dictionary> BDC`. The marking is also recorded on
   * `this.page.markings` so it can be closed when the page ends and begun
   * again on a following page.
   *
   * @param {string} tag - marked content tag, e.g. 'Span'
   * @param {Object} [options=null] - when truthy, a property dictionary is
   *   emitted (currently always empty, see TODO below)
   * @returns {this} the document, for chaining
   */
  markContent(tag, options = null) {
    if (!options) {
      this.page.markings.push({ tag });
      this.addContent(`/${tag} BMC`);
      return this;
    }

    this.page.markings.push({ tag, options });

    const dictionary = {};
    // TODO: add entries to dictionary based on options

    this.addContent(`/${tag} ${PDFObject.convert(dictionary)} BDC`);
    return this;
  },

  /**
   * End the most recently begun marked content sequence on the current page.
   *
   * If nothing is open on the current page this is a no-op. That happens, for
   * example, when the marking was already closed because its page was ended.
   * Emitting `EMC` in that situation would leave an unbalanced operator in
   * the content stream.
   *
   * @returns {this} the document, for chaining
   */
  endMarkedContent() {
    if (this.page.markings.length === 0) {
      return this;
    }

    this.page.markings.pop();
    this.addContent('EMC');
    return this;
  },

  /**
   * Begin again, in order, a list of markings on the current page.
   *
   * @param {Array<{tag: string, options?: Object}>} pageMarkings - markings
   *   as returned by `endPageMarkings`
   * @returns {this} the document, for chaining
   */
  initPageMarkings(pageMarkings) {
    for (const { tag, options } of pageMarkings) {
      this.markContent(tag, options);
    }
    return this;
  },

  /**
   * Close every marking still open on `page` and reset its marking list.
   *
   * One `EMC` is written directly to the page for each open marking.
   *
   * @param {Object} page - page exposing a `markings` array and `write()`
   * @returns {Array<{tag: string, options?: Object}>} the markings that were
   *   open, outermost first, so they can be begun again on another page
   */
  endPageMarkings(page) {
    const pageMarkings = page.markings;
    pageMarkings.forEach(() => page.write('EMC'));
    page.markings = [];
    return pageMarkings;
  }
};

export default MarkingsMixin;

View File

@ -105,6 +105,8 @@ class PDFPage {
Contents: this.content,
Resources: this.resources
});
this.markings = [];
}
// Lazily create these dictionaries

125
tests/unit/markings.spec.js Normal file
View File

@ -0,0 +1,125 @@
import PDFDocument from '../../lib/document';
import { logData } from './helpers';
// Unit tests for the markings mixin: each case checks the raw content stream
// that ends up in the generated PDF (compression is disabled so the stream
// can be compared byte for byte).
describe('Markings', () => {
  let document;

  // Content streams are compared as binary buffers.
  const toStream = (text) => Buffer.from(text, 'binary');

  // Asserts that the logged output contains the object introduced by
  // `objectHeader` whose stream body is exactly `stream`, with a /Length
  // entry matching the stream's byte length.
  const expectStreamObject = (docData, objectHeader, stream) => {
    expect(docData).toContainChunk([
      objectHeader,
      `<<
/Length ${stream.length}
>>`,
      `stream`,
      stream,
      `\nendstream`,
      `endobj`
    ]);
  };

  beforeEach(() => {
    document = new PDFDocument({
      info: { CreationDate: new Date(Date.UTC(2018, 1, 1)) },
      compress: false
    });
  });

  describe('marked content', () => {
    test('with no dictionary', () => {
      const docData = logData(document);
      const stream = toStream(
        `1 0 0 -1 0 792 cm
/Span BMC
EMC
`
      );

      document.markContent("Span");
      document.endMarkedContent();
      document.end();

      expectStreamObject(docData, `5 0 obj`, stream);
    });

    test('with empty dictionary', () => {
      const docData = logData(document);
      const stream = toStream(
        `1 0 0 -1 0 792 cm
/Span <<
>> BDC
EMC
`
      );

      document.markContent("Span", {});
      document.endMarkedContent();
      document.end();

      expectStreamObject(docData, `5 0 obj`, stream);
    });

    test('automatically closed', () => {
      const docData = logData(document);
      const stream = toStream(
        `1 0 0 -1 0 792 cm
/Span BMC
EMC
`
      );

      // No explicit endMarkedContent(): ending the document must close it.
      document.markContent("Span");
      document.end();

      expectStreamObject(docData, `5 0 obj`, stream);
    });

    test('continued on a new page', () => {
      const docData = logData(document);
      const stream = toStream(
        `1 0 0 -1 0 792 cm
/Span BMC
EMC
`
      );

      document.markContent("Span");
      document.text("on the first page");
      document.continueOnNewPage();
      document.end();

      // Object 9 is expected to hold the second page's stream, where the
      // marking must have been begun again and then closed.
      expectStreamObject(docData, `9 0 obj`, stream);
    });
  });
});

View File

@ -167,9 +167,9 @@ Q
);
// before this test, this case used to make the code run into an infinite loop.
// To handle regresion gracefully and avoid stucking this test into an infinite loop,
// we look out for a side effect of this infinite loop, witch is adding and infinite number of pages.
// Nomaly, there should not be any page added to the document.
// To handle a regression gracefully and avoid getting this test stuck in an infinite loop,
// we look out for a side effect of this infinite loop, which is adding an infinite number of pages.
// Normally, there should not be any page added to the document.
document.on('pageAdded', () => {
const pageRange = document.bufferedPageRange();