Fix precision rounding issues in LineWrapper (#1595)

* Fix further LineWrapper precision issues

* add test of bounded text precision issue

* add rowSpanning table example

* add failure threshold

* implement toContainText jest matcher

* create a unit test for bounded text precision

* remove round up rounding code path

---------

Co-authored-by: Luiz Américo Pereira Câmara <blikblum@users.noreply.github.com>
This commit is contained in:
Jake Holland 2025-05-03 03:05:11 +01:00 committed by GitHub
parent f3c1776850
commit 54e6600f1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 306 additions and 34 deletions

View File

@ -3,6 +3,7 @@
### Unreleased
- Fix null values in table cells rendering as `[object Object]`
- Fix further LineWrapper precision issues
### [v0.17.0] - 2025-04-12

View File

@ -85,10 +85,10 @@ class LineWrapper extends EventEmitter {
}
wordWidth(word) {
return (
return PDFNumber(
this.document.widthOfString(word, this) +
this.characterSpacing +
this.wordSpacing
this.characterSpacing +
this.wordSpacing,
);
}

View File

@ -1,8 +1,25 @@
const fArray = new Float32Array(1);
const uArray = new Uint32Array(fArray.buffer);
export function PDFNumber(n) {
// PDF numbers are strictly 32bit
// so convert this number to the nearest 32bit number
// so convert this number to a 32bit number
// @see ISO 32000-1 Annex C.2 (real numbers)
return Math.fround(n);
const rounded = Math.fround(n);
if (rounded <= n) return rounded;
// Will have to perform 32bit float truncation
fArray[0] = n;
// Get the 32-bit representation as integer and shift bits
if (n <= 0) {
uArray[0] += 1;
} else {
uArray[0] -= 1;
}
// Return the float value
return fArray[0];
}
/**

View File

@ -1,3 +1,24 @@
/**
* @import PDFDocument from '../../lib/document';
*/
/**
* @typedef {object} TextStream
* @property {string} text
* @property {string} font
* @property {number} fontSize
*
* @typedef {string | Buffer} PDFDataItem
* @typedef {Array<PDFDataItem>} PDFData
*
* @typedef {object} PDFDataObject
* @property {PDFDataItem[]} items
*/
/**
* @param {PDFDocument} doc
* @return {PDFData}
*/
function logData(doc) {
const loggedData = [];
const originalMethod = doc._write;
@ -18,4 +39,83 @@ function joinTokens(...args) {
return r;
}
export { logData, joinTokens };
/**
* @description
* Returns an array of objects from the PDF data. Object items are surrounded by /\d 0 obj/ and 'endobj'.
* @param {PDFData} data
* @return {Array<PDFDataObject>}
*/
function getObjects(data) {
const objects = [];
let currentObject = null;
for (const item of data) {
if (item instanceof Buffer) {
if (currentObject) {
currentObject.items.push(item);
}
} else if (typeof item === 'string') {
if (/^\d+\s0\sobj/.test(item)) {
currentObject = { items: [] };
objects.push(currentObject);
} else if (item === 'endobj') {
currentObject = null;
} else if (currentObject) {
currentObject.items.push(item);
}
}
}
return objects;
}
/**
* @param {Buffer} textStream
* @return {TextStream | undefined}
*/
function parseTextStream(textStream) {
const decodedStream = textStream.toString('utf8');
// Extract font and font size
const fontMatch = decodedStream.match(/\/([A-Za-z0-9]+)\s+(\d+)\s+Tf/);
if (!fontMatch) {
return undefined;
}
const font = fontMatch[1];
const fontSize = parseInt(fontMatch[2], 10);
// Extract hex strings inside TJ array
const tjMatch = decodedStream.match(/\[([^\]]+)\]\s+TJ/);
if (!tjMatch) {
return undefined;
}
let text = '';
// this is a simplified version
// the correct way is to retrieve the encoding from /Resources /Font dictionary and decode using it
// https://stackoverflow.com/a/29468049/5724645
// Match all hex strings like <...>
const hexMatches = [...tjMatch[1].matchAll(/<([0-9a-fA-F]+)>/g)];
for (const m of hexMatches) {
// Convert hex to string
const hex = m[1];
for (let i = 0; i < hex.length; i += 2) {
const code = parseInt(hex.substr(i, 2), 16);
let char = String.fromCharCode(code);
// Handle special cases
if (code === 0x0a) {
char = '\n'; // Newline
} else if (code === 0x0d) {
char = '\r'; // Carriage return
} else if (code === 0x85) {
char = '...';
}
text += char;
}
}
return { text, font, fontSize };
}
export { logData, joinTokens, parseTextStream, getObjects };

View File

@ -1,5 +1,7 @@
import matcher from './toContainChunk';
import toContainChunk from './toContainChunk';
import toContainText from './toContainText';
import { toMatchImageSnapshot } from 'jest-image-snapshot';
expect.extend(matcher);
expect.extend(toContainChunk);
expect.extend(toContainText);
expect.extend({ toMatchImageSnapshot });

View File

@ -15,33 +15,15 @@ describe('Text', () => {
test('with simple content', () => {
const docData = logData(document);
const textStream = Buffer.from(
`1 0 0 -1 0 792 cm
q
1 0 0 -1 0 792 cm
BT
1 0 0 1 72 711.384 Tm
/F1 12 Tf
[<73696d706c65207465> 30 <7874> 0] TJ
ET
Q
`,
'binary',
);
document.text('simple text');
document.end();
expect(docData).toContainChunk([
`5 0 obj`,
`<<
/Length 116
>>`,
`stream`,
textStream,
`\nendstream`,
`endobj`,
]);
expect(docData).toContainText({ text: 'simple text' });
});
test('with destination', () => {
// just check that there is no exception
document.text('simple text', { destination: 'anchor' });
});
test('with content ending after page right margin', () => {
@ -194,5 +176,21 @@ Q
`endobj`,
]);
});
test('bounded text precision - issue #1611', () => {
const docData = logData(document);
const text = 'New york';
const bounds = document.boundsOfString(text);
// Draw text which is constrained to the bounds
document.text(text, {
ellipsis: true,
width: bounds.width,
height: bounds.height,
});
document.end();
expect(docData).toContainText({ text });
});
});
});

View File

@ -0,0 +1,117 @@
import { getObjects, parseTextStream } from '../helpers.js';
/**
* @import { TextStream, PDFDataObject } from '../helpers.js';
* @import JestMatchedUtils from 'jest-matcher-utils';
*/
/**
* @param {JestMatchedUtils} utils
* @param {TextStream} argument
* @return {string}
*/
const passMessage = (utils, argument) => () => {
return (
utils.matcherHint('.not.toContainText', 'data', 'textStream') +
'\n\n' +
`Expected data not to contain text:\n\n${utils.printExpected(argument)}`
);
};
/**
* @param {JestMatchedUtils} utils
* @param {TextStream[]} received
* @param {TextStream} argument
* @return {string}
*/
const failMessage = (utils, received, argument) => () => {
return (
utils.matcherHint('.toContainText', 'data', 'textStream') +
'\n\n' +
`Expected data to contain text:\n\n${utils.printExpected(argument)}\n\nFound:\n\n${utils.printReceived(received)}`
);
};
function textStreamMatches(expected, actual) {
if (expected.text !== actual.text) {
return false;
}
if (expected.font && expected.font !== actual.font) {
return false;
}
if (expected.fontSize && expected.fontSize !== actual.fontSize) {
return false;
}
return true;
}
/**
* @param {PDFDataObject} object
* @return {TextStream | undefined}
*/
function getTextStream(object) {
// text stream objects have 4 items
// first item is a string containing the Length of the stream
// second item 'stream'
// third item is the stream content Buffer
// fourth item is 'endstream'
if (object.items.length !== 4) {
return;
}
if (typeof object.items[0] !== 'string') {
return;
}
if (object.items[1] !== 'stream') {
return;
}
if (!(object.items[2] instanceof Buffer)) {
return;
}
if (!/endstream/.test(object.items[3])) {
return;
}
return parseTextStream(object.items[2]);
}
export default {
/**
*
* @param {(string | Buffer)[]} data
* @param {Partial<TextStream>} textStream
* @returns
*/
toContainText(data, textStream) {
const objects = getObjects(data);
const foundTextStreams = [];
let pass = false;
for (const object of objects) {
const objectTextStream = getTextStream(object, textStream);
if (!objectTextStream) {
continue;
}
foundTextStreams.push(objectTextStream);
if (textStreamMatches(textStream, objectTextStream)) {
pass = true;
break;
}
}
if (pass) {
return {
pass: true,
message: passMessage(this.utils, textStream),
};
}
return {
pass: false,
message: failMessage(this.utils, foundTextStreams, textStream),
};
},
};

View File

@ -1,4 +1,4 @@
import { normalizeSides } from '../../lib/utils';
import { normalizeSides, PDFNumber } from '../../lib/utils';
describe('normalizeSides', () => {
test.each([
@ -54,3 +54,19 @@ describe('normalizeSides', () => {
});
});
});
describe('PDFNumber', () => {
test.each([
[0, 0],
[0.04999999701976776], //float32 rounded down
[0.05],
[0.05000000074505806], //float32 rounded up
[1],
[-1],
[-5.05],
[5.05],
])('PDFNumber(%f) -> %f', (n) => {
expect(PDFNumber(n)).toBeLessThanOrEqual(n);
expect(PDFNumber(n, false)).toBeLessThanOrEqual(n);
});
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -26,7 +26,7 @@ function runDocTest(options, fn) {
const { systemFonts = false } = options;
const images = await pdf2png(pdfData, { systemFonts });
for (let image of images) {
expect(image).toMatchImageSnapshot();
expect(image).toMatchImageSnapshot(options);
}
resolve();
} catch (err) {

View File

@ -405,4 +405,24 @@ describe('table', function () {
},
);
});
test('multi page table', function () {
return runDocTest({ size: [500, 300] }, function (doc) {
doc.font('tests/fonts/Roboto-Italic.ttf');
doc.table({
debug: true,
data: [
[
{
rowSpan: 3,
text: 'LEFT_COLUMN',
},
'RIGHT_COLUM_ROW_1\n'.repeat(5),
],
['RIGHT_COLUM_ROW_2\n'.repeat(5)],
['RIGHT_COLUM_ROW_3'],
],
});
});
});
});

View File

@ -127,6 +127,7 @@ describe('text', function () {
'#ecf157',
'#acfa70',
];
function randColor() {
return cols[i++ % cols.length];
}