mirror of
https://github.com/foliojs/pdfkit.git
synced 2025-12-08 20:15:54 +00:00
* Generate ToUnicodeMap bfrange in multiple ranges (#1498) This resolves #1498. * Add unit test for bfrange lines in toUnicodeMap * Add changelog line
This commit is contained in:
parent
485b7e6bee
commit
946f9cf6dc
@ -7,6 +7,7 @@
|
||||
- Fix for soft hyphen not being replaced by visible hyphen if necessary (#457)
|
||||
- Optimize output files by ignoring identity transforms
|
||||
- Fix for Acroforms - setting an option to false will still apply the flag (#1495)
|
||||
- Fix for text extraction in PDFium-based viewers due to invalid ToUnicodeMap (#1498)
|
||||
|
||||
### [v0.14.0] - 2023-11-09
|
||||
|
||||
|
||||
@ -252,6 +252,15 @@ class EmbeddedFont extends PDFFont {
|
||||
entries.push(`<${encoded.join(' ')}>`);
|
||||
}
|
||||
|
||||
const chunkSize = 256;
|
||||
const chunks = Math.ceil(entries.length / chunkSize);
|
||||
const ranges = [];
|
||||
for (let i = 0; i < chunks; i++) {
|
||||
const start = i * chunkSize;
|
||||
const end = Math.min((i + 1) * chunkSize, entries.length);
|
||||
ranges.push(`<${toHex(start)}> <${toHex(end - 1)}> [${entries.slice(start, end).join(' ')}]`);
|
||||
}
|
||||
|
||||
cmap.end(`\
|
||||
/CIDInit /ProcSet findresource begin
|
||||
12 dict begin
|
||||
@ -267,7 +276,7 @@ begincmap
|
||||
<0000><ffff>
|
||||
endcodespacerange
|
||||
1 beginbfrange
|
||||
<0000> <${toHex(entries.length - 1)}> [${entries.join(' ')}]
|
||||
${ranges.join('\n')}
|
||||
endbfrange
|
||||
endcmap
|
||||
CMapName currentdict /CMap defineresource pop
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import PDFFontFactory from '../../lib/font_factory';
|
||||
import PDFDocument from '../../lib/document';
|
||||
import PDFFontFactory from '../../lib/font_factory';
|
||||
import { logData } from './helpers';
|
||||
|
||||
describe('EmbeddedFont', () => {
|
||||
test('no fontLayoutCache option', () => {
|
||||
@ -52,4 +53,46 @@ describe('EmbeddedFont', () => {
|
||||
expect(dictionary.data.BaseFont).toBe('BAJJZZ+Roboto-Regular');
|
||||
});
|
||||
});
|
||||
|
||||
// Regression suite for the ToUnicode CMap written by an embedded font
// (see issue #1498 referenced in the changelog).
//
// NOTE: this must stay a plain `describe`. A focused `describe.only` makes
// Jest skip every other suite in this file, silently hiding regressions.
describe('toUnicodeMap', () => {
  /**
   * Encodes enough distinct characters that the font subset exceeds 256
   * glyphs, then checks every emitted `bfrange` line:
   *   - the low and high codes of a line share the same high-order byte, and
   *   - the lines together account for every glyph exactly once.
   */
  test('bfrange lines should not cross highcode boundary', () => {
    // Compression is disabled so the CMap text can be matched in the output.
    const doc = new PDFDocument({ compress: false });
    const font = PDFFontFactory.open(
      doc,
      'tests/fonts/Roboto-Regular.ttf',
      undefined,
      'F1099'
    );

    // 398 different glyphs
    font.encode('ABCDEFGHIJKLMNOPQRSTUVWXYZ');
    font.encode('abcdefghijklmnopqrstuvwxyz');
    font.encode('ÁÀÂÄÅÃÆÇÐÉÈÊËÍÌÎÏÑÓÒÔÖÕØŒÞÚÙÛÜÝŸ');
    font.encode('áàâäãåæçðéèêëíìîïıñóòôöõøœßþúùûüýÿ');
    font.encode('ĀĂĄĆČĎĐĒĖĘĚĞĢĪĮİĶŁĹĻĽŃŅŇŌŐŔŖŘŠŚŞȘŢȚŤŪŮŰŲŽŹŻ');
    font.encode('āăąćčďđēėęěğģīįķłĺļľńņňōőŕŗřšśşșţțťūůűųžźż');
    font.encode('ΑΒΓ∆ΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΆΈΉΊΌΎΏΪΫ');
    font.encode('αβγδεζηθικλµνξοπρςστυφχψωάέήίόύώϊϋΐΰ');
    font.encode('АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ');
    font.encode('абвгдежзийклмнопрстуфхцчшщъыьэюя');
    font.encode('ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏҐӁҒҖҚҢҮҰҲҶҺӘӢӨӮ');
    font.encode('ѐёђѓєѕіїјљњћќѝўџґӂғҗқңүұҳҷһәӣөӯ');

    // Start capturing before the CMap is written.
    // NOTE(review): assumes logData(doc) collects chunks written to the
    // document after this call — confirm against ./helpers.
    const docData = logData(doc);
    font.toUnicodeCmap();
    const text = docData.map((d) => d.toString('utf8')).join('');

    let glyphs = 0;
    // Walk every beginbfrange…endbfrange section, then every
    // "<low> <high> [" line inside it.
    for (const block of text.matchAll(/beginbfrange\n((?:.|\n)*?)\nendbfrange/g)) {
      for (const line of block[1].matchAll(/^<([0-9a-f]+)>\s+<([0-9a-f]+)>\s+\[/igm)) {
        const low = parseInt(line[1], 16);
        const high = parseInt(line[2], 16);
        glyphs += high - low + 1;
        // Only the lowest byte may differ between the two ends of a range.
        expect(high & 0xFFFFFF00).toBe(low & 0xFFFFFF00);
      }
    }

    // 398 encoded glyphs plus one extra entry.
    // NOTE(review): presumably the glyph-0 (.notdef) slot — confirm.
    expect(glyphs).toBe(398 + 1);
  });
});
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user