diff --git a/demo/out.pdf b/demo/out.pdf index 40c9a91..1332abd 100644 Binary files a/demo/out.pdf and b/demo/out.pdf differ diff --git a/lib/font.coffee b/lib/font.coffee index bfebc02..4e957fe 100644 --- a/lib/font.coffee +++ b/lib/font.coffee @@ -88,11 +88,7 @@ class PDFFont Filter: 'FlateDecode' @fontfile.add compressedData - - cmap = @subset.cmap - widths = @subset.charWidths - charWidths = (Math.round widths[gid] * @scaleFactor for gid, i in cmap when i >= 32) - + @descriptor = @document.ref Type: 'FontDescriptor' FontName: @subset.postscriptName @@ -105,14 +101,18 @@ class PDFFont Descent: @decender CapHeight: @capHeight XHeight: @xHeight + + firstChar = +Object.keys(@subset.cmap)[0] + charWidths = for code, glyph of @subset.cmap + Math.round @ttf.hmtx.forGlyph(glyph).advance * @scaleFactor ref = Type: 'Font' BaseFont: @subset.postscriptName Subtype: 'TrueType' FontDescriptor: @descriptor - FirstChar: 32 - LastChar: 255 + FirstChar: firstChar + LastChar: firstChar + charWidths.length - 1 Widths: @document.ref charWidths Encoding: 'MacRomanEncoding' diff --git a/lib/font/macroman.coffee b/lib/font/macroman.coffee deleted file mode 100644 index e78ed65..0000000 --- a/lib/font/macroman.coffee +++ /dev/null @@ -1,227 +0,0 @@ -exports.TO_UNICODE = - 0x20: 0x0020 # SPACE - 0x21: 0x0021 # EXCLAMATION MARK - 0x22: 0x0022 # QUOTATION MARK - 0x23: 0x0023 # NUMBER SIGN - 0x24: 0x0024 # DOLLAR SIGN - 0x25: 0x0025 # PERCENT SIGN - 0x26: 0x0026 # AMPERSAND - 0x27: 0x0027 # APOSTROPHE - 0x28: 0x0028 # LEFT PARENTHESIS - 0x29: 0x0029 # RIGHT PARENTHESIS - 0x2A: 0x002A # ASTERISK - 0x2B: 0x002B # PLUS SIGN - 0x2C: 0x002C # COMMA - 0x2D: 0x002D # HYPHEN-MINUS - 0x2E: 0x002E # FULL STOP - 0x2F: 0x002F # SOLIDUS - 0x30: 0x0030 # DIGIT ZERO - 0x31: 0x0031 # DIGIT ONE - 0x32: 0x0032 # DIGIT TWO - 0x33: 0x0033 # DIGIT THREE - 0x34: 0x0034 # DIGIT FOUR - 0x35: 0x0035 # DIGIT FIVE - 0x36: 0x0036 # DIGIT SIX - 0x37: 0x0037 # DIGIT SEVEN - 0x38: 0x0038 # DIGIT EIGHT - 0x39: 0x0039 # 
DIGIT NINE - 0x3A: 0x003A # COLON - 0x3B: 0x003B # SEMICOLON - 0x3C: 0x003C # LESS-THAN SIGN - 0x3D: 0x003D # EQUALS SIGN - 0x3E: 0x003E # GREATER-THAN SIGN - 0x3F: 0x003F # QUESTION MARK - 0x40: 0x0040 # COMMERCIAL AT - 0x41: 0x0041 # LATIN CAPITAL LETTER A - 0x42: 0x0042 # LATIN CAPITAL LETTER B - 0x43: 0x0043 # LATIN CAPITAL LETTER C - 0x44: 0x0044 # LATIN CAPITAL LETTER D - 0x45: 0x0045 # LATIN CAPITAL LETTER E - 0x46: 0x0046 # LATIN CAPITAL LETTER F - 0x47: 0x0047 # LATIN CAPITAL LETTER G - 0x48: 0x0048 # LATIN CAPITAL LETTER H - 0x49: 0x0049 # LATIN CAPITAL LETTER I - 0x4A: 0x004A # LATIN CAPITAL LETTER J - 0x4B: 0x004B # LATIN CAPITAL LETTER K - 0x4C: 0x004C # LATIN CAPITAL LETTER L - 0x4D: 0x004D # LATIN CAPITAL LETTER M - 0x4E: 0x004E # LATIN CAPITAL LETTER N - 0x4F: 0x004F # LATIN CAPITAL LETTER O - 0x50: 0x0050 # LATIN CAPITAL LETTER P - 0x51: 0x0051 # LATIN CAPITAL LETTER Q - 0x52: 0x0052 # LATIN CAPITAL LETTER R - 0x53: 0x0053 # LATIN CAPITAL LETTER S - 0x54: 0x0054 # LATIN CAPITAL LETTER T - 0x55: 0x0055 # LATIN CAPITAL LETTER U - 0x56: 0x0056 # LATIN CAPITAL LETTER V - 0x57: 0x0057 # LATIN CAPITAL LETTER W - 0x58: 0x0058 # LATIN CAPITAL LETTER X - 0x59: 0x0059 # LATIN CAPITAL LETTER Y - 0x5A: 0x005A # LATIN CAPITAL LETTER Z - 0x5B: 0x005B # LEFT SQUARE BRACKET - 0x5C: 0x005C # REVERSE SOLIDUS - 0x5D: 0x005D # RIGHT SQUARE BRACKET - 0x5E: 0x005E # CIRCUMFLEX ACCENT - 0x5F: 0x005F # LOW LINE - 0x60: 0x0060 # GRAVE ACCENT - 0x61: 0x0061 # LATIN SMALL LETTER A - 0x62: 0x0062 # LATIN SMALL LETTER B - 0x63: 0x0063 # LATIN SMALL LETTER C - 0x64: 0x0064 # LATIN SMALL LETTER D - 0x65: 0x0065 # LATIN SMALL LETTER E - 0x66: 0x0066 # LATIN SMALL LETTER F - 0x67: 0x0067 # LATIN SMALL LETTER G - 0x68: 0x0068 # LATIN SMALL LETTER H - 0x69: 0x0069 # LATIN SMALL LETTER I - 0x6A: 0x006A # LATIN SMALL LETTER J - 0x6B: 0x006B # LATIN SMALL LETTER K - 0x6C: 0x006C # LATIN SMALL LETTER L - 0x6D: 0x006D # LATIN SMALL LETTER M - 0x6E: 0x006E # LATIN SMALL LETTER N - 0x6F: 
0x006F # LATIN SMALL LETTER O - 0x70: 0x0070 # LATIN SMALL LETTER P - 0x71: 0x0071 # LATIN SMALL LETTER Q - 0x72: 0x0072 # LATIN SMALL LETTER R - 0x73: 0x0073 # LATIN SMALL LETTER S - 0x74: 0x0074 # LATIN SMALL LETTER T - 0x75: 0x0075 # LATIN SMALL LETTER U - 0x76: 0x0076 # LATIN SMALL LETTER V - 0x77: 0x0077 # LATIN SMALL LETTER W - 0x78: 0x0078 # LATIN SMALL LETTER X - 0x79: 0x0079 # LATIN SMALL LETTER Y - 0x7A: 0x007A # LATIN SMALL LETTER Z - 0x7B: 0x007B # LEFT CURLY BRACKET - 0x7C: 0x007C # VERTICAL LINE - 0x7D: 0x007D # RIGHT CURLY BRACKET - 0x7E: 0x007E # TILDE - # - 0x80: 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x81: 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x82: 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA - 0x83: 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE - 0x84: 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE - 0x85: 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x86: 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x87: 0x00E1 # LATIN SMALL LETTER A WITH ACUTE - 0x88: 0x00E0 # LATIN SMALL LETTER A WITH GRAVE - 0x89: 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x8A: 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS - 0x8B: 0x00E3 # LATIN SMALL LETTER A WITH TILDE - 0x8C: 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE - 0x8D: 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA - 0x8E: 0x00E9 # LATIN SMALL LETTER E WITH ACUTE - 0x8F: 0x00E8 # LATIN SMALL LETTER E WITH GRAVE - 0x90: 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x91: 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS - 0x92: 0x00ED # LATIN SMALL LETTER I WITH ACUTE - 0x93: 0x00EC # LATIN SMALL LETTER I WITH GRAVE - 0x94: 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x95: 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS - 0x96: 0x00F1 # LATIN SMALL LETTER N WITH TILDE - 0x97: 0x00F3 # LATIN SMALL LETTER O WITH ACUTE - 0x98: 0x00F2 # LATIN SMALL LETTER O WITH GRAVE - 0x99: 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x9A: 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS - 0x9B: 0x00F5 # 
LATIN SMALL LETTER O WITH TILDE - 0x9C: 0x00FA # LATIN SMALL LETTER U WITH ACUTE - 0x9D: 0x00F9 # LATIN SMALL LETTER U WITH GRAVE - 0x9E: 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x9F: 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS - 0xA0: 0x2020 # DAGGER - 0xA1: 0x00B0 # DEGREE SIGN - 0xA2: 0x00A2 # CENT SIGN - 0xA3: 0x00A3 # POUND SIGN - 0xA4: 0x00A7 # SECTION SIGN - 0xA5: 0x2022 # BULLET - 0xA6: 0x00B6 # PILCROW SIGN - 0xA7: 0x00DF # LATIN SMALL LETTER SHARP S - 0xA8: 0x00AE # REGISTERED SIGN - 0xA9: 0x00A9 # COPYRIGHT SIGN - 0xAA: 0x2122 # TRADE MARK SIGN - 0xAB: 0x00B4 # ACUTE ACCENT - 0xAC: 0x00A8 # DIAERESIS - 0xAD: 0x2260 # NOT EQUAL TO - 0xAE: 0x00C6 # LATIN CAPITAL LETTER AE - 0xAF: 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE - 0xB0: 0x221E # INFINITY - 0xB1: 0x00B1 # PLUS-MINUS SIGN - 0xB2: 0x2264 # LESS-THAN OR EQUAL TO - 0xB3: 0x2265 # GREATER-THAN OR EQUAL TO - 0xB4: 0x00A5 # YEN SIGN - 0xB5: 0x00B5 # MICRO SIGN - 0xB6: 0x2202 # PARTIAL DIFFERENTIAL - 0xB7: 0x2211 # N-ARY SUMMATION - 0xB8: 0x220F # N-ARY PRODUCT - 0xB9: 0x03C0 # GREEK SMALL LETTER PI - 0xBA: 0x222B # INTEGRAL - 0xBB: 0x00AA # FEMININE ORDINAL INDICATOR - 0xBC: 0x00BA # MASCULINE ORDINAL INDICATOR - 0xBD: 0x03A9 # GREEK CAPITAL LETTER OMEGA - 0xBE: 0x00E6 # LATIN SMALL LETTER AE - 0xBF: 0x00F8 # LATIN SMALL LETTER O WITH STROKE - 0xC0: 0x00BF # INVERTED QUESTION MARK - 0xC1: 0x00A1 # INVERTED EXCLAMATION MARK - 0xC2: 0x00AC # NOT SIGN - 0xC3: 0x221A # SQUARE ROOT - 0xC4: 0x0192 # LATIN SMALL LETTER F WITH HOOK - 0xC5: 0x2248 # ALMOST EQUAL TO - 0xC6: 0x2206 # INCREMENT - 0xC7: 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0xC8: 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0xC9: 0x2026 # HORIZONTAL ELLIPSIS - 0xCA: 0x00A0 # NO-BREAK SPACE - 0xCB: 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE - 0xCC: 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE - 0xCD: 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE - 0xCE: 0x0152 # LATIN CAPITAL LIGATURE OE - 0xCF: 0x0153 # LATIN SMALL 
LIGATURE OE - 0xD0: 0x2013 # EN DASH - 0xD1: 0x2014 # EM DASH - 0xD2: 0x201C # LEFT DOUBLE QUOTATION MARK - 0xD3: 0x201D # RIGHT DOUBLE QUOTATION MARK - 0xD4: 0x2018 # LEFT SINGLE QUOTATION MARK - 0xD5: 0x2019 # RIGHT SINGLE QUOTATION MARK - 0xD6: 0x00F7 # DIVISION SIGN - 0xD7: 0x25CA # LOZENGE - 0xD8: 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS - 0xD9: 0x0178 # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0xDA: 0x2044 # FRACTION SLASH - 0xDB: 0x20AC # EURO SIGN - 0xDC: 0x2039 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0xDD: 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0xDE: 0xFB01 # LATIN SMALL LIGATURE FI - 0xDF: 0xFB02 # LATIN SMALL LIGATURE FL - 0xE0: 0x2021 # DOUBLE DAGGER - 0xE1: 0x00B7 # MIDDLE DOT - 0xE2: 0x201A # SINGLE LOW-9 QUOTATION MARK - 0xE3: 0x201E # DOUBLE LOW-9 QUOTATION MARK - 0xE4: 0x2030 # PER MILLE SIGN - 0xE5: 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0xE6: 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0xE7: 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE - 0xE8: 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS - 0xE9: 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE - 0xEA: 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE - 0xEB: 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0xEC: 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS - 0xED: 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE - 0xEE: 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE - 0xEF: 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0xF0: 0xF8FF # Apple logo - 0xF1: 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE - 0xF2: 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE - 0xF3: 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0xF4: 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE - 0xF5: 0x0131 # LATIN SMALL LETTER DOTLESS I - 0xF6: 0x02C6 # MODIFIER LETTER CIRCUMFLEX ACCENT - 0xF7: 0x02DC # SMALL TILDE - 0xF8: 0x00AF # MACRON - 0xF9: 0x02D8 # BREVE - 0xFA: 0x02D9 # DOT ABOVE - 0xFB: 0x02DA # RING ABOVE - 0xFC: 0x00B8 # CEDILLA - 0xFD: 0x02DD # DOUBLE ACUTE ACCENT - 0xFE: 0x02DB # OGONEK - 0xFF: 0x02C7 
# CARON - -exports.FROM_UNICODE = require('./utils').invert(exports.TO_UNICODE) \ No newline at end of file diff --git a/lib/font/subset.coffee b/lib/font/subset.coffee index cec4478..f3d3627 100644 --- a/lib/font/subset.coffee +++ b/lib/font/subset.coffee @@ -1,30 +1,37 @@ CmapTable = require './tables/cmap' -MacRoman = require './macroman' utils = require './utils' class Subset constructor: (@font) -> @subset = {} + @unicodes = {} + @next = 33 # PDFs don't like character codes between 0 and 32 use: (character) -> # if given a string, add each character if typeof character is 'string' for i in [0...character.length] @use character.charCodeAt(i) - else - @subset[MacRoman.FROM_UNICODE[character]] = character + + return + + if not @unicodes[character] + @subset[@next] = character + @unicodes[character] = @next++ encodeText: (text) -> + # encodes UTF-8 text for this subset. Returned + # text may not look correct, but it is. string = '' for i in [0...text.length] - char = MacRoman.FROM_UNICODE[text.charCodeAt(i)] + char = @unicodes[text.charCodeAt(i)] string += String.fromCharCode(char) return string cmap: -> # generate the cmap table for this subset - unicodeCmap = @font.cmap.unicode.codeMap + unicodeCmap = @font.cmap.tables[0].codeMap mapping = {} for roman, unicode of @subset mapping[roman] = unicodeCmap[unicode] @@ -33,7 +40,7 @@ class Subset glyphIDs: -> # collect glyph ids for this subset - unicodeCmap = @font.cmap.unicode.codeMap + unicodeCmap = @font.cmap.tables[0].codeMap ret = [0] for roman, unicode of @subset val = unicodeCmap[unicode] @@ -60,7 +67,7 @@ class Subset encode: -> # generate the Cmap for this subset - cmap = CmapTable.encode @cmap() + cmap = CmapTable.encode @cmap(), 'unicode' glyphs = @glyphsFor @glyphIDs() # compute old2new and new2old mapping tables @@ -82,9 +89,10 @@ class Subset name = @font.name.encode() # store for use later - @cmap = cmap.indexes @postscriptName = name.postscriptName - @charWidths = (@font.hmtx.forGlyph(id).advance for 
id in oldIDs) + @cmap = {} + for code, ids of cmap.charMap + @cmap[code] = ids.old tables = cmap: cmap.table diff --git a/lib/font/tables/cmap.coffee b/lib/font/tables/cmap.coffee index 06e401c..1d0aa12 100644 --- a/lib/font/tables/cmap.coffee +++ b/lib/font/tables/cmap.coffee @@ -17,7 +17,7 @@ class CmapTable extends Table return true - @encode: (charmap, encoding = 0) -> + @encode: (charmap, encoding = 'macroman') -> result = CmapEntry.encode(charmap, encoding) table = new Data @@ -73,16 +73,18 @@ class CmapEntry @codeMap[code] = glyphId & 0xFFFF - @encode: (charmap, format) -> + @encode: (charmap, encoding) -> subtable = new Data - switch format - when 0 # Mac Roman + codes = Object.keys(charmap).sort (a, b) -> a - b + + switch encoding + when 'macroman' id = 0 indexes = (0 for i in [0...256]) map = { 0: 0 } codeMap = {} - for code in Object.keys(charmap).sort() + for code in codes map[charmap[code]] ?= ++id codeMap[code] = old: charmap[code] @@ -100,11 +102,88 @@ class CmapEntry result = charMap: codeMap - indexes: indexes subtable: subtable.data maxGlyphID: id + 1 - when 4 # Unicode - TODO: implement - return + when 'unicode' + startCodes = [] + endCodes = [] + nextID = 0 + map = {} + charMap = {} + last = diff = null + + for code in codes + old = charmap[code] + map[old] ?= ++nextID + charMap[code] = + old: old + new: map[old] + + delta = map[old] - code + if not last? 
or delta isnt diff + endCodes.push last if last + startCodes.push code + diff = delta + + last = code + + endCodes.push last if last + endCodes.push 0xFFFF + startCodes.push 0xFFFF + + segCount = startCodes.length + segCountX2 = segCount * 2 + entrySelector = segCount.toString(2).length - 1 # floor(log2(segCount)), exact integer math + searchRange = 2 * Math.pow(2, entrySelector) # 2 * (largest power of two not exceeding segCount) + rangeShift = 2 * segCount - searchRange + + deltas = [] + rangeOffsets = [] + glyphIDs = [] + + for startCode, i in startCodes + endCode = endCodes[i] + + if startCode is 0xFFFF + deltas.push 0 + rangeOffsets.push 0 + break + + startGlyph = charMap[startCode].new + if startCode - startGlyph >= 0x8000 + deltas.push 0 + rangeOffsets.push 2 * (glyphIDs.length + segCount - i) + + for code in [startCode..endCode] + glyphIDs.push charMap[code].new + + else + deltas.push startGlyph - startCode + rangeOffsets.push 0 + + subtable.writeUInt16 3 # platformID + subtable.writeUInt16 1 # encodingID + subtable.writeUInt32 12 # offset + subtable.writeUInt16 4 # format + subtable.writeUInt16 16 + segCount * 8 + glyphIDs.length * 2 # length + subtable.writeUInt16 0 # language + subtable.writeUInt16 segCountX2 + subtable.writeUInt16 searchRange + subtable.writeUInt16 entrySelector + subtable.writeUInt16 rangeShift + + subtable.writeUInt16 code for code in endCodes + subtable.writeUInt16 0 # reserved value + subtable.writeUInt16 code for code in startCodes + + subtable.writeUInt16 delta for delta in deltas + subtable.writeUInt16 offset for offset in rangeOffsets + subtable.writeUInt16 id for id in glyphIDs + + result = + charMap: charMap + subtable: subtable.data + maxGlyphID: nextID + 1 module.exports = CmapTable \ No newline at end of file