From ef6578a48628fcc6017a6782b3f147289676a35e Mon Sep 17 00:00:00 2001 From: Devon Govett Date: Sun, 16 Feb 2014 02:00:18 -0800 Subject: [PATCH] Rewrite the line wrapper to use an implementation of the Unicode Line Breaking Algorithm Should solve a huge number of issues. The regular expression based word matching from before was not good. It was overzealous and caused the most bugs of anything in the project. It also didn't work at all for languages like Chinese which don't have spaces between words. The new wrapping algorithm supports all of this. It is a separate module at http://github.com/devongovett/linebreak. --- lib/line_wrapper.coffee | 99 ++++++++++++++++++++++------------------- lib/mixins/text.coffee | 11 ++--- package.json | 34 +++++++++----- 3 files changed, 79 insertions(+), 65 deletions(-) diff --git a/lib/line_wrapper.coffee b/lib/line_wrapper.coffee index f68821f..c3f5d96 100644 --- a/lib/line_wrapper.coffee +++ b/lib/line_wrapper.coffee @@ -1,6 +1,5 @@ -# This regular expression is used for splitting a string into wrappable words -WORD_RE = /([^ ,\/!.?:;\-\n]*[ ,\/!.?:;\-]*)|\n/g {EventEmitter} = require 'events' +LineBreaker = require 'linebreak' class LineWrapper extends EventEmitter constructor: (@document) -> @@ -23,7 +22,7 @@ class LineWrapper extends EventEmitter options.align = align @lastLine = false - wrap: (paragraphs, options) -> + wrap: (text, options) -> width = @document.widthOfString.bind(@document) indent = options.indent or 0 charSpacing = options.characterSpacing or 0 @@ -46,57 +45,65 @@ class LineWrapper extends EventEmitter # word width cache wordWidths = {} - @emit 'sectionStart', options, this - for text, i in paragraphs - @emit 'firstLine', options, this + breaker = new LineBreaker(text) + last = null + buffer = '' + textWidth = 0 + wc = 0 + + emitLine = => + options.textWidth = textWidth + wordSpacing * (wc - 1) + options.wordCount = wc + options.lineWidth = @lineWidth + @emit 'line', buffer, options, this + + spaceLeft = 
@lineWidth - indent + + while bk = breaker.nextBreak() + if not last? or last.required + @emit 'firstLine', options, this + + word = text.slice(last?.position or 0, bk.position) + w = wordWidths[word] ?= width(word, options) + charSpacing + wordSpacing - # split the line into words - words = text.match(WORD_RE) or [text] - - # space left on the line to fill with words - spaceLeft = @lineWidth - indent - options.lineWidth = spaceLeft - - len = words.length - buffer = '' - wc = 0 - - for word, wi in words - w = wordWidths[word] ?= width(word, options) + charSpacing + wordSpacing - - if w > spaceLeft or word is '\n' - options.textWidth = width(buffer.trim(), options) + wordSpacing * (wc - 1) - @emit 'line', buffer.trim(), options, this - - # if we've reached the edge of the page, - # continue on a new page or column - if @document.y > @maxY - @nextSection() + if w <= spaceLeft + buffer += word + lineWidth += w + wc++ + if bk.required or w > spaceLeft + if bk.required + @emit 'lastLine', options, this + + emitLine() + + # if we've reached the edge of the page, + # continue on a new page or column + if @document.y > @maxY + @nextSection() + + # reset the space left and buffer + if bk.required + spaceLeft = @lineWidth - indent + buffer = '' + lineWidth = 0 + wc = 0 + else # reset the space left and buffer spaceLeft = @lineWidth - w - buffer = if word is '\n' then '' else word + buffer = word + lineWidth = w wc = 1 - - else - # add the word to the buffer - spaceLeft -= w - buffer += word - wc++ - - # add the last line - @lastLine = true - @emit 'lastLine', options, this - options.textWidth = width(buffer.trim(), options) + wordSpacing * (wc - 1) - @emit 'line', buffer.trim(), options, this + else + spaceLeft -= w + + last = bk - # make sure that the first line of a paragraph is never by - # itself at the bottom of a page (orphans) - nextY = @document.y + @document.currentLineHeight(true) - if i < paragraphs.length - 1 and nextY > @maxY - @nextSection() + if wc > 0 + @emit 
'lastLine', options, this + emitLine() @emit 'sectionEnd', options, this diff --git a/lib/mixins/text.coffee b/lib/mixins/text.coffee index 533c21b..f9a2236 100644 --- a/lib/mixins/text.coffee +++ b/lib/mixins/text.coffee @@ -1,4 +1,3 @@ -WORD_RE = /([^ ,\/!.?:;\-\n]*[ ,\/!.?:;\-]*)|\n/g LineWrapper = require '../line_wrapper' module.exports = @@ -34,18 +33,16 @@ module.exports = # if the wordSpacing option is specified, remove multiple consecutive spaces if options.wordSpacing text = text.replace(/\s{2,}/g, ' ') - - paragraphs = text.split '\n' - + # word wrapping if options.width wrapper = new LineWrapper(this) wrapper.on 'line', @_line.bind(this) - wrapper.wrap(paragraphs, options) + wrapper.wrap text, options # render paragraphs as single lines else - @_line line, options for line in paragraphs + @_line line, options for line in text.split '\n' return this @@ -97,7 +94,7 @@ module.exports = @x -= pos wrapper.lineWidth += pos - wrapper.wrap(items, options) + wrapper.wrap items.join('\n'), options @x -= indent return this diff --git a/package.json b/package.json index cda2ed7..79a65a6 100644 --- a/package.json +++ b/package.json @@ -1,29 +1,39 @@ { "name": "pdfkit", "description": "A PDF generation library for Node.js", - "keywords": ["pdf", "pdf writer", "pdf generator", "graphics", "document", "vector"], + "keywords": [ + "pdf", + "pdf writer", + "pdf generator", + "graphics", + "document", + "vector" + ], "version": "0.2.8", "homepage": "http://pdfkit.org/", "author": { - "name": "Devon Govett", - "email": "devongovett@gmail.com", - "url": "http://badassjs.com/" + "name": "Devon Govett", + "email": "devongovett@gmail.com", + "url": "http://badassjs.com/" }, "repository": { - "type": "git", - "url": "https://github.com/devongovett/pdfkit.git" + "type": "git", + "url": "https://github.com/devongovett/pdfkit.git" }, "bugs": "http://github.com/devongovett/pdfkit/issues", "devDependencies": { - "coffee-script": ">=1.0.1" + "coffee-script": ">=1.0.1" }, 
"dependencies": { - "png-js": ">=0.1.0" + "png-js": ">=0.1.0", + "linebreak": "~0.1.0" }, "scripts": { - "prepublish": "coffee -o js -c lib/ && cp -r lib/font/data js/font/data", - "postpublish": "rm -rf ./js" + "prepublish": "coffee -o js -c lib/ && cp -r lib/font/data js/font/data", + "postpublish": "rm -rf ./js" }, "main": "./js/document", - "engine": [ "node >= v0.6.0" ] -} \ No newline at end of file + "engine": [ + "node >= v0.6.0" + ] +}