Rewrite the line wrapper to use an implementation of the Unicode Line Breaking Algorithm

Should solve a huge number of issues. The regular-expression-based word matching from before was not good. It was overzealous and caused more bugs than anything else in the project. It also didn't work at all for languages like Chinese, which don't have spaces between words.

The new wrapping algorithm supports all of this. It is a separate module at http://github.com/devongovett/linebreak.
This commit is contained in:
Devon Govett 2014-02-16 02:00:18 -08:00
parent 897630ecb9
commit ef6578a486
3 changed files with 79 additions and 65 deletions

View File

@ -1,6 +1,5 @@
# This regular expression is used for splitting a string into wrappable words
WORD_RE = /([^ ,\/!.?:;\-\n]*[ ,\/!.?:;\-]*)|\n/g
{EventEmitter} = require 'events'
LineBreaker = require 'linebreak'
class LineWrapper extends EventEmitter
constructor: (@document) ->
@ -23,7 +22,7 @@ class LineWrapper extends EventEmitter
options.align = align
@lastLine = false
wrap: (paragraphs, options) ->
wrap: (text, options) ->
width = @document.widthOfString.bind(@document)
indent = options.indent or 0
charSpacing = options.characterSpacing or 0
@ -46,57 +45,65 @@ class LineWrapper extends EventEmitter
# word width cache
wordWidths = {}
@emit 'sectionStart', options, this
for text, i in paragraphs
@emit 'firstLine', options, this
breaker = new LineBreaker(text)
last = null
buffer = ''
textWidth = 0
wc = 0
emitLine = =>
options.textWidth = textWidth + wordSpacing * (wc - 1)
options.wordCount = wc
options.lineWidth = @lineWidth
@emit 'line', buffer, options, this
spaceLeft = @lineWidth - indent
while bk = breaker.nextBreak()
if not last? or last.required
@emit 'firstLine', options, this
word = text.slice(last?.position or 0, bk.position)
w = wordWidths[word] ?= width(word, options) + charSpacing + wordSpacing
# split the line into words
words = text.match(WORD_RE) or [text]
# space left on the line to fill with words
spaceLeft = @lineWidth - indent
options.lineWidth = spaceLeft
len = words.length
buffer = ''
wc = 0
for word, wi in words
w = wordWidths[word] ?= width(word, options) + charSpacing + wordSpacing
if w > spaceLeft or word is '\n'
options.textWidth = width(buffer.trim(), options) + wordSpacing * (wc - 1)
@emit 'line', buffer.trim(), options, this
# if we've reached the edge of the page,
# continue on a new page or column
if @document.y > @maxY
@nextSection()
if w <= spaceLeft
buffer += word
lineWidth += w
wc++
if bk.required or w > spaceLeft
if bk.required
@emit 'lastLine', options, this
emitLine()
# if we've reached the edge of the page,
# continue on a new page or column
if @document.y > @maxY
@nextSection()
# reset the space left and buffer
if bk.required
spaceLeft = @lineWidth - indent
buffer = ''
lineWidth = 0
wc = 0
else
# reset the space left and buffer
spaceLeft = @lineWidth - w
buffer = if word is '\n' then '' else word
buffer = word
lineWidth = w
wc = 1
else
# add the word to the buffer
spaceLeft -= w
buffer += word
wc++
# add the last line
@lastLine = true
@emit 'lastLine', options, this
options.textWidth = width(buffer.trim(), options) + wordSpacing * (wc - 1)
@emit 'line', buffer.trim(), options, this
else
spaceLeft -= w
last = bk
# make sure that the first line of a paragraph is never by
# itself at the bottom of a page (orphans)
nextY = @document.y + @document.currentLineHeight(true)
if i < paragraphs.length - 1 and nextY > @maxY
@nextSection()
if wc > 0
@emit 'lastLine', options, this
emitLine()
@emit 'sectionEnd', options, this

View File

@ -1,4 +1,3 @@
WORD_RE = /([^ ,\/!.?:;\-\n]*[ ,\/!.?:;\-]*)|\n/g
LineWrapper = require '../line_wrapper'
module.exports =
@ -34,18 +33,16 @@ module.exports =
# if the wordSpacing option is specified, remove multiple consecutive spaces
if options.wordSpacing
text = text.replace(/\s{2,}/g, ' ')
paragraphs = text.split '\n'
# word wrapping
if options.width
wrapper = new LineWrapper(this)
wrapper.on 'line', @_line.bind(this)
wrapper.wrap(paragraphs, options)
wrapper.wrap text, options
# render paragraphs as single lines
else
@_line line, options for line in paragraphs
@_line line, options for line in text.split '\n'
return this
@ -97,7 +94,7 @@ module.exports =
@x -= pos
wrapper.lineWidth += pos
wrapper.wrap(items, options)
wrapper.wrap items.join('\n'), options
@x -= indent
return this

View File

@ -1,29 +1,39 @@
{
"name": "pdfkit",
"description": "A PDF generation library for Node.js",
"keywords": ["pdf", "pdf writer", "pdf generator", "graphics", "document", "vector"],
"keywords": [
"pdf",
"pdf writer",
"pdf generator",
"graphics",
"document",
"vector"
],
"version": "0.2.8",
"homepage": "http://pdfkit.org/",
"author": {
"name": "Devon Govett",
"email": "devongovett@gmail.com",
"url": "http://badassjs.com/"
"name": "Devon Govett",
"email": "devongovett@gmail.com",
"url": "http://badassjs.com/"
},
"repository": {
"type": "git",
"url": "https://github.com/devongovett/pdfkit.git"
"type": "git",
"url": "https://github.com/devongovett/pdfkit.git"
},
"bugs": "http://github.com/devongovett/pdfkit/issues",
"devDependencies": {
"coffee-script": ">=1.0.1"
"coffee-script": ">=1.0.1"
},
"dependencies": {
"png-js": ">=0.1.0"
"png-js": ">=0.1.0",
"linebreak": "~0.1.0"
},
"scripts": {
"prepublish": "coffee -o js -c lib/ && cp -r lib/font/data js/font/data",
"postpublish": "rm -rf ./js"
"prepublish": "coffee -o js -c lib/ && cp -r lib/font/data js/font/data",
"postpublish": "rm -rf ./js"
},
"main": "./js/document",
"engine": [ "node >= v0.6.0" ]
}
"engine": [
"node >= v0.6.0"
]
}