Get rid of PDFObject.s and use JS String object to represent PDF strings

Also improves string escaping to follow the PDF spec, and automatically detects Unicode strings, which are converted to UTF-16BE.
This commit is contained in:
Devon Govett 2015-03-19 20:22:44 -07:00
parent 1e0e7f9567
commit 22a9bfdfab
3 changed files with 69 additions and 54 deletions

View File

@ -172,7 +172,7 @@ class PDFDocument extends stream.Readable
@_info = @ref()
for key, val of @info
if typeof val is 'string'
val = PDFObject.s val, true
val = new String val
@_info.data[key] = val

View File

@ -1,5 +1,3 @@
PDFObject = require '../object'
module.exports =
annotate: (x, y, w, h, options) ->
options.Type = 'Annot'
@ -9,7 +7,7 @@ module.exports =
delete options.color
if typeof options.Dest is 'string'
options.Dest = PDFObject.s options.Dest
options.Dest = new String options.Dest
# Capitalize keys
for key, val of options
@ -22,7 +20,7 @@ module.exports =
note: (x, y, w, h, contents, options = {}) ->
options.Subtype = 'Text'
options.Contents = PDFObject.s contents, true
options.Contents = new String contents
options.Name = 'Comment'
options.color ?= [243, 223, 92]
@annotate x, y, w, h, options
@ -31,7 +29,7 @@ module.exports =
options.Subtype = 'Link'
options.A = @ref
S: 'URI'
URI: PDFObject.s url
URI: new String url
options.A.end()
@annotate x, y, w, h, options
@ -39,7 +37,7 @@ module.exports =
_markup: (x, y, w, h, options = {}) ->
[x1, y1, x2, y2] = @_convertRect x, y, w, h
options.QuadPoints = [x1, y2, x2, y2, x1, y1, x2, y1]
options.Contents = PDFObject.s ''
options.Contents = new String
@annotate x, y, w, h, options
highlight: (x, y, w, h, options = {}) ->
@ -57,24 +55,24 @@ module.exports =
lineAnnotation: (x1, y1, x2, y2, options = {}) ->
options.Subtype = 'Line'
options.Contents = PDFObject.s ''
options.Contents = new String
options.L = [x1, @page.height - y1, x2, @page.height - y2]
@annotate x1, y1, x2, y2, options
rectAnnotation: (x, y, w, h, options = {}) ->
options.Subtype = 'Square'
options.Contents = PDFObject.s ''
options.Contents = new String
@annotate x, y, w, h, options
ellipseAnnotation: (x, y, w, h, options = {}) ->
options.Subtype = 'Circle'
options.Contents = PDFObject.s ''
options.Contents = new String
@annotate x, y, w, h, options
textAnnotation: (x, y, w, h, text, options = {}) ->
options.Subtype = 'FreeText'
options.Contents = PDFObject.s text, true
options.DA = PDFObject.s ''
options.Contents = new String text
options.DA = new String
@annotate x, y, w, h, options
_convertRect: (x1, y1, w, h) ->

View File

@ -6,30 +6,75 @@ By Devon Govett
class PDFObject
# Left-pad `str` with '0' characters to a fixed width of `length`.
# `Array(length + 1).join('0')` produces a run of `length` zeros, which is
# prepended to `str`; `slice(-length)` then keeps only the last `length`
# characters, so a value already longer than `length` is cut down to its
# trailing `length` characters. Used below to format the Date fields.
pad = (str, length) ->
(Array(length + 1).join('0') + str).slice(-length)
escapableRe = /[\n\r\t\b\f\(\)\\]/g
escapable =
'\n': '\\n'
'\r': '\\r'
'\t': '\\t'
'\b': '\\b'
'\f': '\\f'
'\\': '\\\\'
'(': '\\('
')': '\\)'
# Convert little endian UTF-16 to big endian.
# Swaps every adjacent pair of bytes in `buff` in place (buff[0] <-> buff[1],
# buff[2] <-> buff[3], ...) and returns that same `buff`.
# Throws an Error when `buff.length` is odd, since the bytes could not then
# be grouped into complete 2-byte units.
# Called from `convert` on a Buffer built with the 'utf16le' encoding.
swapBytes = (buff) ->
l = buff.length
# Lowest bit set means an odd number of bytes
if l & 0x01
throw new Error("Buffer length must be even")
else
# Exclusive range stepping by 2: i indexes the first byte of each pair
for i in [0...l - 1] by 2
a = buff[i]
buff[i] = buff[i + 1]
buff[i+1] = a
return buff
@convert: (object) ->
if Array.isArray object
items = (PDFObject.convert e for e in object).join(' ')
'[' + items + ']'
else if typeof object is 'string'
# String literals are converted to the PDF name type
if typeof object is 'string'
'/' + object
# String objects are converted to PDF strings (UTF-16)
else if object instanceof String
# Escape characters as required by the spec
string = object.replace escapableRe, (c) ->
return escapable[c]
# Detect if this is a unicode string
isUnicode = false
for i in [0...string.length] by 1
if string.charCodeAt(i) > 0x7f
isUnicode = true
break
# If so, encode it as big endian UTF-16
if isUnicode
string = swapBytes(new Buffer('\ufeff' + string, 'utf16le')).toString('binary')
'(' + string + ')'
# Buffers are converted to PDF hex strings
else if Buffer.isBuffer(object)
object.toString()
'<' + object.toString('hex') + '>'
else if object instanceof PDFReference
object.toString()
else if object instanceof Date
'(D:' + pad(object.getUTCFullYear(), 4) +
pad(object.getUTCMonth(), 2) +
pad(object.getUTCDate(), 2) +
pad(object.getUTCHours(), 2) +
pad(object.getUTCMinutes(), 2) +
pad(object.getUTCSeconds(), 2) +
'(D:' + pad(object.getUTCFullYear(), 4) +
pad(object.getUTCMonth(), 2) +
pad(object.getUTCDate(), 2) +
pad(object.getUTCHours(), 2) +
pad(object.getUTCMinutes(), 2) +
pad(object.getUTCSeconds(), 2) +
'Z)'
else if Array.isArray object
items = (PDFObject.convert e for e in object).join(' ')
'[' + items + ']'
else if {}.toString.call(object) is '[object Object]'
out = ['<<']
for key, val of object
@ -40,34 +85,6 @@ class PDFObject
else
'' + object
# Convert Big-endian UCS-2 to Little-endian to support most PDFRreaders
swapBytes = (buff) ->
l = buff.length
if l & 0x01
throw new Error("Buffer length must be even")
else
for i in [0...l - 1] by 2
a = buff[i]
buff[i] = buff[i+1]
buff[i+1] = a
return buff
@s: (string, swap = false) ->
string = string.replace(/\\/g, '\\\\\\\\')
.replace(/\(/g, '\\(')
.replace(/\)/g, '\\)')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
if swap
string = swapBytes(new Buffer('\ufeff' + string, 'ucs-2')).toString('binary')
return {
isString: yes
toString: -> string
}
module.exports = PDFObject
PDFReference = require './reference'