Get rid of PDFObject.s and use JS String object to represent PDF strings

Also improves escaping based on the PDF spec, and automatically detects Unicode strings (those containing non-ASCII characters), which are converted to UTF-16BE.
2025-12-08 20:15:54 +00:00 · 2015-03-19 20:22:44 -07:00 · 2015-03-19 20:22:44 -07:00 · 22a9bfdfab
commit 22a9bfdfab
parent 1e0e7f9567
3 changed files with 69 additions and 54 deletions
--- a/lib/document.coffee
+++ b/lib/document.coffee
@ -172,7 +172,7 @@ class PDFDocument extends stream.Readable
    @_info = @ref()
    for key, val of @info
      if typeof val is 'string'
-        val = PDFObject.s val, true
+        val = new String val
              
      @_info.data[key] = val
        
--- a/lib/mixins/annotations.coffee
+++ b/lib/mixins/annotations.coffee
@ -1,5 +1,3 @@
-PDFObject = require '../object'
-
 module.exports = 
  annotate: (x, y, w, h, options) ->
    options.Type = 'Annot'
@ -9,7 +7,7 @@ module.exports =
    delete options.color
    
    if typeof options.Dest is 'string'
-      options.Dest = PDFObject.s options.Dest
+      options.Dest = new String options.Dest
    
    # Capitalize keys  
    for key, val of options
@ -22,7 +20,7 @@ module.exports =
    
  note: (x, y, w, h, contents, options = {}) ->
    options.Subtype = 'Text'
-    options.Contents = PDFObject.s contents, true
+    options.Contents = new String contents
    options.Name = 'Comment'
    options.color ?= [243, 223, 92]
    @annotate x, y, w, h, options
@ -31,7 +29,7 @@ module.exports =
    options.Subtype = 'Link'
    options.A = @ref
      S: 'URI'
-      URI: PDFObject.s url
+      URI: new String url
      
    options.A.end()
    @annotate x, y, w, h, options
@ -39,7 +37,7 @@ module.exports =
  _markup: (x, y, w, h, options = {}) ->
    [x1, y1, x2, y2] = @_convertRect x, y, w, h
    options.QuadPoints = [x1, y2, x2, y2, x1, y1, x2, y1]
-    options.Contents = PDFObject.s ''
+    options.Contents = new String
    @annotate x, y, w, h, options
    
  highlight: (x, y, w, h, options = {}) ->
@ -57,24 +55,24 @@ module.exports =
    
  lineAnnotation: (x1, y1, x2, y2, options = {}) ->
    options.Subtype = 'Line'
-    options.Contents = PDFObject.s ''
+    options.Contents = new String
    options.L = [x1, @page.height - y1, x2, @page.height - y2]
    @annotate x1, y1, x2, y2, options
    
  rectAnnotation: (x, y, w, h, options = {}) ->
    options.Subtype = 'Square'
-    options.Contents = PDFObject.s ''
+    options.Contents = new String
    @annotate x, y, w, h, options
    
  ellipseAnnotation: (x, y, w, h, options = {}) ->
    options.Subtype = 'Circle'
-    options.Contents = PDFObject.s ''
+    options.Contents = new String
    @annotate x, y, w, h, options
    
  textAnnotation: (x, y, w, h, text, options = {}) ->
    options.Subtype = 'FreeText'
-    options.Contents = PDFObject.s text, true
-    options.DA = PDFObject.s ''
+    options.Contents = new String text
+    options.DA = new String
    @annotate x, y, w, h, options
    
  _convertRect: (x1, y1, w, h) ->
--- a/lib/object.coffee
+++ b/lib/object.coffee
@ -6,30 +6,75 @@ By Devon Govett
 class PDFObject
  pad = (str, length) ->
    (Array(length + 1).join('0') + str).slice(-length)
+    
+  escapableRe = /[\n\r\t\b\f\(\)\\]/g
+  escapable = 
+    '\n': '\\n'
+    '\r': '\\r'
+    '\t': '\\t'
+    '\b': '\\b'
+    '\f': '\\f'
+    '\\': '\\\\'
+    '(': '\\('
+    ')': '\\)'
+    
+  # Convert little endian UTF-16 to big endian
+  swapBytes = (buff) ->
+    l = buff.length
+    if l & 0x01
+      throw new Error("Buffer length must be even")
+    else
+      for i in [0...l - 1] by 2
+        a = buff[i]
+        buff[i] = buff[i + 1]
+        buff[i+1] = a
+        
+    return buff
  
  @convert: (object) ->
-    if Array.isArray object
-      items = (PDFObject.convert e for e in object).join(' ')
-      '[' + items + ']'
-      
-    else if typeof object is 'string'
+    # String literals are converted to the PDF name type
+    if typeof object is 'string'
      '/' + object
+      
+    # String objects are converted to PDF strings (UTF-16)
+    else if object instanceof String
+      # Escape characters as required by the spec
+      string = object.replace escapableRe, (c) ->
+        return escapable[c]
+        
+      # Detect if this is a unicode string
+      isUnicode = false
+      for i in [0...string.length] by 1
+        if string.charCodeAt(i) > 0x7f
+          isUnicode = true
+          break
+          
+      # If so, encode it as big endian UTF-16
+      if isUnicode
+        string = swapBytes(new Buffer('\ufeff' + string, 'utf16le')).toString('binary')
+        
+      '(' + string + ')'

+    # Buffers are converted to PDF hex strings
    else if Buffer.isBuffer(object)
-      object.toString()
+      '<' + object.toString('hex') + '>'
 	            
    else if object instanceof PDFReference
      object.toString()
-      
+        
    else if object instanceof Date
-      '(D:' + pad(object.getUTCFullYear(), 4) + 
-          pad(object.getUTCMonth(), 2) + 
-          pad(object.getUTCDate(), 2) + 
-          pad(object.getUTCHours(), 2) + 
-          pad(object.getUTCMinutes(), 2) + 
-          pad(object.getUTCSeconds(), 2) + 
+      '(D:' + pad(object.getUTCFullYear(), 4) +
+              pad(object.getUTCMonth(), 2) + 
+              pad(object.getUTCDate(), 2) + 
+              pad(object.getUTCHours(), 2) + 
+              pad(object.getUTCMinutes(), 2) + 
+              pad(object.getUTCSeconds(), 2) + 
      'Z)'
      
+    else if Array.isArray object
+      items = (PDFObject.convert e for e in object).join(' ')
+      '[' + items + ']'
+        
    else if {}.toString.call(object) is '[object Object]'
      out = ['<<']
      for key, val of object
@ -40,34 +85,6 @@ class PDFObject
      
    else 
      '' + object
-      
-  # Convert Big-endian UCS-2 to Little-endian to support most PDFRreaders
-  swapBytes = (buff) ->
-    l = buff.length
-    if l & 0x01
-      throw new Error("Buffer length must be even")
-    else
-      for i in [0...l - 1] by 2
-        a = buff[i]
-        buff[i] = buff[i+1]
-        buff[i+1] = a 
-    return buff
-      
-  @s: (string, swap = false) ->
-    string = string.replace(/\\/g, '\\\\\\\\')
-      .replace(/\(/g, '\\(')
-      .replace(/\)/g, '\\)')
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&amp;/g, '&')
-      
-    if swap
-      string = swapBytes(new Buffer('\ufeff' + string, 'ucs-2')).toString('binary')
-    
-    return {
-      isString: yes
-      toString: -> string
-    }
-    
+          
 module.exports = PDFObject
 PDFReference = require './reference'