added numeric parser, modularized parsers

This commit is contained in:
Alexander Sulfrian 2011-01-29 13:58:18 +01:00
parent fec176e23b
commit 0d36ce0799
3 changed files with 338 additions and 258 deletions

230
lib/binaryParser.js Normal file
View File

@ -0,0 +1,230 @@
// Parser for field values delivered in binary format.
// `config` is optional; `config.encoding` selects the text encoding stored on
// the instance and falls back to 'utf8' when missing or falsy.
var BinaryParser = function(config) {
  var options = config ? config : {};
  var encoding = options.encoding;
  this.encoding = encoding ? encoding : 'utf8';
};
// Shorthand used below to attach the parse methods to the prototype.
var p = BinaryParser.prototype;
// Reads a bit field from `data`, most significant bit first.
//
//   data     - byte sequence indexed numerically (e.g. a Buffer or an array)
//   bits     - number of bits to read
//   offset   - index of the first bit, counted from the most significant bit
//              of data[0]; defaults to 0
//   callback - optional combiner invoked once per byte touched, as
//              callback(lastValue, newValue, bits), where `newValue` holds the
//              next `bits` bits right-aligned. The default combiner builds an
//              unsigned integer.
//
// Returns the value produced by the last combiner call, or 0 when no bits
// were read. Bytes missing from `data` contribute zero bits.
var parseBits = function(data, bits, offset, callback) {
  offset = offset || 0;
  callback = callback || function(lastValue, newValue, bits) { return (lastValue * Math.pow(2, bits)) + newValue; };

  var result = 0;
  var position = offset;
  var end = offset + bits;
  while (position < end) {
    var bitInByte = position % 8;
    // take everything up to the end of this byte, or less if the field ends first
    var take = Math.min(8 - bitInByte, end - position);
    // drop the bits right of the chunk, then mask off the bits left of it
    var chunk = (data[position >> 3] >> (8 - bitInByte - take)) & ((1 << take) - 1);
    result = callback(result, chunk, take);
    position += take;
  }
  return result;
};
// Decodes a binary floating point number laid out as
// [1 sign bit][exponentBits exponent bits][precisionBits fraction bits],
// most significant bit first.
//
//   data          - byte sequence holding the encoded number
//   precisionBits - width of the fraction field (23 for float32, 52 for float64)
//   exponentBits  - width of the exponent field (8 for float32, 11 for float64)
//
// Returns the decoded number. An all-ones exponent yields +/-Infinity when the
// fraction is zero and NaN otherwise; a zero exponent yields 0 or a subnormal.
var parseFloat = function(data, precisionBits, exponentBits) {
  var bias = Math.pow(2, exponentBits - 1) - 1;
  // exponent field with every bit set marks Infinity / NaN
  var specialExponent = Math.pow(2, exponentBits) - 1;
  // compare with 0 so that any non-zero sign read counts as negative
  var sign = (parseBits(data, 1) == 0) ? 1 : -1;
  var exponent = parseBits(data, exponentBits, 1);

  // accumulate the fraction: the k-th fraction bit is worth 2^-k
  var bitWeight = 1;
  var addFractionBits = function(lastValue, newValue, bits) {
    for (var i = 1; i <= bits; i++) {
      bitWeight /= 2;
      if ((newValue & (0x1 << (bits - i))) > 0) {
        lastValue += bitWeight;
      }
    }
    return lastValue;
  };
  var fraction = parseBits(data, precisionBits, exponentBits + 1, addFractionBits);

  // special cases
  if (exponent == specialExponent) {
    if (fraction == 0) {
      return sign * Infinity;
    }
    return NaN;
  }
  if (exponent == 0) {
    if (fraction == 0) {
      return 0;
    }
    // subnormal number: no implicit leading 1, fixed minimum exponent
    return sign * Math.pow(2, 1 - bias) * fraction;
  }

  // normal number: implicit leading 1 in front of the fraction
  return sign * Math.pow(2, exponent - bias) * (1 + fraction);
};
// Decodes a binary boolean: the value is true when its first byte is non-zero.
// The byte is read directly instead of through parseBits so the result does
// not depend on how parseBits handles a read of exactly one aligned byte.
p.parseBool = function(value) {
  return value[0] > 0;
};
// Decodes a 16-bit two's complement integer: the leading bit is the sign and
// the remaining 15 bits are the low-order value.
p.parseInt16 = function(value) {
  var lowBits = parseBits(value, 15, 1);
  var isNegative = (parseBits(value, 1) == 1);
  return isNegative ? -1 * (Math.pow(2, 15) - lowBits) : lowBits;
};
// Decodes a 32-bit two's complement integer: the leading bit is the sign and
// the remaining 31 bits are the low-order value.
p.parseInt32 = function(value) {
  var lowBits = parseBits(value, 31, 1);
  var isNegative = (parseBits(value, 1) == 1);
  return isNegative ? -1 * (Math.pow(2, 31) - lowBits) : lowBits;
};
// Decodes a 64-bit two's complement integer: the leading bit is the sign and
// the remaining 63 bits are the low-order value.
// The result is a JavaScript number, so magnitudes beyond 2^53 are rounded.
p.parseInt64 = function(value) {
  var lowBits = parseBits(value, 63, 1);
  var isNegative = (parseBits(value, 1) == 1);
  return isNegative ? -1 * (Math.pow(2, 63) - lowBits) : lowBits;
};
// Decodes a single precision float by delegating to parseFloat with a 23-bit
// fraction and an 8-bit exponent.
p.parseFloat32 = function(value) {
  var fractionBits = 23;
  var exponentBits = 8;
  return parseFloat(value, fractionBits, exponentBits);
};
// Decodes a double precision float by delegating to parseFloat with a 52-bit
// fraction and an 11-bit exponent.
p.parseFloat64 = function(value) {
  var fractionBits = 52;
  var exponentBits = 11;
  return parseFloat(value, fractionBits, exponentBits);
};
// Decodes a binary numeric value into a JavaScript number.
//
// Layout (16-bit fields, in order): digit count, weight of the first digit,
// sign, display scale, followed by the base-10000 digits.
// A sign field of 0xc000 marks NaN; 0 marks a positive value and any other
// sign value is treated as negative.
// The result is a double, so very large or very precise values are rounded.
p.parseNumeric = function(value) {
  var sign = parseBits(value, 16, 32);
  if (sign == 0xc000) {
    return NaN;
  }

  // the weight is a signed 16-bit field: it is negative for values whose
  // first base-10000 digit lies after the decimal point
  var exponent = parseBits(value, 16, 16);
  if (exponent >= 0x8000) {
    exponent -= 0x10000;
  }
  var weight = Math.pow(10000, exponent);

  var result = 0;
  var ndigits = parseBits(value, 16);
  for (var i = 0; i < ndigits; i++) {
    result += parseBits(value, 16, 64 + (16 * i)) * weight;
    weight /= 10000;
  }

  // round to the number of decimal places given by the scale field
  var scale = Math.pow(10, parseBits(value, 16, 48));
  return ((sign == 0) ? 1 : -1) * Math.round(result * scale) / scale;
};
// Decodes a binary timestamp: a 64-bit two's complement count of microseconds
// relative to 2000-01-01T00:00:00Z.
//
// Returns a Date (millisecond resolution) extended with:
//   usec                 - the microseconds below the millisecond (0..999)
//   getMicroSeconds()    - returns usec
//   setMicroSeconds(v)   - replaces usec
//   getUTCMicroSeconds() - returns usec
p.parseDate = function(value) {
  // read the value as two 32-bit halves so that negative values can be
  // negated without first rounding a 63-bit magnitude to a double
  var high = parseBits(value, 32);
  var low = parseBits(value, 32, 32);

  var micros;
  if (high >= 0x80000000) {
    // negative two's complement value: magnitude is (~bits) + 1
    micros = -((((0xffffffff - high) * 0x100000000) + (0xffffffff - low)) + 1);
  } else {
    micros = (high * 0x100000000) + low;
  }

  // split into whole milliseconds and the microseconds left over,
  // keeping the remainder non-negative for instants before the epoch
  var usec = micros % 1000;
  if (usec < 0) {
    usec += 1000;
  }
  var millis = (micros - usec) / 1000;

  // shift from 2000 to 1970
  var result = new Date(millis + 946684800000);

  // add microseconds to the date
  result.usec = usec;
  result.getMicroSeconds = function() {
    return this.usec;
  };
  result.setMicroSeconds = function(value) {
    this.usec = value;
  };
  result.getUTCMicroSeconds = function() {
    return this.usec;
  };
  return result;
};
// Decodes a binary array value into (nested) JavaScript arrays.
//
// Header (32-bit fields): number of dimensions, a flags word (not needed
// here), element type, then per dimension its size and lower bound (the lower
// bound is ignored). Each element follows as a 32-bit byte length and its
// content; a length of 0xffffffff marks a null element.
//
// Supported element types: 0x17 (integers) and 0x19 (strings, decoded as
// utf8). Any other type logs an error and yields undefined elements.
p.parseIntArray = p.parseStringArray = function(value) {
  var dim = parseBits(value, 32);
  var elementType = parseBits(value, 32, 64);

  // bit position of the next unread field
  var offset = 96;
  var dims = [];
  for (var i = 0; i < dim; i++) {
    // parse dimension
    dims[i] = parseBits(value, 32, offset);
    offset += 32;

    // ignore lower bounds
    offset += 32;
  }

  var parseElement = function(elementType) {
    // parse content length
    var length = parseBits(value, 32, offset);
    offset += 32;

    // parse null values
    if (length == 0xffffffff) {
      return null;
    }

    if (elementType == 0x17) {
      // int
      var width = length * 8;
      var result = parseBits(value, width, offset);
      offset += width;
      // parseBits yields an unsigned value; map the upper half of the range
      // back to negative numbers (two's complement)
      if (width > 0 && result >= Math.pow(2, width - 1)) {
        result -= Math.pow(2, width);
      }
      return result;
    }
    else if (elementType == 0x19) {
      // string
      var start = offset >> 3;
      offset += (length << 3);
      return value.toString('utf8', start, offset >> 3);
    }
    else {
      console.log("ERROR: ElementType not implemented: " + elementType);
    }
  };

  // Builds the array for the remaining dimensions; `dimension` is restored
  // to its original content before returning.
  var parseArray = function(dimension, elementType) {
    var array = [];

    if (dimension.length > 1) {
      var count = dimension.shift();
      for (var i = 0; i < count; i++) {
        array[i] = parseArray(dimension, elementType);
      }
      dimension.unshift(count);
    }
    else {
      for (var j = 0; j < dimension[0]; j++) {
        array[j] = parseElement(elementType);
      }
    }
    return array;
  };

  return parseArray(dims, elementType);
};
// Expose the BinaryParser constructor as this module's export.
module.exports = BinaryParser;

View File

@ -1,6 +1,8 @@
var EventEmitter = require('events').EventEmitter;
var sys = require('sys');var sys = require('sys');
var Result = require(__dirname + "/result");
var TextParser = require(__dirname + "/textParser");
var BinaryParser = require(__dirname + "/binaryParser");
var Query = function(config) {
this.text = config.text;
@ -30,43 +32,52 @@ var noParse = function(val) {
//creates data row metadata from the supplied
//data row information
var buildDataRowMetadata = function(msg, converters, names) {
var parsers = {
text: new TextParser(),
binary: new BinaryParser()
};
var len = msg.fields.length;
for(var i = 0; i < len; i++) {
var field = msg.fields[i];
var dataTypeId = field.dataTypeID;
var format = field.format;
names[i] = field.name;
switch(dataTypeId) {
case 20:
converters[i] = parseBinaryInt64;
converters[i] = parsers[format].parseInt64;
break;
case 21:
converters[i] = parseBinaryInt16;
converters[i] = parsers[format].parseInt16;
break;
case 23:
converters[i] = parseBinaryInt32;
converters[i] = parsers[format].parseInt32;
break;
case 26:
converters[i] = parseBinaryInt64;
converters[i] = parsers[format].parseInt64;
break;
case 1700:
case 700:
converters[i] = parseBinaryFloat32;
converters[i] = parsers[format].parseFloat32;
break;
case 701:
converters[i] = parseBinaryFloat64;
converters[i] = parsers[format].parseFloat64;
break;
case 1700:
converters[i] = parsers[format].parseNumeric;
break;
case 16:
converters[i] = function(val) {
return val == 1;
};
converters[i] = parsers[format].parseBool;
break;
case 1114:
case 1184:
converters[i] = parseDate;
converters[i] = parsers[format].parseDate;
break;
case 1008:
case 1009:
converters[i] = parsers[format].parseStringArray;
break;
case 1007:
case 1008:
converters[i] = arrayParser;
converters[i] = parsers[format].parseIntArray;
break;
default:
converters[i] = dataTypeParsers[dataTypeId] || noParse;
@ -96,6 +107,7 @@ p.submit = function(connection) {
for(var i = 0; i < msg.fields.length; i++) {
var rawValue = msg.fields[i];
row[names[i]] = rawValue === null ? null : converters[i](rawValue);
console.log(names[i] + ': ' + JSON.stringify(row[names[i]]));
}
self.emit('row', row);
@ -202,251 +214,6 @@ p.prepare = function(connection) {
connection.on('error', onCommandComplete);
};
// Parses a timestamp string of the form
// "YYYY-MM-DD HH:MM:SS[.fraction][Z|+HH[MM]|-HH[MM]]" into a Date.
// The time is interpreted as UTC and then corrected by the zone offset, if
// one is present. Throws a TypeError when the string does not match.
var dateParser = function(isoDate) {
  //TODO this could do w/ a refactor
  var dateMatcher = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\.\d{1,})?/;

  var match = dateMatcher.exec(isoDate);
  var year = match[1];
  var month = parseInt(match[2],10)-1;
  var day = match[3];
  var hour = parseInt(match[4],10);
  var min = parseInt(match[5],10);
  var seconds = parseInt(match[6], 10);

  var miliString = match[7];
  var mili = 0;
  if(miliString) {
    // Number() instead of parseFloat(): this file declares its own
    // parseFloat (the binary decoder), which shadows the global one
    mili = 1000 * Number(miliString);
  }

  // look for the zone marker in the time part only
  var tZone = /([Z|+\-])(\d{2})?(\d{2})?/.exec(isoDate.split(' ')[1]);
  //minutes to adjust for timezone
  var tzAdjust = 0;

  if(tZone) {
    var type = tZone[1];
    switch(type) {
    case 'Z': break;
    case '-':
      tzAdjust = -(((parseInt(tZone[2],10)*60)+(parseInt(tZone[3]||0,10))));
      break;
    case '+':
      tzAdjust = (((parseInt(tZone[2],10)*60)+(parseInt(tZone[3]||0,10))));
      break;
    default:
      throw new Error("Unidentifed tZone part " + type);
    }
  }

  var utcOffset = Date.UTC(year, month, day, hour, min, seconds, mili);

  var date = new Date(utcOffset - (tzAdjust * 60* 1000));
  return date;
};
// Shifts `a` left by `b` bits, one bit at a time, using plain arithmetic so
// the result stays a non-negative number below 2^32 instead of wrapping to a
// negative 32-bit integer the way `<<` does. Bits shifted past bit 31 are
// dropped.
function shl(a,b) {
  // Copyright (c) 1996 Henri Torgemane. All Rights Reserved.
  // fix for crappy <<
  for (var i=0;i<b;i++) {
    // drop bit 31, it would be shifted out anyway
    a=a%0x80000000;
    // parenthesized: `==` binds tighter than `&`
    if ((a&0x40000000)==0x40000000)
    {
      // move bit 30 to bit 31 by hand
      a-=0x40000000;
      a*=2;
      a+=0x80000000;
    } else {
      a*=2;
    }
  }
  return a;
}
// Decodes a binary floating point number laid out as
// [1 sign bit][exponentBits exponent bits][precisionBits fraction bits],
// most significant bit first.
//
//   data          - byte sequence holding the encoded number
//   precisionBits - width of the fraction field (23 for float32, 52 for float64)
//   exponentBits  - width of the exponent field (8 for float32, 11 for float64)
//
// Returns the decoded number. An all-ones exponent yields +/-Infinity when the
// fraction is zero and NaN otherwise; a zero exponent yields 0 or a subnormal.
var parseFloat = function(data, precisionBits, exponentBits) {
  var bias = Math.pow(2, exponentBits - 1) - 1;
  // exponent field with every bit set marks Infinity / NaN
  var specialExponent = Math.pow(2, exponentBits) - 1;
  // compare with 0 so that any non-zero sign read counts as negative
  var sign = (parseBits(data, 1) == 0) ? 1 : -1;
  var exponent = parseBits(data, exponentBits, 1);

  // accumulate the fraction: the k-th fraction bit is worth 2^-k
  var bitWeight = 1;
  var addFractionBits = function(lastValue, newValue, bits) {
    for (var i = 1; i <= bits; i++) {
      bitWeight /= 2;
      if ((newValue & (0x1 << (bits - i))) > 0) {
        lastValue += bitWeight;
      }
    }
    return lastValue;
  };
  var fraction = parseBits(data, precisionBits, exponentBits + 1, addFractionBits);

  // special cases
  if (exponent == specialExponent) {
    if (fraction == 0) {
      return sign * Infinity;
    }
    return NaN;
  }
  if (exponent == 0) {
    if (fraction == 0) {
      return 0;
    }
    // subnormal number: no implicit leading 1, fixed minimum exponent
    return sign * Math.pow(2, 1 - bias) * fraction;
  }

  // normal number: implicit leading 1 in front of the fraction
  return sign * Math.pow(2, exponent - bias) * (1 + fraction);
};
// Reads a bit field from `data`, most significant bit first.
//
//   data     - byte sequence indexed numerically (e.g. a Buffer or an array)
//   bits     - number of bits to read
//   offset   - index of the first bit, counted from the most significant bit
//              of data[0]; defaults to 0
//   callback - optional combiner invoked once per byte touched, as
//              callback(lastValue, newValue, bits), where `newValue` holds the
//              next `bits` bits right-aligned. The default combiner builds an
//              unsigned integer.
//
// Returns the value produced by the last combiner call, or 0 when no bits
// were read. Bytes missing from `data` contribute zero bits.
var parseBits = function(data, bits, offset, callback) {
  offset = offset || 0;
  callback = callback || function(lastValue, newValue, bits) { return (lastValue * Math.pow(2, bits)) + newValue; };

  var result = 0;
  var position = offset;
  var end = offset + bits;
  while (position < end) {
    var bitInByte = position % 8;
    // take everything up to the end of this byte, or less if the field ends first
    var take = Math.min(8 - bitInByte, end - position);
    // drop the bits right of the chunk, then mask off the bits left of it
    var chunk = (data[position >> 3] >> (8 - bitInByte - take)) & ((1 << take) - 1);
    result = callback(result, chunk, take);
    position += take;
  }
  return result;
};
// Decodes a 64-bit two's complement integer.
// The result is a JavaScript number, so magnitudes beyond 2^53 are rounded.
var parseBinaryInt64 = function(value) {
  var unsigned = parseBits(value, 64);
  // the upper half of the unsigned range encodes the negative numbers
  if (unsigned >= Math.pow(2, 63)) {
    return unsigned - Math.pow(2, 64);
  }
  return unsigned;
};
// Decodes a 32-bit two's complement integer.
var parseBinaryInt32 = function(value) {
  var unsigned = parseBits(value, 32);
  // the upper half of the unsigned range encodes the negative numbers
  if (unsigned >= 0x80000000) {
    return unsigned - 0x100000000;
  }
  return unsigned;
};
// Decodes a 16-bit two's complement integer.
var parseBinaryInt16 = function(value) {
  var unsigned = parseBits(value, 16);
  // the upper half of the unsigned range encodes the negative numbers
  if (unsigned >= 0x8000) {
    return unsigned - 0x10000;
  }
  return unsigned;
};
// Decodes a single precision float by delegating to parseFloat with a 23-bit
// fraction and an 8-bit exponent.
var parseBinaryFloat32 = function(value) {
  var fractionBits = 23;
  var exponentBits = 8;
  return parseFloat(value, fractionBits, exponentBits);
};
// Decodes a double precision float by delegating to parseFloat with a 52-bit
// fraction and an 11-bit exponent.
var parseBinaryFloat64 = function(value) {
  var fractionBits = 52;
  var exponentBits = 11;
  return parseFloat(value, fractionBits, exponentBits);
};
// Decodes a binary timestamp: a 64-bit two's complement count of microseconds
// relative to 2000-01-01T00:00:00Z.
//
// Returns a Date (millisecond resolution) extended with:
//   usec                 - the microseconds below the millisecond (0..999)
//   getMicroSeconds()    - returns usec
//   setMicroSeconds(v)   - replaces usec
//   getUTCMicroSeconds() - returns usec
var parseDate = function(value) {
  // read the value as two 32-bit halves so that negative values can be
  // negated without first rounding a 63-bit magnitude to a double
  var high = parseBits(value, 32);
  var low = parseBits(value, 32, 32);

  var micros;
  if (high >= 0x80000000) {
    // negative two's complement value: magnitude is (~bits) + 1
    micros = -((((0xffffffff - high) * 0x100000000) + (0xffffffff - low)) + 1);
  } else {
    micros = (high * 0x100000000) + low;
  }

  // split into whole milliseconds and the microseconds left over,
  // keeping the remainder non-negative for instants before the epoch
  var usec = micros % 1000;
  if (usec < 0) {
    usec += 1000;
  }
  var millis = (micros - usec) / 1000;

  // shift from 2000 to 1970
  var result = new Date(millis + 946684800000);

  // add microseconds to the date
  result.usec = usec;
  result.getMicroSeconds = function() {
    return this.usec;
  };
  result.setMicroSeconds = function(value) {
    this.usec = value;
  };
  result.getUTCMicroSeconds = function() {
    return this.usec;
  };
  return result;
};
// Decodes a binary array value into (nested) JavaScript arrays.
//
// Header (32-bit fields): number of dimensions, a flags word (not needed
// here), element type, then per dimension its size and lower bound (the lower
// bound is ignored). Each element follows as a 32-bit byte length and its
// content; a length of 0xffffffff marks a null element.
//
// Supported element types: 0x17 (integers) and 0x19 (strings, decoded as
// utf8). Any other type logs an error and yields undefined elements.
var arrayParser = function(value) {
  var dim = parseBits(value, 32);
  var elementType = parseBits(value, 32, 64);

  // bit position of the next unread field
  var offset = 96;
  var dims = [];
  for (var i = 0; i < dim; i++) {
    // parse dimension
    dims[i] = parseBits(value, 32, offset);
    offset += 32;

    // ignore lower bounds
    offset += 32;
  }

  var parseElement = function(elementType) {
    // parse content length
    var length = parseBits(value, 32, offset);
    offset += 32;

    // parse null values
    if (length == 0xffffffff) {
      return null;
    }

    if (elementType == 0x17) {
      // int
      var width = length * 8;
      var result = parseBits(value, width, offset);
      offset += width;
      // parseBits yields an unsigned value; map the upper half of the range
      // back to negative numbers (two's complement)
      if (width > 0 && result >= Math.pow(2, width - 1)) {
        result -= Math.pow(2, width);
      }
      return result;
    }
    else if (elementType == 0x19) {
      // string
      var start = offset >> 3;
      offset += (length << 3);
      return value.toString('utf8', start, offset >> 3);
    }
    else {
      console.log("ERROR: ElementType not implemented: " + elementType);
    }
  };

  // Builds the array for the remaining dimensions; `dimension` is restored
  // to its original content before returning.
  var parseArray = function(dimension, elementType) {
    var array = [];

    if (dimension.length > 1) {
      var count = dimension.shift();
      for (var i = 0; i < count; i++) {
        array[i] = parseArray(dimension, elementType);
      }
      dimension.unshift(count);
    }
    else {
      for (var j = 0; j < dimension[0]; j++) {
        array[j] = parseElement(elementType);
      }
    }
    return array;
  };

  return parseArray(dims, elementType);
};
// Exposed to help us test dateParser
Query.dateParser = dateParser;
var dataTypeParsers = {
};

83
lib/textParser.js Normal file
View File

@ -0,0 +1,83 @@
// Parser for field values delivered in text format.
// `config` is optional and currently carries no options that are read here.
var TextParser = function(config) {
  if (!config) {
    config = {};
  }
};
// Shorthand used below to attach the parse methods to the prototype.
var p = TextParser.prototype;
// Converts a text boolean: only the exact string 't' is true.
p.parseBool = function(value) {
  if (value === 't') {
    return true;
  }
  return false;
};
// Converts a decimal integer string to a number; shared by the 16, 32 and
// 64 bit variants. The radix is passed explicitly so that the input is always
// read as base 10, whatever its prefix.
// The result is a JavaScript number, so magnitudes beyond 2^53 are rounded.
p.parseInt64 = p.parseInt32 = p.parseInt16 = function(value) {
  return parseInt(value, 10);
};
// Converts a decimal number string to a number using the global parseFloat;
// shared by the numeric, float64 and float32 variants.
p.parseNumeric = p.parseFloat64 = p.parseFloat32 = function(value) {
  var parsed = parseFloat(value);
  return parsed;
};
// Parses a timestamp string of the form
// "YYYY-MM-DD HH:MM:SS[.fraction][Z|+HH[MM]|-HH[MM]]" into a Date.
// The time is interpreted as UTC and then corrected by the zone offset, if
// one is present. Returns null when the string does not match that form.
// Does not use `this`, so it can be called detached from the parser.
p.parseDate = function(value) {
  //TODO this could do w/ a refactor
  var dateMatcher = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\.\d{1,})?/;

  var match = dateMatcher.exec(value);
  // could not parse the date
  if (!match) {
    return null;
  }
  var year = match[1];
  var month = parseInt(match[2],10)-1;
  var day = match[3];
  var hour = parseInt(match[4],10);
  var min = parseInt(match[5],10);
  var seconds = parseInt(match[6], 10);

  var miliString = match[7];
  var mili = 0;
  if(miliString) {
    // global parseFloat: the parser object has no parseFloat method
    mili = 1000 * parseFloat(miliString);
  }

  // look for the zone marker in the time part only
  var tZone = /([Z|+\-])(\d{2})?(\d{2})?/.exec(value.split(' ')[1]);
  //minutes to adjust for timezone
  var tzAdjust = 0;

  if(tZone) {
    var type = tZone[1];
    switch(type) {
    case 'Z': break;
    case '-':
      tzAdjust = -(((parseInt(tZone[2],10)*60)+(parseInt(tZone[3]||0,10))));
      break;
    case '+':
      tzAdjust = (((parseInt(tZone[2],10)*60)+(parseInt(tZone[3]||0,10))));
      break;
    default:
      throw new Error("Unidentifed tZone part " + type);
    }
  }

  var utcOffset = Date.UTC(year, month, day, hour, min, seconds, mili);

  var date = new Date(utcOffset - (tzAdjust * 60* 1000));
  return date;
};
// Converts a text array of integers such as "{1,2,NULL}" or "{{1,2},{3,4}}"
// into (nested) JavaScript arrays by rewriting it as JSON.
// Returns null for an empty or missing value; NULL elements become null.
// Throws a SyntaxError when the rewritten text is not valid JSON.
p.parseIntArray = function(value) {
  if (!value) return null;
  // replace every brace, not only the first pair, so nested arrays work
  var json = value.replace(/\{/g, "[").replace(/\}/g, "]").replace(/NULL/g, "null");
  return JSON.parse(json);
};
// Converts a text array literal such as {a,"b,c",NULL,{x,y}} into (nested)
// JavaScript arrays of strings.
//
//   - unquoted NULL becomes null
//   - quoted elements may contain commas and braces; a backslash inside
//     quotes escapes the following character
//   - nested {...} groups become nested arrays
//
// Returns null for an empty or missing value. Throws an Error when the value
// is not enclosed in braces or is otherwise malformed.
p.parseStringArray = function(value) {
  if (!value) return null;
  if (value[0] !== '{' || value[value.length-1] !== '}')
    throw new Error("Not postgresql array! (" + value + ")");

  // index of the next unread character
  var pos = 0;

  // Reads a quoted element; `pos` is on the opening quote when called and
  // just behind the closing quote on return.
  var parseQuoted = function() {
    var text = '';
    pos++;
    while (pos < value.length && value[pos] !== '"') {
      if (value[pos] === '\\') {
        // keep the escaped character, drop the backslash
        pos++;
      }
      text += value[pos];
      pos++;
    }
    if (pos >= value.length) throw new Error("Not postgre array");
    pos++;
    return text;
  };

  // Reads one {...} group; `pos` is on the opening brace when called and
  // just behind the matching closing brace on return.
  var parseLevel = function() {
    var items = [];
    pos++;
    if (value[pos] === '}') {
      pos++;
      return items;
    }
    while (pos < value.length) {
      var ch = value[pos];
      if (ch === '{') {
        items.push(parseLevel());
      }
      else if (ch === '"') {
        items.push(parseQuoted());
      }
      else {
        // unquoted element: runs up to the next separator
        var start = pos;
        while (pos < value.length && value[pos] !== ',' && value[pos] !== '}') {
          pos++;
        }
        var token = value.substring(start, pos);
        items.push(token === 'NULL' ? null : token);
      }

      if (value[pos] === ',') {
        pos++;
      }
      else if (value[pos] === '}') {
        pos++;
        return items;
      }
      else {
        throw new Error("Not postgre array");
      }
    }
    throw new Error("Not postgre array");
  };

  var result = parseLevel();
  // anything left after the outermost closing brace is malformed input
  if (pos !== value.length) throw new Error("Not postgre array");
  return result;
};
// Expose the TextParser constructor as this module's export.
module.exports = TextParser;