Support sigbits encoding for 64bit integers

Fixes #61
This commit is contained in:
Sandro Santilli 2015-04-02 16:26:30 +02:00
parent d0922d28b8
commit 11012357fd
3 changed files with 204 additions and 3 deletions

2
NEWS
View File

@ -5,6 +5,8 @@
- PC_PCId(pcpoint|pcpatch)
- PC_Get(pcpoint) returns float8[]
- PC_Summary(pcpatch)
- Enhancements
- Support sigbits encoding for 64bit integers (#61)
1.0.0
-----

View File

@ -166,12 +166,14 @@ test_sigbits_encoding()
uint8_t *bytes, *ebytes;
uint16_t *bytes16, *ebytes16;
uint32_t *bytes32, *ebytes32;
uint64_t *bytes64, *ebytes64;
size_t ebytes_size;
uint32_t count, nelems;
uint8_t common8;
uint16_t common16;
uint32_t common32;
uint64_t common64;
PCBYTES pcb, epcb, pcb2;
/*
@ -294,7 +296,7 @@ test_sigbits_encoding()
pcb = initbytes(bytes, nelems*4, PC_INT32);
common32 = pc_bytes_sigbits_count_32(&pcb, &count);
CU_ASSERT_EQUAL(count, 26); /* unique bit count */
CU_ASSERT_EQUAL(count, 26); /* common bits count */
CU_ASSERT_EQUAL(common32, 103232);
epcb = pc_bytes_sigbits_encode(pcb);
@ -331,6 +333,41 @@ test_sigbits_encoding()
pc_bytes_free(epcb);
pc_bytes_free(pcb2);
/* Test the 64 bit implementation path */
nelems = 6;
bytes64 = (uint64_t[]){
103241, /* 32x0 0000000000000001 1001 0011 0100 1001 */
103251, /* 32x0 0000000000000001 1001 0011 0101 0011 */
103261, /* 32x0 0000000000000001 1001 0011 0101 1101 */
103271, /* 32x0 0000000000000001 1001 0011 0110 0111 */
103281, /* 32x0 0000000000000001 1001 0011 0111 0001 */
103291 /* 32x0 0000000000000001 1001 0011 0111 1011 */
};
bytes = (uint8_t*)bytes64;
pcb = initbytes(bytes, nelems*8, PC_INT64);
common64 = pc_bytes_sigbits_count_64(&pcb, &count);
CU_ASSERT_EQUAL(count, 58); /* common bits count */
CU_ASSERT_EQUAL(common64, 103232);
epcb = pc_bytes_sigbits_encode(pcb);
ebytes64 = (uint64_t*)(epcb.bytes);
CU_ASSERT_EQUAL(ebytes64[0], 6); /* unique bit count */
CU_ASSERT_EQUAL(ebytes64[1], 103232); /* common bits */
CU_ASSERT_EQUAL(ebytes64[2], 2681726210471362560); /* packed uint64 */
pcb2 = pc_bytes_sigbits_decode(epcb);
pc_bytes_free(epcb);
bytes64 = (uint64_t*)(pcb2.bytes);
CU_ASSERT_EQUAL(bytes64[0], 103241);
CU_ASSERT_EQUAL(bytes64[1], 103251);
CU_ASSERT_EQUAL(bytes64[2], 103261);
CU_ASSERT_EQUAL(bytes64[3], 103271);
CU_ASSERT_EQUAL(bytes64[4], 103281);
CU_ASSERT_EQUAL(bytes64[5], 103291);
pc_bytes_free(pcb2);
}
/*

View File

@ -574,7 +574,7 @@ pc_bytes_sigbits_encode_16(const PCBYTES pcb, uint16_t commonvalue, uint8_t comm
/* How wide are our unique values? */
int nbits = bitwidth - commonbits;
/* Size of output buffer (#bits/8+1remainder+4metadata) */
size_t size_out_raw = (nbits * pcb.npoints / 8) + 5;
size_t size_out_raw = (nbits * pcb.npoints / 8) + 1 + 4;
/* Make sure buffer is size to hold all our words */
size_t size_out = size_out_raw + (size_out_raw % 2);
uint8_t *bytes_out = pcalloc(size_out);
@ -668,7 +668,7 @@ pc_bytes_sigbits_encode_32(const PCBYTES pcb, uint32_t commonvalue, uint8_t comm
/* How wide are our unique values? */
int nbits = bitwidth - commonbits;
/* Size of output buffer (#bits/8+1remainder+8metadata) */
size_t size_out_raw = (nbits * pcb.npoints / 8) + 9;
size_t size_out_raw = (nbits * pcb.npoints / 8) + 1 + 8;
size_t size_out = size_out_raw + (4 - (size_out_raw % 4));
uint8_t *bytes_out = pcalloc(size_out);
/* Use this to zero out the parts that are common */
@ -742,6 +742,99 @@ pc_bytes_sigbits_encode_32(const PCBYTES pcb, uint32_t commonvalue, uint8_t comm
return pcbout;
}
/**
 * Significant-bits encoder for arrays of 64-bit words.
 *
 * Encoded array layout (all as native uint64 words):
 * <uint64> number of bits per unique section
 * <uint64> common bits for the array
 * [n_bits]... unique bits packed in, most significant bits first
 *
 * @param pcb          input bytes; pcb.bytes is read as pcb.npoints uint64 words
 * @param commonvalue  the bits shared by every word (unique bits zeroed)
 * @param commonbits   how many leading bits are shared (0..64)
 * @return a copy of pcb whose bytes/size describe a newly allocated encoded
 *         buffer, with compression set to PC_DIM_SIGBITS and readonly cleared.
 */
PCBYTES
pc_bytes_sigbits_encode_64(const PCBYTES pcb, uint64_t commonvalue, uint8_t commonbits)
{
    int i;
    int shift;
    const uint64_t *bytes = (const uint64_t*)(pcb.bytes);
    /* How wide are our words? */
    const int bitwidth = 64;
    /* How wide are our unique values? */
    int nbits = bitwidth - commonbits;
    /* Size of output buffer (#bits/8+1remainder+16metadata).
     * Widen before multiplying so the bit count cannot wrap in 32 bits. */
    size_t size_out_raw = ((size_t)nbits * pcb.npoints / 8) + 1 + 16;
    /* Pad to a whole number of 64-bit words */
    size_t size_out = size_out_raw + (8 - (size_out_raw % 8));
    uint8_t *bytes_out = pcalloc(size_out);
    /* Use this to zero out the parts that are common */
    uint64_t mask;
    /* Write head */
    uint64_t *byte_ptr = (uint64_t*)bytes_out;
    /* How many bits are still free in the word under the write head? */
    int bit = bitwidth;
    /* Write to... */
    PCBYTES pcbout = pcb;

    /* Number of unique bits goes up front */
    *byte_ptr = nbits;
    byte_ptr++;
    /* The common value we'll add the unique values to */
    *byte_ptr = commonvalue;
    byte_ptr++;

    /* The output buffer was allocated here, so it is flagged as not
     * read-only on every return path (the early return used to inherit
     * the flag from the input). */
    pcbout.size = size_out;
    pcbout.bytes = bytes_out;
    pcbout.compression = PC_DIM_SIGBITS;
    pcbout.readonly = PC_FALSE;

    /* All the values are the same: header only, nothing to pack */
    if ( nbits == 0 )
    {
        return pcbout;
    }

    /* nbits is non-zero here, so commonbits < 64 and the shift is defined */
    mask = (0xFFFFFFFFFFFFFFFFULL >> commonbits);

    for ( i = 0; i < pcb.npoints; i++ )
    {
        uint64_t val = bytes[i];
        /* Clear off common parts */
        val &= mask;
        /* How far to move unique parts to get to write head? */
        shift = bit - nbits;
        /* If non-negative, we can fit this part into the current word */
        if ( shift >= 0 )
        {
            val <<= shift;
            *byte_ptr |= val;
            bit -= nbits;
            /* Word is full: move on to a fresh one */
            if ( bit <= 0 )
            {
                bit = bitwidth;
                byte_ptr++;
            }
        }
        /* If negative, then we need to split this part across words */
        else
        {
            /* Number of bits that spill into the next word (1..63).
             * Work in 64 bits: a narrower temporary would drop the
             * upper half of the value. */
            int s = -shift;
            /* First the high bits into the current word */
            *byte_ptr |= (val >> s);
            /* Then reset to write the next word */
            bit = bitwidth;
            byte_ptr++;
            /* But only those parts we didn't already write */
            *byte_ptr |= (val << (bit - s));
            bit -= s;
        }
    }

    return pcbout;
}
/**
* Convert a raw byte array into one with common bits stripped and the
* remaining bits packed in.
@ -772,6 +865,11 @@ pc_bytes_sigbits_encode(const PCBYTES pcb)
uint32_t commonvalue = pc_bytes_sigbits_count_32(&pcb, &nbits);
return pc_bytes_sigbits_encode_32(pcb, commonvalue, nbits);
}
case 8:
{
uint64_t commonvalue = pc_bytes_sigbits_count_64(&pcb, &nbits);
return pc_bytes_sigbits_encode_64(pcb, commonvalue, nbits);
}
default:
{
pcerror("%s: bits_encode cannot handle interpretation %d", __func__, pcb.interpretation);
@ -997,6 +1095,66 @@ pc_bytes_sigbits_decode_32(const PCBYTES pcb)
return pcbout;
}
/**
 * Significant-bits decoder for arrays of 64-bit words.
 *
 * Reads the two-word header (unique bit count, common value) from
 * pcb.bytes, then unpacks pcb.npoints values from the words that follow,
 * OR-ing the common value back into each one.
 *
 * The unique bit count is taken from the buffer as-is and is expected to
 * be in the range 0..64.
 *
 * @param pcb  sigbits-encoded bytes holding pcb.npoints values
 * @return a copy of pcb whose bytes/size describe a newly allocated array
 *         of pcb.npoints uint64 words, with readonly cleared.
 */
PCBYTES
pc_bytes_sigbits_decode_64(const PCBYTES pcb)
{
    int i;
    const uint64_t *bytes_ptr = (const uint64_t *)(pcb.bytes);
    int nbits;
    uint64_t commonvalue;
    uint64_t mask;
    /* How many unread bits remain in the word under the read head? */
    int bit = 64;
    size_t outbytes_size = sizeof(uint64_t) * pcb.npoints;
    uint8_t *outbytes = pcalloc(outbytes_size);
    uint64_t *obytes = (uint64_t*)outbytes;
    PCBYTES pcbout = pcb;

    /* How many unique bits? */
    nbits = (int)(*bytes_ptr);
    bytes_ptr++;

    /* What is the shared bit value? */
    commonvalue = *bytes_ptr;
    bytes_ptr++;

    if ( nbits == 0 )
    {
        /* No unique bits: every value is the common value. Handled apart
         * because the general path would shift a 64-bit word by 64. */
        for ( i = 0; i < pcb.npoints; i++ )
        {
            obytes[i] = commonvalue;
        }
    }
    else
    {
        /* Mask keeping the low nbits bits (shift amount is 0..63 here) */
        mask = (0xFFFFFFFFFFFFFFFFULL >> (64 - nbits));

        for ( i = 0; i < pcb.npoints; i++ )
        {
            int shift = bit - nbits;
            uint64_t val = *bytes_ptr;
            if ( shift >= 0 )
            {
                /* Value lies entirely inside the current word */
                val >>= shift;
                val &= mask;
                val |= commonvalue;
                obytes[i] = val;
                bit -= nbits;
                /* Word fully consumed: step to the next one so the split
                 * branch below never sees a 64-bit shift */
                if ( bit == 0 )
                {
                    bit = 64;
                    bytes_ptr++;
                }
            }
            else
            {
                /* Value straddles two words; s low bits live in the next */
                int s = -shift;
                /* High part from what is left of the current word */
                val <<= s;
                val &= mask;
                val |= commonvalue;
                obytes[i] = val;
                /* Low part from the top of the next word */
                bytes_ptr++;
                bit = 64;
                val = *bytes_ptr;
                shift = bit - s;
                val >>= shift;
                val &= mask;
                bit -= s;
                obytes[i] |= val;
            }
        }
    }

    pcbout.size = outbytes_size;
    /* NOTE(review): the decoded output is still tagged PC_DIM_SIGBITS;
     * confirm whether the caller resets the compression or whether this
     * should be the uncompressed marker. */
    pcbout.compression = PC_DIM_SIGBITS;
    pcbout.bytes = outbytes;
    pcbout.readonly = PC_FALSE;
    return pcbout;
}
PCBYTES
pc_bytes_sigbits_decode(const PCBYTES pcb)
@ -1016,6 +1174,10 @@ pc_bytes_sigbits_decode(const PCBYTES pcb)
{
return pc_bytes_sigbits_decode_32(pcb);
}
case 8:
{
return pc_bytes_sigbits_decode_64(pcb);
}
default:
{
pcerror("%s: cannot handle interpretation %d", __func__, pcb.interpretation);