diff --git a/NEWS b/NEWS
index 26b040b..39773e7 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,8 @@
   - PC_PCId(pcpoint|pcpatch)
   - PC_Get(pcpoint) returns float8[]
   - PC_Summary(pcpatch)
+- Enhancements
+  - Support sigbits encoding for 64bit integers (#61)
 
 1.0.0
 -----
diff --git a/lib/cunit/cu_pc_bytes.c b/lib/cunit/cu_pc_bytes.c
index 4644779..bd80b6a 100644
--- a/lib/cunit/cu_pc_bytes.c
+++ b/lib/cunit/cu_pc_bytes.c
@@ -166,12 +166,14 @@ test_sigbits_encoding()
 	uint8_t *bytes, *ebytes;
 	uint16_t *bytes16, *ebytes16;
 	uint32_t *bytes32, *ebytes32;
+	uint64_t *bytes64, *ebytes64;
 	size_t ebytes_size;
 
 	uint32_t count, nelems;
 	uint8_t common8;
 	uint16_t common16;
 	uint32_t common32;
+	uint64_t common64;
 	PCBYTES pcb, epcb, pcb2;
 
 	/*
@@ -294,7 +296,7 @@ test_sigbits_encoding()
 	pcb = initbytes(bytes, nelems*4, PC_INT32);
 
 	common32 = pc_bytes_sigbits_count_32(&pcb, &count);
-	CU_ASSERT_EQUAL(count, 26); /* unique bit count */
+	CU_ASSERT_EQUAL(count, 26); /* common bits count */
 	CU_ASSERT_EQUAL(common32, 103232);
 
 	epcb = pc_bytes_sigbits_encode(pcb);
@@ -331,6 +333,62 @@ test_sigbits_encoding()
 	pc_bytes_free(epcb);
 	pc_bytes_free(pcb2);
 
+	/* Test the 64 bit implementation path */
+
+	nelems = 6;
+
+	bytes64 = (uint64_t[]){
+		103241, /* 32x0 0000000000000001 1001 0011 0100 1001 */
+		103251, /* 32x0 0000000000000001 1001 0011 0101 0011 */
+		103261, /* 32x0 0000000000000001 1001 0011 0101 1101 */
+		103271, /* 32x0 0000000000000001 1001 0011 0110 0111 */
+		103281, /* 32x0 0000000000000001 1001 0011 0111 0001 */
+		103291  /* 32x0 0000000000000001 1001 0011 0111 1011 */
+	};
+	bytes = (uint8_t*)bytes64;
+	pcb = initbytes(bytes, nelems*8, PC_INT64);
+
+	common64 = pc_bytes_sigbits_count_64(&pcb, &count);
+	CU_ASSERT_EQUAL(count, 58); /* common bits count */
+	CU_ASSERT_EQUAL(common64, 103232);
+
+	epcb = pc_bytes_sigbits_encode(pcb);
+	ebytes64 = (uint64_t*)(epcb.bytes);
+	CU_ASSERT_EQUAL(ebytes64[0], 6); /* unique bit count */
+	CU_ASSERT_EQUAL(ebytes64[1], 103232); /* common bits */
+	CU_ASSERT_EQUAL(ebytes64[2], 2681726210471362560); /* packed uint64 */
+
+	pcb2 = pc_bytes_sigbits_decode(epcb);
+	pc_bytes_free(epcb);
+	bytes64 = (uint64_t*)(pcb2.bytes);
+	CU_ASSERT_EQUAL(bytes64[0], 103241);
+	CU_ASSERT_EQUAL(bytes64[1], 103251);
+	CU_ASSERT_EQUAL(bytes64[2], 103261);
+	CU_ASSERT_EQUAL(bytes64[3], 103271);
+	CU_ASSERT_EQUAL(bytes64[4], 103281);
+	CU_ASSERT_EQUAL(bytes64[5], 103291);
+	pc_bytes_free(pcb2);
+
+	/* Test the 64 bit path when a packed value straddles two words */
+
+	nelems = 3;
+
+	bytes64 = (uint64_t[]){
+		0x0000001234567890, /* 24 leading zero bits, 40 unique bits */
+		0x000000FEDCBA9876, /* 24 bits land in word 0, 16 in word 1 */
+		0x0000000F0F0F0F0F
+	};
+	bytes = (uint8_t*)bytes64;
+	pcb = initbytes(bytes, nelems*8, PC_INT64);
+
+	epcb = pc_bytes_sigbits_encode(pcb);
+	pcb2 = pc_bytes_sigbits_decode(epcb);
+	pc_bytes_free(epcb);
+	bytes64 = (uint64_t*)(pcb2.bytes);
+	CU_ASSERT_EQUAL(bytes64[0], 0x0000001234567890);
+	CU_ASSERT_EQUAL(bytes64[1], 0x000000FEDCBA9876);
+	CU_ASSERT_EQUAL(bytes64[2], 0x0000000F0F0F0F0F);
+	pc_bytes_free(pcb2);
 }
 
 /*
diff --git a/lib/pc_bytes.c b/lib/pc_bytes.c
index f201fd9..df3820e 100644
--- a/lib/pc_bytes.c
+++ b/lib/pc_bytes.c
@@ -574,7 +574,7 @@ pc_bytes_sigbits_encode_16(const PCBYTES pcb, uint16_t commonvalue, uint8_t comm
 	/* How wide are our unique values? */
 	int nbits = bitwidth - commonbits;
 	/* Size of output buffer (#bits/8+1remainder+4metadata) */
-	size_t size_out_raw = (nbits * pcb.npoints / 8) + 5;
+	size_t size_out_raw = (nbits * pcb.npoints / 8) + 1 + 4;
 	/* Make sure buffer is size to hold all our words */
 	size_t size_out = size_out_raw + (size_out_raw % 2);
 	uint8_t *bytes_out = pcalloc(size_out);
@@ -668,7 +668,7 @@ pc_bytes_sigbits_encode_32(const PCBYTES pcb, uint32_t commonvalue, uint8_t comm
 	/* How wide are our unique values? */
 	int nbits = bitwidth - commonbits;
 	/* Size of output buffer (#bits/8+1remainder+8metadata) */
-	size_t size_out_raw = (nbits * pcb.npoints / 8) + 9;
+	size_t size_out_raw = (nbits * pcb.npoints / 8) + 1 + 8;
 	size_t size_out = size_out_raw + (4 - (size_out_raw % 4));
 	uint8_t *bytes_out = pcalloc(size_out);
 	/* Use this to zero out the parts that are common */
@@ -742,6 +742,108 @@ pc_bytes_sigbits_encode_32(const PCBYTES pcb, uint32_t commonvalue, uint8_t comm
 	return pcbout;
 }
 
+/**
+* Pack 64-bit words using significant-bits encoding.
+*
+* Encoded array (every field is a 64-bit word):
+*   number of bits per unique section
+*   common bits for the array
+*   [n_bits]... unique bits packed in, most significant bit first
+*
+* The returned PCBYTES is a copy of pcb whose bytes/size describe the
+* newly allocated encoded buffer; it is flagged PC_DIM_SIGBITS and
+* never read-only.
+*/
+PCBYTES
+pc_bytes_sigbits_encode_64(const PCBYTES pcb, uint64_t commonvalue, uint8_t commonbits)
+{
+	int i;
+	int shift;
+	uint64_t *bytes = (uint64_t*)(pcb.bytes);
+
+	/* How wide are our words? */
+	static int bitwidth = 64;
+	/* How wide are our unique values? */
+	int nbits = bitwidth - commonbits;
+	/* Size of output buffer (#bits/8+1remainder+16metadata) */
+	size_t size_out_raw = ((size_t)nbits * pcb.npoints / 8) + 1 + 16;
+	size_t size_out = size_out_raw + (8 - (size_out_raw % 8));
+	uint8_t *bytes_out = pcalloc(size_out);
+	/* Use this to zero out the parts that are common */
+	/* (shifting a 64-bit word by 64 is undefined, so special-case it) */
+	uint64_t mask = nbits > 0 ? (0xFFFFFFFFFFFFFFFF >> commonbits) : 0;
+	/* Write head */
+	uint64_t *byte_ptr = (uint64_t*)bytes_out;
+	/* What bit are we writing to now? */
+	int bit = bitwidth;
+	/* Write to... */
+	PCBYTES pcbout = pcb;
+
+	/* Number of unique bits goes up front */
+	*byte_ptr = nbits;
+	byte_ptr++;
+	/* The common value we'll add the unique values to */
+	*byte_ptr = commonvalue;
+	byte_ptr++;
+
+	/* All the values are the same... */
+	if ( bitwidth == commonbits )
+	{
+		pcbout.size = size_out;
+		pcbout.bytes = bytes_out;
+		pcbout.compression = PC_DIM_SIGBITS;
+		/* bytes_out is freshly allocated, same as on the path below */
+		pcbout.readonly = PC_FALSE;
+		return pcbout;
+	}
+
+	for ( i = 0; i < pcb.npoints; i++ )
+	{
+		uint64_t val = bytes[i];
+		/* Clear off common parts */
+		val &= mask;
+		/* How far to move unique parts to get to write head? */
+		shift = bit - nbits;
+		/* If positive, we can fit this part into the current word */
+		if ( shift >= 0 )
+		{
+			val <<= shift;
+			*byte_ptr |= val;
+			bit -= nbits;
+			if ( bit <= 0 )
+			{
+				bit = bitwidth;
+				byte_ptr++;
+			}
+		}
+		/* If negative, then we need to split this part across words */
+		else
+		{
+			/* First fit the high bits into the current word */
+			/* (v must be 64 bits wide or the value is truncated) */
+			uint64_t v = val;
+			int s = abs(shift);
+			v >>= s;
+			*byte_ptr |= v;
+			/* Then reset to write the next word */
+			bit = bitwidth;
+			byte_ptr++;
+			v = val;
+			shift = bit - s;
+			/* But only those parts we didn't already write */
+			v <<= shift;
+			*byte_ptr |= v;
+			bit -= s;
+		}
+	}
+
+	pcbout.size = size_out;
+	pcbout.bytes = bytes_out;
+	pcbout.compression = PC_DIM_SIGBITS;
+	pcbout.readonly = PC_FALSE;
+	return pcbout;
+}
+
 /**
 * Convert a raw byte array into with common bits stripped and the
 * remaining bits packed in.
@@ -772,6 +874,11 @@ pc_bytes_sigbits_encode(const PCBYTES pcb)
 		uint32_t commonvalue = pc_bytes_sigbits_count_32(&pcb, &nbits);
 		return pc_bytes_sigbits_encode_32(pcb, commonvalue, nbits);
 	}
+	case 8:
+	{
+		uint64_t commonvalue = pc_bytes_sigbits_count_64(&pcb, &nbits);
+		return pc_bytes_sigbits_encode_64(pcb, commonvalue, nbits);
+	}
 	default:
 	{
 		pcerror("%s: bits_encode cannot handle interpretation %d", __func__, pcb.interpretation);
@@ -997,6 +1104,94 @@ pc_bytes_sigbits_decode_32(const PCBYTES pcb)
 	return pcbout;
 }
 
+/**
+* Unpack 64-bit words stored with significant-bits encoding.
+*
+* Reads the unique bit count and the common value from the first two
+* 64-bit words of pcb.bytes, then rebuilds pcb.npoints values from the
+* packed unique bits that follow.
+*
+* The returned PCBYTES is a copy of pcb whose bytes/size describe the
+* newly allocated array of decoded uint64_t values; it is never read-only.
+*/
+PCBYTES
+pc_bytes_sigbits_decode_64(const PCBYTES pcb)
+{
+	int i;
+	const uint64_t *bytes_ptr = (const uint64_t *)(pcb.bytes);
+	uint64_t nbits;
+	uint64_t commonvalue;
+	uint64_t mask;
+	int bit = 64;
+	size_t outbytes_size = sizeof(uint64_t) * pcb.npoints;
+	uint8_t *outbytes = pcalloc(outbytes_size);
+	uint64_t *obytes = (uint64_t*)outbytes;
+	PCBYTES pcbout = pcb;
+
+	/* How many unique bits? */
+	nbits = *bytes_ptr;
+	bytes_ptr++;
+	/* What is the shared bit value? */
+	commonvalue = *bytes_ptr;
+	bytes_ptr++;
+	/* Calculate mask (a 64-bit shift by 64 is undefined, so special-case it) */
+	mask = nbits ? (0xFFFFFFFFFFFFFFFF >> (bit-nbits)) : 0;
+
+	for ( i = 0; i < pcb.npoints; i++ )
+	{
+		int shift;
+		uint64_t val;
+		/* No unique bits were stored, every value is the common value */
+		if ( nbits == 0 )
+		{
+			obytes[i] = commonvalue;
+			continue;
+		}
+		/* Current word is used up, move the read head to the next one */
+		if ( bit == 0 )
+		{
+			bytes_ptr++;
+			bit = 64;
+		}
+		shift = bit - nbits;
+		val = *bytes_ptr;
+		/* If positive, this part sits entirely in the current word */
+		if ( shift >= 0 )
+		{
+			val >>= shift;
+			val &= mask;
+			val |= commonvalue;
+			obytes[i] = val;
+			bit -= nbits;
+		}
+		/* If negative, this part is split across two words */
+		else
+		{
+			int s = abs(shift);
+			val <<= s;
+			val &= mask;
+			val |= commonvalue;
+			obytes[i] = val;
+			bytes_ptr++;
+			bit = 64;
+			val = *bytes_ptr;
+			shift = bit - s;
+			val >>= shift;
+			val &= mask;
+			bit -= s;
+			obytes[i] |= val;
+		}
+	}
+
+	/* NOTE(review): the decoded array is still tagged PC_DIM_SIGBITS; */
+	/* confirm callers expect that tag on uncompressed output */
+	pcbout.size = outbytes_size;
+	pcbout.compression = PC_DIM_SIGBITS;
+	pcbout.bytes = outbytes;
+	pcbout.readonly = PC_FALSE;
+	return pcbout;
+}
+
 
 PCBYTES
 pc_bytes_sigbits_decode(const PCBYTES pcb)
@@ -1016,6 +1211,10 @@ pc_bytes_sigbits_decode(const PCBYTES pcb)
 	{
 		return pc_bytes_sigbits_decode_32(pcb);
 	}
+	case 8:
+	{
+		return pc_bytes_sigbits_decode_64(pcb);
+	}
 	default:
 	{
 		pcerror("%s: cannot handle interpretation %d", __func__, pcb.interpretation);