Rough start of GHT compression support

This commit is contained in:
Paul Ramsey 2013-04-30 16:51:23 -07:00
parent f9a3981fcf
commit 309dddf4c8
11 changed files with 528 additions and 15 deletions

View File

@ -260,15 +260,18 @@ fi
if test $FOUND_GHT_H -a $FOUND_GHT_LIB; then
AC_DEFINE([HAVE_LIBGHT])
AC_SUBST([GHT_LDFLAGS])
AC_SUBST([GHT_CPPFLAGS])
GHT_STATUS="found"
GHT_STATUS="enabled"
if test $GHTDIR; then
GHT_STATUS="$GHTDIR"
fi
else
GHT_STATUS="not found"
fi
GHT_LDFLAGS=""
GHT_CPPFLAGS=""
GHT_STATUS="disabled"
fi
AC_SUBST([GHT_LDFLAGS])
AC_SUBST([GHT_CPPFLAGS])
dnl ===========================================================================
dnl Output the relevant files

View File

@ -1,8 +1,8 @@
include ../config.mk
CPPFLAGS = $(XML2_CPPFLAGS) $(ZLIB_CPPFLAGS)
LDFLAGS = $(XML2_LDFLAGS) $(ZLIB_LDFLAGS)
CPPFLAGS = $(XML2_CPPFLAGS) $(ZLIB_CPPFLAGS) $(GHT_CPPFLAGS)
# Linker flags for the optional libraries; GHT_LDFLAGS is substituted by configure
LDFLAGS = $(XML2_LDFLAGS) $(ZLIB_LDFLAGS) $(GHT_LDFLAGS)
OBJS = \
pc_bytes.o \
@ -11,6 +11,7 @@ OBJS = \
pc_patch.o \
pc_patch_dimensional.o \
pc_patch_uncompressed.o \
pc_patch_ght.o \
pc_point.o \
pc_pointlist.o \
pc_schema.o \

View File

@ -1,8 +1,8 @@
include ../../config.mk
CPPFLAGS = $(XML2_CPPFLAGS) $(CUNIT_CPPFLAGS) $(ZLIB_CPPFLAGS) -I..
LDFLAGS = $(XML2_LDFLAGS) $(CUNIT_LDFLAGS) $(ZLIB_LDFLAGS)
CPPFLAGS = $(XML2_CPPFLAGS) $(CUNIT_CPPFLAGS) $(ZLIB_CPPFLAGS) $(GHT_CPPFLAGS) -I..
LDFLAGS = $(XML2_LDFLAGS) $(CUNIT_LDFLAGS) $(ZLIB_LDFLAGS) $(GHT_LDFLAGS)
EXE = cu_tester

View File

@ -18,8 +18,13 @@
#include <string.h>
#include <stdint.h>
#include "pc_config.h"
#include "hashtable.h"
#ifdef HAVE_LIBGHT
#include "ght.h"
#endif
/**********************************************************************
* DATA STRUCTURES
*/
@ -179,7 +184,9 @@ typedef struct
const PCSCHEMA *schema;
uint32_t npoints; /* How many points we have */
double xmin, xmax, ymin, ymax;
uint8_t *data;
#ifdef HAVE_LIBGHT
GhtTree *ght;
#endif
} PCPATCH_GHT;
@ -214,6 +221,10 @@ void pc_set_handlers(pc_allocator allocator, pc_reallocator reallocator,
pc_deallocator deallocator, pc_message_handler error_handler,
pc_message_handler info_handler, pc_message_handler warning_handler);
/** Pass allocators and message handlers on to LibGHT; does nothing when built without LibGHT */
void pc_set_ght_handlers(pc_allocator allocator, pc_reallocator reallocator,
pc_deallocator deallocator, pc_message_handler error_handler,
pc_message_handler info_handler, pc_message_handler warning_handler);
/** Set program to use system memory allocators and messaging */
void pc_install_default_handlers(void);
@ -251,6 +262,10 @@ PCDIMENSION* pc_schema_get_dimension_by_name(const PCSCHEMA *s, const char *name
uint32_t pc_schema_is_valid(const PCSCHEMA *s);
/** Create a full copy of the schema and dimensions it contains */
PCSCHEMA* pc_schema_clone(const PCSCHEMA *s);
/** Add/overwrite a dimension in a schema */
void pc_schema_set_dimension(PCSCHEMA *s, PCDIMENSION *d);
/** Check/set the x/y position in the dimension list */
void pc_schema_check_xy(PCSCHEMA *s);
/**********************************************************************
@ -292,6 +307,9 @@ int pc_point_get_double_by_name(const PCPOINT *pt, const char *name, double *d);
/** Casts dimension value to double and scale/offset appropriately before returning */
int pc_point_get_double_by_index(const PCPOINT *pt, uint32_t idx, double *d);
/** Reads a double right off the data area */
int pc_point_get_double(const PCPOINT *pt, const PCDIMENSION *dim, double *d);
/** Returns X coordinate */
double pc_point_get_x(const PCPOINT *pt);

View File

@ -164,6 +164,10 @@ PCPATCH_UNCOMPRESSED* pc_patch_uncompressed_from_pointlist(const PCPOINTLIST *pl
PCPATCH_UNCOMPRESSED* pc_patch_uncompressed_from_dimensional(const PCPATCH_DIMENSIONAL *pdl);
int pc_patch_uncompressed_add_point(PCPATCH_UNCOMPRESSED *c, const PCPOINT *p);
/* GHT PATCHES */
/* Build a GHT patch from an uncompressed patch */
PCPATCH_GHT* pc_patch_ght_from_uncompressed(const PCPATCH_UNCOMPRESSED *pa);
/* Build a GHT patch from a point list, going through an intermediate uncompressed patch */
PCPATCH_GHT* pc_patch_ght_from_pointlist(const PCPOINTLIST *pdl);
/****************************************************************************

View File

@ -104,6 +104,18 @@ void pc_set_handlers(pc_allocator allocator, pc_reallocator reallocator,
pc_context.info = info_handler;
}
/**
 * Hand the caller's memory and messaging callbacks to LibGHT.
 * The body is empty when HAVE_LIBGHT is not defined.
 */
void pc_set_ght_handlers(pc_allocator allocator, pc_reallocator reallocator,
                         pc_deallocator deallocator, pc_message_handler error_handler,
                         pc_message_handler info_handler, pc_message_handler warn_handler)
{
#ifdef HAVE_LIBGHT
    /* The pointcloud callback types are cast to their LibGHT counterparts */
    GhtAllocator ght_alloc = (GhtAllocator)allocator;
    GhtReallocator ght_realloc = (GhtReallocator)reallocator;
    GhtDeallocator ght_dealloc = (GhtDeallocator)deallocator;
    GhtMessageHandler ght_error = (GhtMessageHandler)error_handler;
    GhtMessageHandler ght_info = (GhtMessageHandler)info_handler;
    GhtMessageHandler ght_warn = (GhtMessageHandler)warn_handler;

    ght_set_handlers(ght_alloc, ght_realloc, ght_dealloc,
                     ght_error, ght_info, ght_warn);
#endif
}
void *
pcalloc(size_t size)

450
lib/pc_patch_ght.c Normal file
View File

@ -0,0 +1,450 @@
/***********************************************************************
* pc_patch_ght.c
*
* GHT compressed patch handling. Create, get and set values from the
* geohashtree (ght) ordered PCPATCH structure.
*
* PgSQL Pointcloud is free and open source software provided
* by the Government of Canada
* Copyright (c) 2013 Natural Resources Canada
*
***********************************************************************/
#include <math.h>
#include <assert.h>
#include "pc_api_internal.h"
/* Includes and functions that expect GHT headers and definitions */
#ifdef HAVE_LIBGHT
/**
 * Translate a LibGHT type code into the matching pointcloud type code.
 *
 * @param ghttype LibGHT type code
 * @return the corresponding PC_* code, or PC_UNKNOWN for any value that
 *         has no explicit mapping
 */
static int
pc_type_from_ght_type(const GhtType ghttype)
{
    switch (ghttype)
    {
    case GHT_UNKNOWN:
        return PC_UNKNOWN;
    case GHT_INT8:
        return PC_INT8;
    case GHT_UINT8:
        return PC_UINT8;
    case GHT_INT16:
        return PC_INT16;
    case GHT_UINT16:
        return PC_UINT16;
    case GHT_INT32:
        return PC_INT32;
    case GHT_UINT32:
        return PC_UINT32;
    case GHT_INT64:
        return PC_INT64;
    case GHT_UINT64:
        return PC_UINT64;
    case GHT_DOUBLE:
        return PC_DOUBLE;
    case GHT_FLOAT:
        return PC_FLOAT;
    default:
        break;
    }
    /* Never fall off the end of a non-void function on an unmapped value */
    return PC_UNKNOWN;
}
/**
 * Translate a pointcloud type code into the matching LibGHT type code.
 *
 * @param pctype pointcloud PC_* type code
 * @return the corresponding GHT_* code, or GHT_UNKNOWN for any value that
 *         has no explicit mapping
 */
static GhtType
ght_type_from_pc_type(const int pctype)
{
    switch (pctype)
    {
    case PC_UNKNOWN:
        return GHT_UNKNOWN;
    case PC_INT8:
        return GHT_INT8;
    case PC_UINT8:
        return GHT_UINT8;
    case PC_INT16:
        return GHT_INT16;
    case PC_UINT16:
        return GHT_UINT16;
    case PC_INT32:
        return GHT_INT32;
    case PC_UINT32:
        return GHT_UINT32;
    case PC_INT64:
        return GHT_INT64;
    case PC_UINT64:
        return GHT_UINT64;
    case PC_DOUBLE:
        return GHT_DOUBLE;
    case PC_FLOAT:
        return GHT_FLOAT;
    default:
        break;
    }
    /* pctype is a plain int, so values outside the PC_* set must still yield a result */
    return GHT_UNKNOWN;
}
static GhtDimension *
ght_dimension_from_pc_dimension(const PCDIMENSION *pcdim)
{
int i;
GhtDimension *dim;
ght_dimension_new(&dim);
if ( pcdim->name )
{
dim->name = pcstrdup(pcdim->name);
}
if ( pcdim->description )
{
dim->description = pcstrdup(pcdim->description);
}
dim->scale = pcdim->scale;
dim->offset = pcdim->offset;
dim->type = ght_type_from_pc_type(pcdim->interpretation);
return dim;
}
static GhtSchema *
ght_schema_from_pc_schema(const PCSCHEMA *pcschema)
{
int i;
GhtSchema *schema;
ght_schema_new(&schema);
for ( i = 0; i < pcschema->ndims; i++ )
{
GhtDimension *dim = ght_dimension_from_pc_dimension(pcschema->dims[i]);
ght_schema_add_dimension(schema, dim);
}
return schema;
}
#endif /* HAVE_LIBGHT */
/**
 * Placeholder for LibGHT handler initialization.
 * Currently does nothing, whether or not HAVE_LIBGHT is defined.
 */
void
pc_init_ght_handlers(void)
{
}
PCPATCH_GHT *
pc_patch_ght_from_pointlist(const PCPOINTLIST *pdl)
{
PCPATCH_UNCOMPRESSED *patch = pc_patch_uncompressed_from_pointlist(pdl);
PCPATCH_GHT *ghtpatch = pc_patch_ght_from_uncompressed(patch);
pc_patch_uncompressed_free(patch);
return ghtpatch;
}
/**
 * Build a GHT patch from an uncompressed patch.
 *
 * Every point becomes a tree node keyed on its x/y coordinate; all other
 * dimensions are attached to the node as attributes. The resulting patch
 * shares the schema pointer and the extent of the input patch.
 *
 * @param pa uncompressed patch to convert
 * @return a new PCPATCH_GHT, or NULL when pa is NULL, empty, has no
 *         schema, or any LibGHT call fails. Without HAVE_LIBGHT an error
 *         is reported through pcerror and NULL is returned.
 */
PCPATCH_GHT *
pc_patch_ght_from_uncompressed(const PCPATCH_UNCOMPRESSED *pa)
{
#ifndef HAVE_LIBGHT
    pcerror("%s: libght support is not enabled", __func__);
    return NULL;
#else
    int i, j;
    int pointcount = 0;
    GhtSchema *schema = NULL;
    GhtTree *tree = NULL;
    GhtCoordinate coord;
    GhtNode *node = NULL;
    PCPOINT pt;
    PCDIMENSION *xdim, *ydim;
    PCPATCH_GHT *paght = NULL;
    size_t pt_size;

    /* Cannot handle empty patches; check before touching pa->schema */
    if ( ! pa || ! pa->npoints ) return NULL;
    if ( ! pa->schema ) return NULL;

    pt_size = pa->schema->size;
    pt.schema = pa->schema;
    pt.readonly = PC_TRUE;

    xdim = pa->schema->dims[pa->schema->x_position];
    ydim = pa->schema->dims[pa->schema->y_position];

    schema = ght_schema_from_pc_schema(pa->schema);
    if ( ! schema ) return NULL;

    /* NOTE(review): schema is not released if tree creation fails -- confirm the LibGHT destructor to call */
    if ( ght_tree_new(schema, &tree) != GHT_OK ) return NULL;

    /* Build up the tree from the points. */
    for ( i = 0; i < pa->npoints; i++ )
    {
        pt.data = pa->data + pt_size * i;
        pc_point_get_double(&pt, xdim, &(coord.x));
        pc_point_get_double(&pt, ydim, &(coord.y));

        /* Build a node from the x/y information */
        /* TODO, make resolution configurable from the schema */
        if ( ght_node_new_from_coordinate(&coord, GHT_MAX_HASH_LENGTH, &node) != GHT_OK )
            goto fail;

        /* Add attributes to the node */
        for ( j = 0; j < schema->num_dims; j++ )
        {
            PCDIMENSION *dim;
            GhtDimension *ghtdim = NULL;
            GhtAttribute *attr = NULL;
            double val;

            /* Don't add X or Y as attributes, they are already embodied in the hash */
            if ( j == pa->schema->x_position || j == pa->schema->y_position )
                continue;

            dim = pc_schema_get_dimension(pa->schema, j);
            pc_point_get_double(&pt, dim, &val);

            /*
             * Stop at the first failure so an unset ghtdim/attr is never
             * passed on to the next call.
             * NOTE(review): node is not yet owned by the tree on these
             * paths and is not released here.
             */
            if ( ght_schema_get_dimension_by_index(schema, j, &ghtdim) != GHT_OK ||
                 ght_attribute_new_from_double(ghtdim, val, &attr) != GHT_OK ||
                 ght_node_add_attribute(node, attr) != GHT_OK )
                goto fail;
        }

        /* Add the node to the tree */
        /* TODO, make duplicate handling configurable from the schema */
        if ( ght_tree_insert_node(tree, node) != GHT_OK )
            goto fail;

        pointcount++;
    }

    /* Compact the tree */
    if ( ght_tree_compact_attributes(tree) != GHT_OK )
        goto fail;

    paght = pcalloc(sizeof(PCPATCH_GHT));
    if ( ! paght )
        goto fail;

    paght->type = PC_GHT;
    paght->readonly = PC_FALSE;
    paght->schema = pa->schema;
    paght->npoints = pointcount;
    paght->xmin = pa->xmin;
    paght->xmax = pa->xmax;
    paght->ymin = pa->ymin;
    paght->ymax = pa->ymax;
    paght->ght = tree;
    return paght;

fail:
    ght_tree_free(tree);
    return NULL;
#endif
}
/**
 * Release a GHT patch and the tree it holds. The schema pointer is left
 * alone; pc_patch_ght_from_uncompressed only copies that pointer from
 * the source patch.
 *
 * Without HAVE_LIBGHT this only reports an error through pcerror.
 *
 * @param paght patch to free; must be non-NULL and have a schema
 */
void
pc_patch_ght_free(PCPATCH_GHT *paght)
{
#ifndef HAVE_LIBGHT
    pcerror("%s: libght support is not enabled", __func__);
    return;
#else
    assert(paght);
    assert(paght->schema);

    if ( paght->ght )
    {
        ght_tree_free(paght->ght);
    }

    pcfree(paght);
#endif
}
#if 0
/* Done */
/*
* Disabled scaffolding (inside #if 0): convert a GHT patch back into an
* uncompressed patch. Allocates the output patch, copies point count and
* extent, then fills the point buffer one dimension at a time.
* NOTE(review): the body still calls the dimensional helpers
* (pc_patch_dimensional_decompress, ->bytes[]) on a PCPATCH_GHT; it looks
* carried over from the dimensional implementation and needs a GHT-based
* rewrite before being enabled.
*/
PCPATCH_UNCOMPRESSED *
pc_patch_uncompressed_from_ght(const PCPATCH_GHT *pdl)
{
int i, j, npoints;
PCPATCH_UNCOMPRESSED *patch;
PCPATCH_DIMENSIONAL *pdl_uncompressed;
const PCSCHEMA *schema;
uint8_t *buf;
npoints = pdl->npoints;
schema = pdl->schema;
/* Output patch header: same schema, point count and extent as the input */
patch = pcalloc(sizeof(PCPATCH_UNCOMPRESSED));
patch->schema = schema;
patch->npoints = npoints;
patch->maxpoints = npoints;
patch->readonly = PC_FALSE;
patch->type = PC_NONE;
patch->xmin = pdl->xmin;
patch->xmax = pdl->xmax;
patch->ymin = pdl->ymin;
patch->ymax = pdl->ymax;
/* One schema->size record per point */
patch->datasize = schema->size * pdl->npoints;
patch->data = pcalloc(patch->datasize);
buf = patch->data;
/* Can only read from uncompressed dimensions */
pdl_uncompressed = pc_patch_dimensional_decompress(pdl);
for ( i = 0; i < npoints; i++ )
{
for ( j = 0; j < schema->ndims; j++ )
{
/* Copy value i of dimension j into its byte offset within point record i */
PCDIMENSION *dim = pc_schema_get_dimension(schema, j);
uint8_t *in = pdl_uncompressed->bytes[j].bytes + dim->size * i;
uint8_t *out = buf + dim->byteoffset;
memcpy(out, in, dim->size);
}
/* Advance to the next point record */
buf += schema->size;
}
pc_patch_dimensional_free(pdl_uncompressed);
return patch;
}
/*
 * Disabled scaffolding (inside #if 0): render a GHT patch as text by
 * converting it to an uncompressed patch and stringifying that. The
 * temporary uncompressed patch is freed before returning.
 */
char *
pc_patch_ght_to_string(const PCPATCH_GHT *pa)
{
    char *text;
    PCPATCH_UNCOMPRESSED *flat;

    flat = pc_patch_uncompressed_from_ght(pa);
    text = pc_patch_uncompressed_to_string(flat);
    pc_patch_uncompressed_free(flat);

    return text;
}
/*
 * Disabled scaffolding (inside #if 0): recompute the x/y extent of a patch
 * from its per-dimension byte arrays and store it in the patch.
 *
 * The raw min/max of each dimension are passed through
 * pc_value_scale_offset using that dimension's own definition.
 *
 * NOTE(review): this reads pdl->bytes[], which looks carried over from the
 * dimensional patch code; confirm against the PCPATCH_GHT definition
 * before enabling.
 *
 * Returns PC_SUCCESS.
 */
int
pc_patch_ght_compute_extent(PCPATCH_GHT *pdl)
{
    double xmin, xmax, ymin, ymax;
    int rv;
    PCBYTES *pcb;

    assert(pdl);
    assert(pdl->schema);

    /* Get x extremes */
    pcb = &(pdl->bytes[pdl->schema->x_position]);
    /* NOTE(review): rv is not inspected, same as before -- confirm the pc_bytes_minmax contract */
    rv = pc_bytes_minmax(pcb, &xmin, &xmax);
    xmin = pc_value_scale_offset(xmin, pdl->schema->dims[pdl->schema->x_position]);
    xmax = pc_value_scale_offset(xmax, pdl->schema->dims[pdl->schema->x_position]);
    pdl->xmin = xmin;
    pdl->xmax = xmax;

    /* Get y extremes; scale the y values, not the x values computed above */
    pcb = &(pdl->bytes[pdl->schema->y_position]);
    rv = pc_bytes_minmax(pcb, &ymin, &ymax);
    ymin = pc_value_scale_offset(ymin, pdl->schema->dims[pdl->schema->y_position]);
    ymax = pc_value_scale_offset(ymax, pdl->schema->dims[pdl->schema->y_position]);
    pdl->ymin = ymin;
    pdl->ymax = ymax;

    (void)rv;
    return PC_SUCCESS;
}
/*
* Disabled scaffolding (inside #if 0): serialize a patch to WKB. Writes a
* 13-byte header (endian flag, pcid, compression, npoints) followed by one
* serialized PCBYTES per schema dimension. The total size is stored in
* *wkbsize when wkbsize is non-NULL.
* NOTE(review): the per-dimension loop reads patch->bytes[], which looks
* carried over from the dimensional patch code -- confirm before enabling.
*/
uint8_t *
pc_patch_ght_to_wkb(const PCPATCH_GHT *patch, size_t *wkbsize)
{
/*
byte: endianness (1 = NDR, 0 = XDR)
uint32: pcid (key to POINTCLOUD_SCHEMAS)
uint32: compression (0 = no compression, 1 = dimensional, 2 = GHT)
uint32: npoints
dimensions[]: pcbytes (interpret relative to pcid and compressions)
*/
int ndims = patch->schema->ndims;
int i;
uint8_t *buf;
char endian = machine_endian();
/* endian + pcid + compression + npoints + datasize */
size_t size = 1 + 4 + 4 + 4 + pc_patch_ght_serialized_size(patch);
uint8_t *wkb = pcalloc(size);
uint32_t compression = patch->type;
uint32_t npoints = patch->npoints;
uint32_t pcid = patch->schema->pcid;
wkb[0] = endian; /* Write endian flag */
memcpy(wkb + 1, &pcid, 4); /* Write PCID */
memcpy(wkb + 5, &compression, 4); /* Write compression */
memcpy(wkb + 9, &npoints, 4); /* Write npoints */
/* Payload starts right after the 13-byte header */
buf = wkb + 13;
for ( i = 0; i < ndims; i++ )
{
size_t bsz;
PCBYTES *pcb = &(patch->bytes[i]);
// XXX printf("pcb->(size=%d, interp=%d, npoints=%d, compression=%d, readonly=%d)\n",pcb->size, pcb->interpretation, pcb->npoints, pcb->compression, pcb->readonly);
/* bsz receives the number of bytes written for this dimension */
pc_bytes_serialize(pcb, buf, &bsz);
buf += bsz;
}
if ( wkbsize ) *wkbsize = size;
return wkb;
}
/*
* Disabled scaffolding (inside #if 0): deserialize a patch from WKB. Skips
* the 13-byte header, reads one PCBYTES per schema dimension, then
* recomputes the extent.
* NOTE(review): the compression check and the patch type both use
* PC_DIMENSIONAL and the body fills patch->bytes[]; this looks carried
* over from the dimensional patch code and presumably should use PC_GHT
* and the GHT tree -- confirm before enabling.
* NOTE(review): wkbsize is never consulted, so reads are not bounded by
* the buffer length.
*/
PCPATCH *
pc_patch_ght_from_wkb(const PCSCHEMA *schema, const uint8_t *wkb, size_t wkbsize)
{
/*
byte: endianness (1 = NDR, 0 = XDR)
uint32: pcid (key to POINTCLOUD_SCHEMAS)
uint32: compression (0 = no compression, 1 = dimensional, 2 = GHT)
uint32: npoints
dimensions[]: dims (interpret relative to pcid and compressions)
*/
static size_t hdrsz = 1+4+4+4; /* endian + pcid + compression + npoints */
PCPATCH_GHT *patch;
/* Byte-swap while reading if the WKB endian flag differs from this machine */
uint8_t swap_endian = (wkb[0] != machine_endian());
uint32_t npoints, ndims;
const uint8_t *buf;
int i;
if ( wkb_get_compression(wkb) != PC_DIMENSIONAL )
{
pcerror("pc_patch_ght_from_wkb: call with wkb that is not dimensionally compressed");
return NULL;
}
npoints = wkb_get_npoints(wkb);
ndims = schema->ndims;
patch = pcalloc(sizeof(PCPATCH_GHT));
patch->npoints = npoints;
patch->type = PC_DIMENSIONAL;
patch->schema = schema;
patch->readonly = PC_FALSE;
patch->bytes = pcalloc(ndims*sizeof(PCBYTES));
/* Payload starts right after the header */
buf = wkb+hdrsz;
for ( i = 0; i < ndims; i++ )
{
PCBYTES *pcb = &(patch->bytes[i]);
PCDIMENSION *dim = schema->dims[i];
pc_bytes_deserialize(buf, dim, pcb, PC_FALSE /*readonly*/, swap_endian);
pcb->npoints = npoints;
/* Advance past the dimension just read */
buf += pc_bytes_serialized_size(pcb);
}
/* The patch is still returned if the extent computation reports failure */
if ( PC_FAILURE == pc_patch_ght_compute_extent(patch) )
pcerror("pc_patch_ght_compute_extent failed");
return (PCPATCH*)patch;
}
#endif

View File

@ -77,7 +77,7 @@ pc_point_free(PCPOINT *pt)
pcfree(pt);
}
static int
int
pc_point_get_double(const PCPOINT *pt, const PCDIMENSION *dim, double *d)
{
uint8_t *ptr;

View File

@ -172,11 +172,12 @@ pc_schema_calculate_byteoffsets(PCSCHEMA *pcs)
pcs->size = byteoffset;
}
static int
/*
* Install dimension d in schema s at the slot given by d->position,
* register it in the schema's name hash under d->name, then recompute
* the schema byte offsets.
*/
void
pc_schema_set_dimension(PCSCHEMA *s, PCDIMENSION *d)
{
/* NOTE(review): d->position is not range-checked against the dims array here */
s->dims[d->position] = d;
hashtable_insert(s->namehash, d->name, d);
/* Offsets depend on the dimension list, so refresh them after the change */
pc_schema_calculate_byteoffsets(s);
}
@ -278,9 +279,28 @@ pc_schema_to_json(const PCSCHEMA *pcs)
return str;
}
static void pc_schema_check_xy(const PCSCHEMA *s)
void pc_schema_check_xy(PCSCHEMA *s)
{
int i;
for ( i = 0; i < s->ndims; i++ )
{
char *dimname = s->dims[i]->name;
if ( strcasecmp(dimname, "X") == 0 ||
strcasecmp(dimname, "Longitude") == 0 ||
strcasecmp(dimname, "Lon") == 0 )
{
s->x_position = i;
continue;
}
if ( strcasecmp(dimname, "Y") == 0 ||
strcasecmp(dimname, "Latitude") == 0 ||
strcasecmp(dimname, "Lat") == 0 )
{
s->y_position = i;
continue;
}
}
if ( s->x_position < 0 )
pcerror("pc_schema_check_xy: invalid x_position '%d'", s->x_position);

View File

@ -15,7 +15,7 @@ REGRESS = pointcloud
# Add in build/link flags for lib
PG_CPPFLAGS += -I../lib
SHLIB_LINK += $(filter -lm, $(LIBS)) $(XML2_LDFLAGS) $(ZLIB_LDFLAGS) ../lib/$(LIB_A)
SHLIB_LINK += $(filter -lm, $(LIBS)) $(XML2_LDFLAGS) $(ZLIB_LDFLAGS) $(GHT_LDFLAGS) ../lib/$(LIB_A)
# We are going to use PGXS for sure
include $(PGXS)

View File

@ -103,6 +103,11 @@ _PG_init(void)
pc_set_handlers(pgsql_alloc, pgsql_realloc,
pgsql_free, pgsql_error,
pgsql_info, pgsql_warn);
/* This will just no-op if LibGHT is not present */
pc_set_ght_handlers(pgsql_alloc, pgsql_realloc,
pgsql_free, pgsql_error,
pgsql_info, pgsql_warn);
}
/* Module unload callback */