From e7a8d42d7073d53d4d9a5d22f7a5ef48df1ff8f4 Mon Sep 17 00:00:00 2001
From: Paul Ramsey
Date: Sat, 25 May 2013 12:04:11 -0700
Subject: [PATCH] Basic stats calculator

---
Review notes (ignored by git am, not part of the commit message):
 - pc_dstats_new: the min/max seeds were swapped (min = -DBL_MAX,
   max = DBL_MAX), so no value could ever replace them. They are now
   min = DBL_MAX and max = -DBL_MAX.
 - Restored the <float.h> include that DBL_MAX needs.
 - pc_stats_new_from_dstats now fills max and avg as well as min, and takes
   the point count so it can turn the sum into an average.
 - pc_patch_calculate_stats releases old stats with pc_stats_free() instead
   of pcfree(), which leaked the min/max/avg point data, and clears the
   pointer afterwards.
 - pc_stats_free tolerates NULL. The "unknown compression type" messages now
   print the type value they were already being passed.
 - This text was re-wrapped from a whitespace-flattened copy. Indentation and
   blank context lines are reconstructed, and the pc_stats.c hunk range and
   the diffstat were recomputed to match. Regenerate with git format-patch
   before applying; the index blob ids are those of the original commit.

 lib/pc_api.h          |  30 +++----
 lib/pc_api_internal.h |   4 -
 lib/pc_patch.c        |   5 +-
 lib/pc_stats.c        | 226 ++++++++++++++++++++++++++++++++++++++----
 4 files changed, 224 insertions(+), 41 deletions(-)

diff --git a/lib/pc_api.h b/lib/pc_api.h
index a01c0e4..3c17ab3 100644
--- a/lib/pc_api.h
+++ b/lib/pc_api.h
@@ -83,23 +83,6 @@ typedef struct
 	hashtable *namehash; /* Look-up from dimension name to pointer */
 } PCSCHEMA;
 
-/* Used for generic patch statistics */
-typedef struct
-{
-	double max;
-	double min;
-	double sum;
-}
-PCSTAT;
-
-typedef struct
-{
-	uint32_t ndims;
-	uint32_t num_points;
-	PCSTAT *stats;
-}
-PCSTATS;
-
 /* Used for dimensional patch statistics */
 typedef struct
 {
@@ -156,6 +139,15 @@ typedef struct
 	double ymax;
 } PCBOUNDS;
 
+/* Used for generic patch statistics */
+typedef struct
+{
+	PCPOINT max;
+	PCPOINT min;
+	PCPOINT avg;
+}
+PCSTATS;
+
 /**
 * Uncompressed Structure for in-memory handling
 * of patches. A read-only PgSQL patch can be wrapped in
@@ -170,6 +162,7 @@ typedef struct
 	const PCSCHEMA *schema;
 	uint32_t npoints; /* How many points we have */
 	double xmin, xmax, ymin, ymax;
+	PCSTATS *stats;
 } PCPATCH;
 
 typedef struct
@@ -179,6 +172,7 @@ typedef struct
 	const PCSCHEMA *schema;
 	uint32_t npoints; /* How many points we have */
 	double xmin, xmax, ymin, ymax;
+	PCSTATS *stats;
 	uint32_t maxpoints; /* How man points we can hold (or 0 for read-only) */
 	size_t datasize;
 	uint8_t *data; /* A serialized version of the data */
@@ -191,6 +185,7 @@ typedef struct
 	const PCSCHEMA *schema;
 	uint32_t npoints; /* How many points we have */
 	double xmin, xmax, ymin, ymax;
+	PCSTATS *stats;
 	PCBYTES *bytes;
 } PCPATCH_DIMENSIONAL;
 
@@ -201,6 +196,7 @@ typedef struct
 	const PCSCHEMA *schema;
 	uint32_t npoints; /* How many points we have */
 	double xmin, xmax, ymin, ymax;
+	PCSTATS *stats;
 	size_t ghtsize;
 	uint8_t *ght;
 } PCPATCH_GHT;
diff --git a/lib/pc_api_internal.h b/lib/pc_api_internal.h
index a971d2b..bc22eab 100644
--- a/lib/pc_api_internal.h
+++ b/lib/pc_api_internal.h
@@ -83,10 +83,6 @@ enum DIMCOMPRESSIONS {
 	PC_DIM_ZLIB = 3
 };
 
-
-
-
-
 /** What is the endianness of this system? */
 char machine_endian(void);
 
diff --git a/lib/pc_patch.c b/lib/pc_patch.c
index 5a8ea81..d77256f 100644
--- a/lib/pc_patch.c
+++ b/lib/pc_patch.c
@@ -356,7 +356,7 @@ pc_patch_from_patchlist(PCPATCH **palist, int numpatches)
 			}
 			default:
 			{
-				pcerror("%s: unknown compresseion type", __func__, pa->type);
+				pcerror("%s: unknown compression type %d", __func__, pa->type);
 				break;
 			}
 		}
@@ -364,4 +364,5 @@ pc_patch_from_patchlist(PCPATCH **palist, int numpatches)
 
 	paout->npoints = totalpoints;
 	return (PCPATCH*)paout;
-}
\ No newline at end of file
+}
+
diff --git a/lib/pc_stats.c b/lib/pc_stats.c
index 85ef974..f22ea5d 100644
--- a/lib/pc_stats.c
+++ b/lib/pc_stats.c
@@ -9,35 +9,225 @@
 ***********************************************************************/
 
 #include "pc_api_internal.h"
+#include <float.h>
 
-void
-pc_stats_free(PCSTATS *pcs)
+/* PCDOUBLESTAT are members of PCDOUBLESTATS */
+typedef struct
 {
-	if ( pcs )
+	double min;
+	double max;
+	double sum;
+} PCDOUBLESTAT;
+
+/* PCDOUBLESTATS are internal to calculating stats in this module */
+typedef struct
+{
+	PCDOUBLESTAT *dims;
+} PCDOUBLESTATS;
+
+/*
+* Instantiate a new PCDOUBLESTATS for calculation, and set up
+* initial values for min/max/sum. The running minimum starts at the
+* largest double and the running maximum at the most negative one, so
+* the first value compared against them always replaces them.
+*/
+static PCDOUBLESTATS *
+pc_dstats_new(int ndims)
+{
+	int i;
+	PCDOUBLESTATS *stats = pcalloc(sizeof(PCDOUBLESTATS));
+	stats->dims = pcalloc(sizeof(PCDOUBLESTAT)*ndims);
+	for ( i = 0; i < ndims; i++ )
 	{
-		if ( pcs->stats )
-			pcfree(pcs->stats);
-		pcfree(pcs);
+		stats->dims[i].min = DBL_MAX;
+		stats->dims[i].max = -1 * DBL_MAX;
+		stats->dims[i].sum = 0;
 	}
+	return stats;
 }
 
+/**
+* Free a PCDOUBLESTATS and its per-dimension array. NULL is a no-op.
+*/
+static void
+pc_dstats_free(PCDOUBLESTATS *stats)
+{
+	if ( ! stats) return;
+	if ( stats->dims ) pcfree(stats->dims);
+	pcfree(stats);
+	return;
+}
+
+/**
+* Free the standard stats object for in memory patches. Point data is
+* only released when it is not flagged readonly. NULL is a no-op.
+*/
+static void
+pc_stats_free(PCSTATS *stats)
+{
+	if ( ! stats ) return;
+
+	if ( ! stats->min.readonly )
+		pcfree(stats->min.data);
+
+	if ( ! stats->max.readonly )
+		pcfree(stats->max.data);
+
+	if ( ! stats->avg.readonly )
+		pcfree(stats->avg.data);
+
+	pcfree(stats);
+	return;
+}
+
+/**
+* Build a standard stats object on top of a serialization, allocate just the
+* point shells and set the pointers to look into the data area of the
+* serialization.
+*/
 PCSTATS *
+pc_stats_new_from_data(const PCSCHEMA *schema, uint8_t *mindata, uint8_t *maxdata, uint8_t *avgdata)
+{
+	PCSTATS *stats = pcalloc(sizeof(PCSTATS));
+	/* All share the schema with the patch */
+	stats->min.schema = schema;
+	stats->max.schema = schema;
+	stats->avg.schema = schema;
+	/* Data points into serialization */
+	stats->min.data = mindata;
+	stats->max.data = maxdata;
+	stats->avg.data = avgdata;
+	/* Can't modify external data */
+	stats->min.readonly = PC_TRUE;
+	stats->max.readonly = PC_TRUE;
+	stats->avg.readonly = PC_TRUE;
+	/* Done */
+	return stats;
+}
+
+/**
+* Build a standard stats object with read/write memory, allocate the
+* point shells and the data areas underneath. Used for initial calculation
+* of patch stats, when objects first created.
+*/
+static PCSTATS *
 pc_stats_new(const PCSCHEMA *schema)
 {
-	PCSTATS *pcs;
+	size_t sz = schema->size;
+	PCSTATS *stats = pcalloc(sizeof(PCSTATS));
+	stats->min.schema = schema;
+	stats->max.schema = schema;
+	stats->avg.schema = schema;
+	stats->min.readonly = PC_FALSE;
+	stats->max.readonly = PC_FALSE;
+	stats->avg.readonly = PC_FALSE;
+	stats->min.data = pcalloc(sz);
+	stats->max.data = pcalloc(sz);
+	stats->avg.data = pcalloc(sz);
+	return stats;
+}
+
+/**
+* Convert the double accumulators into a read/write PCSTATS. For each
+* dimension min and max are copied over and avg is sum / npoints (0 when
+* the patch has no points, so we never divide by zero).
+*/
+static PCSTATS *
+pc_stats_new_from_dstats(const PCSCHEMA *schema, const PCDOUBLESTATS *dstats, uint32_t npoints)
+{
 	int i;
+	PCSTATS *stats = pc_stats_new(schema);
 
-	/* This situation no sense */
-	if ( ! schema || ! schema->ndims ) return NULL;
-
-	/* Allocate */
-	pcs = pcalloc(sizeof(PCSTATS));
-	pcs->stats = pcalloc(schema->ndims * sizeof(PCSTAT));
-	pcs->ndims = schema->ndims;
-
-	for ( i = 0; i < pcs->ndims; i++ )
+	for ( i = 0; i < schema->ndims; i++ )
 	{
-
+		double avg = npoints ? dstats->dims[i].sum / npoints : 0.0;
+		pc_point_set_double(&(stats->min), schema->dims[i], dstats->dims[i].min);
+		pc_point_set_double(&(stats->max), schema->dims[i], dstats->dims[i].max);
+		pc_point_set_double(&(stats->avg), schema->dims[i], avg);
+	}
+	return stats;
+}
+
+/**
+* Walk every point of an uncompressed patch, accumulating min, max and
+* sum for each dimension as doubles, then attach the resulting PCSTATS
+* to the patch.
+*/
+static int
+pc_patch_uncompressed_calculate_stats(PCPATCH_UNCOMPRESSED *pa)
+{
+	int i, j;
+	const PCSCHEMA *schema = pa->schema;
+	double val;
+	PCDOUBLESTATS *dstats = pc_dstats_new(pa->schema->ndims);
+
+	/* Point on stack for fast access to values in patch */
+	PCPOINT pt;
+	pt.readonly = PC_TRUE;
+	pt.schema = schema;
+	pt.data = pa->data;
+
+	for ( i = 0; i < pa->npoints; i++ )
+	{
+		for ( j = 0; j < schema->ndims; j++ )
+		{
+			pc_point_get_double(&pt, schema->dims[j], &val);
+			/* Check minimum */
+			if ( val < dstats->dims[j].min )
+				dstats->dims[j].min = val;
+			/* Check maximum */
+			if ( val > dstats->dims[j].max )
+				dstats->dims[j].max = val;
+			/* Add to sum */
+			dstats->dims[j].sum += val;
+		}
+		/* Advance to next point */
+		pt.data += schema->size;
+	}
+
+	pa->stats = pc_stats_new_from_dstats(pa->schema, dstats, pa->npoints);
+	pc_dstats_free(dstats);
+	return PC_SUCCESS;
+}
+
+/**
+* Calculate or re-calculate statistics for a patch. Any stats already
+* attached are freed first. Returns PC_SUCCESS, or PC_FAILURE for a NULL
+* patch or a patch type without a stats calculation.
+*/
+int
+pc_patch_calculate_stats(PCPATCH *pa)
+{
+	if ( ! pa ) return PC_FAILURE;
+	if ( pa->stats )
+	{
+		/* pc_stats_free also releases the min/max/avg point data */
+		pc_stats_free(pa->stats);
+		pa->stats = NULL;
+	}
+
+	switch ( pa->type )
+	{
+		case PC_DIMENSIONAL:
+		{
+			pcerror("%s: stats calculation not enabled for patch type %d", __func__, pa->type);
+			break;
+		}
+		case PC_GHT:
+		{
+			pcerror("%s: stats calculation not enabled for patch type %d", __func__, pa->type);
+			break;
+		}
+		case PC_NONE:
+		{
+			return pc_patch_uncompressed_calculate_stats((PCPATCH_UNCOMPRESSED*)pa);
+		}
+		default:
+		{
+			pcerror("%s: unknown compression type %d", __func__, pa->type);
+			break;
+		}
 	}
-
+	pcerror("%s: fatal error", __func__);
+	return PC_FAILURE;
 }