mirror of
https://github.com/espruino/Espruino.git
synced 2025-12-08 19:06:15 +00:00
update to new tensorflow
This commit is contained in:
parent
7ac9ed85f7
commit
b216c275aa
@ -99,15 +99,16 @@ void *jswrap_tfmicrointerpreter_getTFMI(JsVar *parent) {
|
||||
return tfPtr;
|
||||
}
|
||||
|
||||
JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor *tensor) {
|
||||
JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, bool isInput) {
|
||||
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
|
||||
JsVar *mi = jsvObjectGetChild(parent, "mi", 0);
|
||||
if (!tensor && !mi) {
|
||||
tf_tensorfinfo tensor = tf_get(tfmi, isInput);
|
||||
if (!tensor.data || !mi) {
|
||||
jsExceptionHere(JSET_ERROR, "Unable to get tensor");
|
||||
return 0;
|
||||
}
|
||||
JsVarDataArrayBufferViewType abType = ARRAYBUFFERVIEW_UNDEFINED;
|
||||
switch (tensor->type) {
|
||||
switch (tensor.type) {
|
||||
case kTfLiteFloat32 :
|
||||
abType = ARRAYBUFFERVIEW_FLOAT32; break;
|
||||
case kTfLiteInt32 :
|
||||
@ -119,12 +120,12 @@ JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor
|
||||
case kTfLiteInt8 :
|
||||
abType = ARRAYBUFFERVIEW_INT8; break;
|
||||
default:
|
||||
jsExceptionHere(JSET_TYPEERROR, "Unsupported Tensor format TfLiteType:%d", tensor->type);
|
||||
jsExceptionHere(JSET_TYPEERROR, "Unsupported Tensor format TfLiteType:%d", tensor.type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
JsVar *ab = jsvNewArrayBufferFromString(mi,0);
|
||||
JsVar *b = jswrap_typedarray_constructor(abType, ab, ((size_t)&tensor->data.f[0])-(size_t)tfmi, tensor->bytes / JSV_ARRAYBUFFER_GET_SIZE(abType));
|
||||
JsVar *b = jswrap_typedarray_constructor(abType, ab, ((size_t)tensor.data)-(size_t)tfmi, tensor.bytes / JSV_ARRAYBUFFER_GET_SIZE(abType));
|
||||
jsvUnLock2(ab,mi);
|
||||
return b;
|
||||
}
|
||||
@ -138,9 +139,7 @@ JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor
|
||||
}
|
||||
*/
|
||||
JsVar *jswrap_tfmicrointerpreter_getInput(JsVar *parent) {
|
||||
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
|
||||
if (!tfmi) return 0;
|
||||
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, tf_get_input(tfmi, 0));
|
||||
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, true);
|
||||
}
|
||||
/*JSON{
|
||||
"type" : "method",
|
||||
@ -152,9 +151,7 @@ JsVar *jswrap_tfmicrointerpreter_getInput(JsVar *parent) {
|
||||
}
|
||||
*/
|
||||
JsVar *jswrap_tfmicrointerpreter_getOutput(JsVar *parent) {
|
||||
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
|
||||
if (!tfmi) return 0;
|
||||
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, tf_get_output(tfmi, 0));
|
||||
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, false);
|
||||
}
|
||||
/*JSON{
|
||||
"type" : "method",
|
||||
|
||||
@ -53,7 +53,7 @@ typedef struct {
|
||||
tflite::ops::micro::AllOpsResolver resolver;
|
||||
#else
|
||||
#define TENSORFLOW_OP_COUNT 6
|
||||
tflite::MicroOpResolver<TENSORFLOW_OP_COUNT> resolver;
|
||||
tflite::MicroMutableOpResolver<TENSORFLOW_OP_COUNT> resolver;
|
||||
#endif
|
||||
// Build an interpreter to run the model with
|
||||
tflite::MicroInterpreter interpreter;
|
||||
@ -88,19 +88,13 @@ bool tf_create(void *dataPtr, size_t arena_size, const char *model_data) {
|
||||
new (&tf->resolver)tflite::ops::micro::AllOpsResolver();
|
||||
#else
|
||||
// Pull in only the operation implementations we need.
|
||||
new (&tf->resolver)tflite::MicroOpResolver<TENSORFLOW_OP_COUNT>();
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
|
||||
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(), tflite::MicroOpResolverAnyVersion());
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_CONV_2D,
|
||||
tflite::ops::micro::Register_CONV_2D(), tflite::MicroOpResolverAnyVersion());
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_AVERAGE_POOL_2D,
|
||||
tflite::ops::micro::Register_AVERAGE_POOL_2D(), tflite::MicroOpResolverAnyVersion());
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_MAX_POOL_2D,
|
||||
tflite::ops::micro::Register_MAX_POOL_2D(), tflite::MicroOpResolverAnyVersion());
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_FULLY_CONNECTED,
|
||||
tflite::ops::micro::Register_FULLY_CONNECTED(), tflite::MicroOpResolverAnyVersion());
|
||||
tf->resolver.AddBuiltin( tflite::BuiltinOperator_SOFTMAX,
|
||||
tflite::ops::micro::Register_SOFTMAX(), tflite::MicroOpResolverAnyVersion());
|
||||
new (&tf->resolver)tflite::MicroMutableOpResolver<TENSORFLOW_OP_COUNT>();
|
||||
tf->resolver.AddDepthwiseConv2D();
|
||||
tf->resolver.AddConv2D();
|
||||
tf->resolver.AddAveragePool2D();
|
||||
tf->resolver.AddMaxPool2D();
|
||||
tf->resolver.AddFullyConnected();
|
||||
tf->resolver.AddSoftmax();
|
||||
#endif
|
||||
|
||||
// Build an interpreter to run the model with
|
||||
@ -133,16 +127,17 @@ bool tf_invoke(void *dataPtr) {
|
||||
return true;
|
||||
}
|
||||
|
||||
TfLiteTensor *tf_get_input(void *dataPtr, int n) {
|
||||
tf_tensorfinfo tf_get(void *dataPtr, bool isInput) {
|
||||
TFData *tf = (TFData*)dataPtr;
|
||||
// Obtain pointers to the model's input and output tensors
|
||||
return tf->interpreter.input(0);
|
||||
}
|
||||
|
||||
TfLiteTensor *tf_get_output(void *dataPtr, int n) {
|
||||
TFData *tf = (TFData*)dataPtr;
|
||||
// Obtain pointers to the model's input and output tensors
|
||||
return tf->interpreter.output(0);
|
||||
tf_tensorfinfo inf;
|
||||
inf.data = 0;
|
||||
TfLiteTensor *tensor = isInput ? tf->interpreter.input(0) : tf->interpreter.output(0);
|
||||
if (tensor) {
|
||||
inf.type = tensor->type;
|
||||
inf.data = &tensor->data.f[0];
|
||||
inf.bytes = tensor->bytes;
|
||||
}
|
||||
return inf;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@ -1,8 +1,14 @@
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
typedef struct {
|
||||
void *data; // pointer to data
|
||||
TfLiteType type; // data type
|
||||
int bytes; // how big is the tensor
|
||||
} tf_tensorfinfo;
|
||||
|
||||
|
||||
size_t tf_get_size(size_t arena_size, const char *model_data);
|
||||
bool tf_create(void *dataPtr, size_t arena_size, const char *model_data);
|
||||
void tf_destroy(void *dataPtr);
|
||||
bool tf_invoke(void *dataPtr);
|
||||
TfLiteTensor *tf_get_input(void *dataPtr, int n);
|
||||
TfLiteTensor *tf_get_output(void *dataPtr, int n);
|
||||
tf_tensorfinfo tf_get(void *dataPtr, bool isInput);
|
||||
|
||||
@ -21,7 +21,7 @@ limitations under the License.
|
||||
// Also update tensorflow/tensorflow.bzl and
|
||||
// tensorflow/tools/pip_package/setup.py
|
||||
#define TF_MAJOR_VERSION 2
|
||||
#define TF_MINOR_VERSION 1
|
||||
#define TF_MINOR_VERSION 4
|
||||
#define TF_PATCH_VERSION 0
|
||||
|
||||
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
|
||||
@ -108,7 +108,7 @@ limitations under the License.
|
||||
|
||||
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
|
||||
#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
|
||||
#define TF_GRAPH_DEF_VERSION 398 // Updated: 2020/5/11
|
||||
#define TF_GRAPH_DEF_VERSION 498 // Updated: 2020/8/19
|
||||
|
||||
// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
|
||||
//
|
||||
|
||||
@ -67,8 +67,9 @@ typedef struct {
|
||||
typedef enum {
|
||||
kTfLiteActNone = 0,
|
||||
kTfLiteActRelu,
|
||||
kTfLiteActRelu1, // min(max(-1, x), 1)
|
||||
kTfLiteActRelu6, // min(max(0, x), 6)
|
||||
kTfLiteActReluN1To1, // min(max(-1, x), 1)
|
||||
kTfLiteActRelu1 = kTfLiteActReluN1To1, // kTfLiteActRelu1 will be deprecated.
|
||||
kTfLiteActRelu6, // min(max(0, x), 6)
|
||||
kTfLiteActTanh,
|
||||
kTfLiteActSignBit,
|
||||
kTfLiteActSigmoid,
|
||||
@ -198,6 +199,8 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
TfLiteFusedActivation activation;
|
||||
// Parameter added for the version 4.
|
||||
bool pot_scale_int16;
|
||||
} TfLiteAddParams;
|
||||
|
||||
typedef struct {
|
||||
@ -219,6 +222,8 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
TfLiteFusedActivation activation;
|
||||
// Parameter added for the version 5.
|
||||
bool pot_scale_int16;
|
||||
} TfLiteSubParams;
|
||||
|
||||
typedef struct {
|
||||
|
||||
@ -79,7 +79,8 @@ TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
|
||||
void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
|
||||
|
||||
void TfLiteTensorDataFree(TfLiteTensor* t) {
|
||||
if (t->allocation_type == kTfLiteDynamic) {
|
||||
if (t->allocation_type == kTfLiteDynamic ||
|
||||
t->allocation_type == kTfLitePersistentRo) {
|
||||
free(t->data.raw);
|
||||
}
|
||||
t->data.raw = NULL;
|
||||
@ -172,7 +173,8 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
|
||||
}
|
||||
|
||||
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
|
||||
if (tensor->allocation_type != kTfLiteDynamic) {
|
||||
if (tensor->allocation_type != kTfLiteDynamic &&
|
||||
tensor->allocation_type != kTfLitePersistentRo) {
|
||||
return;
|
||||
}
|
||||
// TODO(b/145340303): Tensor data should be aligned.
|
||||
@ -205,6 +207,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
|
||||
return "BOOL";
|
||||
case kTfLiteComplex64:
|
||||
return "COMPLEX64";
|
||||
case kTfLiteComplex128:
|
||||
return "COMPLEX128";
|
||||
case kTfLiteString:
|
||||
return "STRING";
|
||||
case kTfLiteFloat16:
|
||||
|
||||
@ -47,7 +47,8 @@ extern "C" {
|
||||
typedef enum TfLiteStatus {
|
||||
kTfLiteOk = 0,
|
||||
kTfLiteError = 1,
|
||||
kTfLiteDelegateError = 2
|
||||
kTfLiteDelegateError = 2,
|
||||
kTfLiteApplicationError = 3
|
||||
} TfLiteStatus;
|
||||
|
||||
// The list of external context types known to TF Lite. This list exists solely
|
||||
@ -58,7 +59,7 @@ typedef enum TfLiteExternalContextType {
|
||||
kTfLiteEigenContext = 0, // include eigen_support.h to use.
|
||||
kTfLiteGemmLowpContext = 1, // include gemm_support.h to use.
|
||||
kTfLiteEdgeTpuContext = 2, // Placeholder for Edge TPU support.
|
||||
kTfLiteCpuBackendContext = 3, // include cpu_backend_support.h to use.
|
||||
kTfLiteCpuBackendContext = 3, // include cpu_backend_context.h to use.
|
||||
kTfLiteMaxExternalContexts = 4
|
||||
} TfLiteExternalContextType;
|
||||
|
||||
@ -86,8 +87,9 @@ typedef struct TfLiteIntArray {
|
||||
int size;
|
||||
// gcc 6.1+ have a bug where flexible members aren't properly handled
|
||||
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
|
||||
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
|
||||
__GNUC_MINOR__ >= 1
|
||||
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
|
||||
__GNUC_MINOR__ >= 1) || \
|
||||
defined(HEXAGON)
|
||||
int data[0];
|
||||
#else
|
||||
int data[];
|
||||
@ -125,6 +127,7 @@ typedef struct TfLiteFloatArray {
|
||||
int size;
|
||||
// gcc 6.1+ have a bug where flexible members aren't properly handled
|
||||
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
|
||||
// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
|
||||
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
|
||||
__GNUC_MINOR__ >= 1
|
||||
float data[0];
|
||||
@ -203,6 +206,7 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
|
||||
// the current function, while also reporting the location of the error.
|
||||
// `a` and `b` may be evaluated more than once, so no side effects or
|
||||
// extremely expensive computations should be done.
|
||||
// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
|
||||
#define TF_LITE_ENSURE_EQ(context, a, b) \
|
||||
do { \
|
||||
if ((a) != (b)) { \
|
||||
@ -230,11 +234,32 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
|
||||
// library.
|
||||
#ifdef SWIG
|
||||
#define TFL_CAPI_EXPORT
|
||||
#else
|
||||
#if defined(_WIN32)
|
||||
#ifdef TFL_COMPILE_LIBRARY
|
||||
#define TFL_CAPI_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define TFL_CAPI_EXPORT __declspec(dllimport)
|
||||
#endif // TFL_COMPILE_LIBRARY
|
||||
#else
|
||||
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
|
||||
#endif // _WIN32
|
||||
#endif // SWIG
|
||||
|
||||
// Single-precision complex data type compatible with the C99 definition.
|
||||
typedef struct TfLiteComplex64 {
|
||||
float re, im; // real and imaginary parts, respectively.
|
||||
} TfLiteComplex64;
|
||||
|
||||
// Double-precision complex data type compatible with the C99 definition.
|
||||
typedef struct TfLiteComplex128 {
|
||||
double re, im; // real and imaginary parts, respectively.
|
||||
} TfLiteComplex128;
|
||||
|
||||
// Half precision data type compatible with the C99 definition.
|
||||
typedef struct TfLiteFloat16 {
|
||||
uint16_t data;
|
||||
@ -254,6 +279,7 @@ typedef enum {
|
||||
kTfLiteInt8 = 9,
|
||||
kTfLiteFloat16 = 10,
|
||||
kTfLiteFloat64 = 11,
|
||||
kTfLiteComplex128 = 12,
|
||||
} TfLiteType;
|
||||
|
||||
// Return the name of a given type, for error reporting purposes.
|
||||
@ -310,26 +336,39 @@ typedef union TfLitePtrUnion {
|
||||
int64_t* i64;
|
||||
float* f;
|
||||
TfLiteFloat16* f16;
|
||||
double* f64;
|
||||
char* raw;
|
||||
const char* raw_const;
|
||||
uint8_t* uint8;
|
||||
bool* b;
|
||||
int16_t* i16;
|
||||
TfLiteComplex64* c64;
|
||||
TfLiteComplex128* c128;
|
||||
int8_t* int8;
|
||||
/* Only use this member. */
|
||||
void* data;
|
||||
} TfLitePtrUnion;
|
||||
|
||||
// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped
|
||||
// data (or data externally allocated). kTfLiteArenaRw is arena allocated
|
||||
// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
|
||||
// Memory allocation strategies.
|
||||
// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
|
||||
// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
|
||||
// and available during eval.
|
||||
// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
|
||||
// only available during eval.
|
||||
// * kTfLiteDynamic: Allocated during eval, or for string tensors.
|
||||
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
|
||||
// useful for tensors that can be computed during prepare and treated
|
||||
// as constant inputs for downstream ops (also in prepare).
|
||||
// * kTfLiteCustom: Custom memory allocation provided by the user. See
|
||||
// TfLiteCustomAllocation below.
|
||||
typedef enum TfLiteAllocationType {
|
||||
kTfLiteMemNone = 0,
|
||||
kTfLiteMmapRo,
|
||||
kTfLiteArenaRw,
|
||||
kTfLiteArenaRwPersistent,
|
||||
kTfLiteDynamic,
|
||||
kTfLitePersistentRo,
|
||||
kTfLiteCustom,
|
||||
} TfLiteAllocationType;
|
||||
|
||||
// The delegates should use zero or positive integers to represent handles.
|
||||
@ -362,8 +401,18 @@ typedef struct TfLiteSparsity {
|
||||
int dim_metadata_size;
|
||||
} TfLiteSparsity;
|
||||
|
||||
// Defines a custom memory allocation not owned by the runtime.
|
||||
// `data` should be aligned to kDefaultTensorAlignment defined in
|
||||
// lite/util.h. (Currently 64 bytes)
|
||||
// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
|
||||
typedef struct TfLiteCustomAllocation {
|
||||
void* data;
|
||||
size_t bytes;
|
||||
} TfLiteCustomAllocation;
|
||||
|
||||
// An tensor in the interpreter system which is a wrapper around a buffer of
|
||||
// data including a dimensionality (or NULL if not currently defined).
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
typedef struct TfLiteTensor {
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
@ -429,31 +478,6 @@ typedef struct TfLiteTensor {
|
||||
const TfLiteIntArray* dims_signature;
|
||||
} TfLiteTensor;
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Free data memory of tensor `t`.
|
||||
void TfLiteTensorDataFree(TfLiteTensor* t);
|
||||
|
||||
// Free quantization data.
|
||||
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
|
||||
|
||||
// Free sparsity parameters.
|
||||
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
|
||||
|
||||
// Free memory of tensor `t`.
|
||||
void TfLiteTensorFree(TfLiteTensor* t);
|
||||
|
||||
// Set all of a tensor's fields (and free any previously allocated data).
|
||||
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
|
||||
TfLiteQuantizationParams quantization, char* buffer,
|
||||
size_t size, TfLiteAllocationType allocation_type,
|
||||
const void* allocation, bool is_variable,
|
||||
TfLiteTensor* tensor);
|
||||
|
||||
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
|
||||
// types other than kTfLiteDynamic will be ignored.
|
||||
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// A structure representing an instance of a node.
|
||||
// This structure only exhibits the inputs, outputs and user defined data, not
|
||||
// other features like the type.
|
||||
@ -490,6 +514,130 @@ typedef struct TfLiteNode {
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
struct TfLiteDelegate* delegate;
|
||||
} TfLiteNode;
|
||||
#else // defined(TF_LITE_STATIC_MEMORY)?
|
||||
// NOTE: This flag is opt-in only at compile time.
|
||||
//
|
||||
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
|
||||
// contains only the minimum fields required to initialize and prepare a micro
|
||||
// inference graph. The fields in this struct have been ordered from
|
||||
// largest-to-smallest for optimal struct sizeof.
|
||||
//
|
||||
// This struct does not use:
|
||||
// - allocation
|
||||
// - buffer_handle
|
||||
// - data_is_stale
|
||||
// - delegate
|
||||
// - dims_signature
|
||||
// - name
|
||||
// - sparsity
|
||||
typedef struct TfLiteTensor {
|
||||
// TODO(b/155784997): Consider consolidating these quantization fields:
|
||||
// Quantization information. Replaces params field above.
|
||||
TfLiteQuantization quantization;
|
||||
|
||||
// Quantization information.
|
||||
TfLiteQuantizationParams params;
|
||||
|
||||
// A union of data pointers. The appropriate type should be used for a typed
|
||||
// tensor based on `type`.
|
||||
TfLitePtrUnion data;
|
||||
|
||||
// A pointer to a structure representing the dimensionality interpretation
|
||||
// that the buffer should have. NOTE: the product of elements of `dims`
|
||||
// and the element datatype size should be equal to `bytes` below.
|
||||
TfLiteIntArray* dims;
|
||||
|
||||
// The number of bytes required to store the data of this Tensor. I.e.
|
||||
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
|
||||
// type is kTfLiteFloat32 and dims = {3, 2} then
|
||||
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
|
||||
size_t bytes;
|
||||
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
TfLiteType type;
|
||||
|
||||
// How memory is mapped
|
||||
// kTfLiteMmapRo: Memory mapped read only.
|
||||
// i.e. weights
|
||||
// kTfLiteArenaRw: Arena allocated read write memory
|
||||
// (i.e. temporaries, outputs).
|
||||
TfLiteAllocationType allocation_type;
|
||||
|
||||
// True if the tensor is a variable.
|
||||
bool is_variable;
|
||||
} TfLiteTensor;
|
||||
|
||||
// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
|
||||
// only the minimum fields required to represent a node.
|
||||
//
|
||||
// This struct does not use:
|
||||
// - delegate
|
||||
// - intermediates
|
||||
// - temporaries
|
||||
typedef struct TfLiteNode {
|
||||
// Inputs to this node expressed as indices into the simulator's tensors.
|
||||
TfLiteIntArray* inputs;
|
||||
|
||||
// Outputs to this node expressed as indices into the simulator's tensors.
|
||||
TfLiteIntArray* outputs;
|
||||
|
||||
// Opaque data provided by the node implementer through `Registration.init`.
|
||||
void* user_data;
|
||||
|
||||
// Opaque data provided to the node if the node is a builtin. This is usually
|
||||
// a structure defined in builtin_op_data.h
|
||||
void* builtin_data;
|
||||
|
||||
// Custom initial data. This is the opaque data provided in the flatbuffer.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
const void* custom_initial_data;
|
||||
int custom_initial_data_size;
|
||||
} TfLiteNode;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
|
||||
// of information required for a kernel to run during TfLiteRegistration::Eval.
|
||||
// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
|
||||
// builds with this flag by default internally.
|
||||
typedef struct TfLiteEvalTensor {
|
||||
// A union of data pointers. The appropriate type should be used for a typed
|
||||
// tensor based on `type`.
|
||||
TfLitePtrUnion data;
|
||||
|
||||
// A pointer to a structure representing the dimensionality interpretation
|
||||
// that the buffer should have.
|
||||
TfLiteIntArray* dims;
|
||||
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
TfLiteType type;
|
||||
} TfLiteEvalTensor;
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Free data memory of tensor `t`.
|
||||
void TfLiteTensorDataFree(TfLiteTensor* t);
|
||||
|
||||
// Free quantization data.
|
||||
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
|
||||
|
||||
// Free sparsity parameters.
|
||||
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
|
||||
|
||||
// Free memory of tensor `t`.
|
||||
void TfLiteTensorFree(TfLiteTensor* t);
|
||||
|
||||
// Set all of a tensor's fields (and free any previously allocated data).
|
||||
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
|
||||
TfLiteQuantizationParams quantization, char* buffer,
|
||||
size_t size, TfLiteAllocationType allocation_type,
|
||||
const void* allocation, bool is_variable,
|
||||
TfLiteTensor* tensor);
|
||||
|
||||
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
|
||||
// types other than kTfLiteDynamic will be ignored.
|
||||
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
//
|
||||
@ -581,12 +729,11 @@ typedef struct TfLiteContext {
|
||||
void* profiler;
|
||||
|
||||
// Allocate persistent buffer which has the same life time as the interpreter.
|
||||
// Returns nullptr on failure.
|
||||
// The memory is allocated from heap for TFL, and from tail in TFLM.
|
||||
// If *ptr is not nullptr, the pointer will be reallocated.
|
||||
// This method is only available in Prepare stage.
|
||||
// This method is only available in Init or Prepare stage.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx,
|
||||
size_t bytes, void** ptr);
|
||||
void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
|
||||
|
||||
// Allocate a buffer which will be deallocated right after invoke phase.
|
||||
// The memory is allocated from heap in TFL, and from volatile arena in TFLM.
|
||||
@ -641,6 +788,18 @@ typedef struct TfLiteContext {
|
||||
TfLiteStatus (*PreviewDelegatePartitioning)(
|
||||
struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
|
||||
TfLiteDelegateParams** partition_params_array, int* num_partitions);
|
||||
|
||||
// Returns a TfLiteTensor struct for a given index.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
// WARNING: This method may not be available on all platforms.
|
||||
TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
|
||||
// Returns a TfLiteEvalTensor struct for a given index.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
// WARNING: This method may not be available on all platforms.
|
||||
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
} TfLiteContext;
|
||||
|
||||
typedef struct TfLiteRegistration {
|
||||
@ -715,7 +874,26 @@ typedef enum TfLiteDelegateFlags {
|
||||
//
|
||||
// If the delegate isn't capable to handle dynamic tensors, this flag need
|
||||
// to be set to false.
|
||||
kTfLiteDelegateFlagsAllowDynamicTensors = 1
|
||||
kTfLiteDelegateFlagsAllowDynamicTensors = 1,
|
||||
|
||||
// This flag can be used by delegates (that allow dynamic tensors) to ensure
|
||||
// applicable tensor shapes are automatically propagated in the case of tensor
|
||||
// resizing.
|
||||
// This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
|
||||
// of a delegate kernel will have correct shapes before its Prepare() method
|
||||
// is called. The runtime leverages TFLite builtin ops in the original
|
||||
// execution plan to propagate shapes.
|
||||
//
|
||||
// A few points to note:
|
||||
// 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
|
||||
// false, this one is redundant since the delegate kernels are re-initialized
|
||||
// every time tensors are resized.
|
||||
// 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
|
||||
// work is required to prepare the original execution plan.
|
||||
// 3. This flag requires that the original execution plan only have ops with
|
||||
// valid registrations (and not 'dummy' custom ops like with Flex).
|
||||
// WARNING: This feature is experimental and subject to change.
|
||||
kTfLiteDelegateFlagsRequirePropagatedShapes = 2
|
||||
} TfLiteDelegateFlags;
|
||||
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
@ -734,8 +912,9 @@ typedef struct TfLiteDelegate {
|
||||
struct TfLiteDelegate* delegate);
|
||||
|
||||
// Copy the data from delegate buffer handle into raw memory of the given
|
||||
// 'tensor'. This cannot be null. The delegate is allowed to allocate the raw
|
||||
// bytes as long as it follows the rules for kTfLiteDynamic tensors.
|
||||
// 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
|
||||
// long as it follows the rules for kTfLiteDynamic tensors, in which case this
|
||||
// cannot be null.
|
||||
TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
|
||||
struct TfLiteDelegate* delegate,
|
||||
TfLiteBufferHandle buffer_handle,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -19,9 +19,12 @@ limitations under the License.
|
||||
// flatbuffer serialization format into in-memory values that are used by the
|
||||
// runtime API and interpreter.
|
||||
|
||||
#include <cstddef>
|
||||
#include <new>
|
||||
#include <type_traits>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -66,6 +69,185 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
|
||||
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseConcatenation(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseFullyConnected(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseGreaterEqual(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseL2Normalization(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseStridedSlice(const Operator* op,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator,
|
||||
void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
|
||||
|
||||
@ -15,6 +15,10 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
TfLiteStatus GetRegistrationFromOpCode(
|
||||
|
||||
@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
@ -32,6 +34,16 @@ class OpResolver {
|
||||
/// Finds the op registration of a custom operator by op name.
|
||||
virtual const TfLiteRegistration* FindOp(const char* op,
|
||||
int version) const = 0;
|
||||
|
||||
// Returns optional delegates for resolving and handling ops in the flatbuffer
|
||||
// model. This may be used in addition to the standard TfLiteRegistration
|
||||
// lookup for graph resolution.
|
||||
using TfLiteDelegatePtrVector =
|
||||
std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
|
||||
virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
|
||||
return TfLiteDelegatePtrVector();
|
||||
}
|
||||
|
||||
virtual ~OpResolver() {}
|
||||
};
|
||||
|
||||
|
||||
@ -17,6 +17,8 @@ limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
|
||||
|
||||
@ -55,9 +55,12 @@ inline void GetActivationMinMax(FusedActivationFunctionType ac,
|
||||
}
|
||||
}
|
||||
|
||||
inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
|
||||
float output_activation_max) {
|
||||
return std::min(std::max(x, output_activation_min), output_activation_max);
|
||||
template <typename T>
|
||||
inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
|
||||
T output_activation_max) {
|
||||
using std::max;
|
||||
using std::min;
|
||||
return min(max(x, output_activation_min), output_activation_max);
|
||||
}
|
||||
|
||||
// Legacy function, left for compatibility only.
|
||||
@ -135,23 +138,24 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
int32 x, int32 quantized_multiplier, int left_shift) {
|
||||
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
int32_t x, int32_t quantized_multiplier, int left_shift) {
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
return RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
int32 x, int32 quantized_multiplier, int left_shift) {
|
||||
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
int32_t x, int32_t quantized_multiplier, int left_shift) {
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
|
||||
quantized_multiplier);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
|
||||
int shift) {
|
||||
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
|
||||
int32_t quantized_multiplier,
|
||||
int shift) {
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
int left_shift = shift > 0 ? shift : 0;
|
||||
@ -161,16 +165,16 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
|
||||
right_shift);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplier(int64_t x,
|
||||
int32 quantized_multiplier,
|
||||
int shift) {
|
||||
inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
|
||||
int32_t quantized_multiplier,
|
||||
int shift) {
|
||||
// Inputs:
|
||||
// - quantized_multiplier has fixed point at bit 31
|
||||
// - shift is -31 to +7 (negative for right shift)
|
||||
//
|
||||
// Assumptions: The following input ranges are assumed
|
||||
// - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
|
||||
// - scaling is chosen so final scaled result fits in int32
|
||||
// - scaling is chosen so final scaled result fits in int32_t
|
||||
// - input x is in the range -(1<<47) <= x < (1<<47)
|
||||
assert(quantized_multiplier >= 0);
|
||||
assert(shift >= -31 && shift < 8);
|
||||
@ -215,9 +219,9 @@ inline int CountLeadingSignBits(T integer_input) {
|
||||
using U = typename std::make_unsigned<T>::type;
|
||||
return integer_input >= 0
|
||||
? CountLeadingZeros(static_cast<U>(integer_input)) - 1
|
||||
: integer_input != std::numeric_limits<T>::min()
|
||||
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
|
||||
: 0;
|
||||
: integer_input != std::numeric_limits<T>::min()
|
||||
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
|
||||
: 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -259,7 +263,7 @@ inline void gen_lut(const std::function<double(double)>& func, double min,
|
||||
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
|
||||
}
|
||||
|
||||
// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
|
||||
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
|
||||
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
|
||||
// 512 base value, lut[513] only for calculate slope
|
||||
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
|
||||
@ -410,6 +414,23 @@ SaturatingRoundingMultiplyByPOTParam(
|
||||
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
|
||||
}
|
||||
|
||||
// Convert int32_t multiplier to int16_t with rounding.
|
||||
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
|
||||
int16_t* multiplier_int16_t) {
|
||||
TFLITE_DCHECK_GE(multiplier_int32_t, 0);
|
||||
static constexpr int32_t kRoundingOffset = 1 << 15;
|
||||
if (multiplier_int32_t >=
|
||||
std::numeric_limits<int32_t>::max() - kRoundingOffset) {
|
||||
*multiplier_int16_t = std::numeric_limits<int16_t>::max();
|
||||
return;
|
||||
}
|
||||
const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
|
||||
TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
|
||||
TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
|
||||
*multiplier_int16_t = result;
|
||||
TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
|
||||
}
|
||||
|
||||
// Minimum output bits to accommodate log of maximum input range. It actually
|
||||
// does not matter if one considers, say, [-64,64] or [-64,64).
|
||||
//
|
||||
@ -418,15 +439,13 @@ SaturatingRoundingMultiplyByPOTParam(
|
||||
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
|
||||
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
|
||||
constexpr int min_log_x_output_bits(int input_bits) {
|
||||
return input_bits > 90
|
||||
? 7
|
||||
: input_bits > 44
|
||||
? 6
|
||||
: input_bits > 21
|
||||
? 5
|
||||
: input_bits > 10
|
||||
? 4
|
||||
: input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
|
||||
return input_bits > 90 ? 7
|
||||
: input_bits > 44 ? 6
|
||||
: input_bits > 21 ? 5
|
||||
: input_bits > 10 ? 4
|
||||
: input_bits > 4 ? 3
|
||||
: input_bits > 1 ? 2
|
||||
: 1;
|
||||
}
|
||||
|
||||
// Although currently the name of this function says that it cannot handle
|
||||
@ -434,17 +453,17 @@ constexpr int min_log_x_output_bits(int input_bits) {
|
||||
// x_max is the largest representable input. In other words, the output range
|
||||
// is symmetric.
|
||||
template <int OutputIntegerBits, int InputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
|
||||
log_x_for_x_greater_than_or_equal_to_1_impl(
|
||||
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
|
||||
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
|
||||
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
|
||||
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
|
||||
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
|
||||
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
|
||||
// The reason for accumulating the result with an extra bit of headroom is
|
||||
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
|
||||
// recip_denom will otherwise introduce an error.
|
||||
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
|
||||
|
||||
const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 1488522236, std::log(2.0));
|
||||
@ -472,10 +491,10 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
|
||||
// required shift "ourselves" instead of using, say, Rescale.
|
||||
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
|
||||
// z_a_pow_2 = input_integer_bits - z_a_headroom;
|
||||
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
|
||||
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
|
||||
FixedPoint0 r_a_tmp =
|
||||
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
|
||||
const int32 r_a_raw =
|
||||
const int32_t r_a_raw =
|
||||
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
|
||||
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
|
||||
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
|
||||
@ -487,8 +506,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
|
||||
|
||||
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
|
||||
FixedPoint0 z_b = z_a * sqrt_half;
|
||||
int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
|
||||
const int32 r_b_raw =
|
||||
int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
|
||||
const int32_t r_b_raw =
|
||||
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
|
||||
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
|
||||
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
|
||||
@ -516,9 +535,9 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
|
||||
}
|
||||
|
||||
template <int OutputIntegerBits, int InputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
|
||||
log_x_for_x_greater_than_or_equal_to_1(
|
||||
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
|
||||
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
|
||||
static_assert(
|
||||
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
|
||||
"Output integer bits must be sufficient to accommodate logs of inputs.");
|
||||
@ -527,25 +546,25 @@ log_x_for_x_greater_than_or_equal_to_1(
|
||||
input_val);
|
||||
}
|
||||
|
||||
inline int32 GetReciprocal(int32 x, int x_integer_digits,
|
||||
int* num_bits_over_unit) {
|
||||
int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
|
||||
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
|
||||
int* num_bits_over_unit) {
|
||||
int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
|
||||
// This is the number of bits to the left of the binary point above 1.0.
|
||||
// Consider x=1.25. In that case shifted_scale=0.8 and
|
||||
// no later adjustment will be needed.
|
||||
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
|
||||
const int32 shifted_sum_minus_one =
|
||||
static_cast<int32>((static_cast<uint32>(x) << headroom_plus_one) -
|
||||
(static_cast<uint32>(1) << 31));
|
||||
const int32_t shifted_sum_minus_one =
|
||||
static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
|
||||
(static_cast<uint32_t>(1) << 31));
|
||||
|
||||
gemmlowp::FixedPoint<int32, 0> shifted_scale =
|
||||
gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
|
||||
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
|
||||
gemmlowp::FixedPoint<int32, 0>::FromRaw(shifted_sum_minus_one));
|
||||
gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
|
||||
return shifted_scale.raw();
|
||||
}
|
||||
|
||||
inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
|
||||
int32* output_inv_sqrt,
|
||||
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
|
||||
int32_t* output_inv_sqrt,
|
||||
int* output_shift) {
|
||||
TFLITE_DCHECK_GE(input, 0);
|
||||
if (input <= 1) {
|
||||
@ -565,7 +584,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
|
||||
++*output_shift;
|
||||
}
|
||||
const unsigned max_left_shift_bits =
|
||||
CountLeadingZeros(static_cast<uint32>(input)) - 1;
|
||||
CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
|
||||
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
|
||||
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
|
||||
*output_shift -= left_shift_bit_pairs;
|
||||
@ -577,8 +596,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
|
||||
using gemmlowp::SaturatingRoundingMultiplyByPOT;
|
||||
// Using 3 integer bits gives us enough room for the internal arithmetic in
|
||||
// this Newton-Raphson iteration.
|
||||
using F3 = FixedPoint<int32, 3>;
|
||||
using F0 = FixedPoint<int32, 0>;
|
||||
using F3 = FixedPoint<int32_t, 3>;
|
||||
using F0 = FixedPoint<int32_t, 0>;
|
||||
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
|
||||
const F3 fixedpoint_half_input =
|
||||
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
|
||||
|
||||
@ -76,13 +76,15 @@ limitations under the License.
|
||||
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
// TODO(ahentz): Clean up.
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// TODO(b/162019032): Consider removing these type-aliases.
|
||||
using int8 = std::int8_t;
|
||||
using uint8 = std::uint8_t;
|
||||
using int16 = std::int16_t;
|
||||
using uint16 = std::uint16_t;
|
||||
using int32 = std::int32_t;
|
||||
using uint32 = std::uint32_t;
|
||||
#endif // !defined(TF_LITE_STATIC_MEMORY)
|
||||
|
||||
// TFLITE_DEPRECATED()
|
||||
//
|
||||
|
||||
@ -19,8 +19,9 @@ limitations under the License.
|
||||
|
||||
namespace tflite {
|
||||
|
||||
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
|
||||
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO)
|
||||
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
|
||||
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
|
||||
defined(__ZEPHYR__)
|
||||
#define TF_LITE_GLOBAL_STD_PREFIX
|
||||
#else
|
||||
#define TF_LITE_GLOBAL_STD_PREFIX std
|
||||
|
||||
@ -342,13 +342,13 @@ void NudgeQuantizationRange(const float min, const float max,
|
||||
const float quant_max_float = static_cast<float>(quant_max);
|
||||
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
|
||||
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
|
||||
uint16 nudged_zero_point;
|
||||
uint16_t nudged_zero_point;
|
||||
if (zero_point_from_min < quant_min_float) {
|
||||
nudged_zero_point = static_cast<uint16>(quant_min);
|
||||
nudged_zero_point = static_cast<uint16_t>(quant_min);
|
||||
} else if (zero_point_from_min > quant_max_float) {
|
||||
nudged_zero_point = static_cast<uint16>(quant_max);
|
||||
nudged_zero_point = static_cast<uint16_t>(quant_max);
|
||||
} else {
|
||||
nudged_zero_point = static_cast<uint16>(TfLiteRound(zero_point_from_min));
|
||||
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
|
||||
}
|
||||
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
|
||||
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
|
||||
|
||||
@ -51,34 +51,39 @@ inline void Add(const ArithmeticParams& params,
|
||||
|
||||
// Element-wise add that can often be used for inner loop of broadcast add as
|
||||
// well as the non-broadcast add.
|
||||
|
||||
// This function is used for 8-bit as well as for 16-bit, but the accumulator
|
||||
// is 32-bit for both cases. The overflow does not happen due to the
|
||||
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
|
||||
template <typename T>
|
||||
inline void AddElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -256);
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -256);
|
||||
TFLITE_DCHECK_LT(params.input1_offset, 256);
|
||||
TFLITE_DCHECK_LT(params.input2_offset, 256);
|
||||
const T* input1_data, const T* input2_data,
|
||||
T* output_data) {
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
|
||||
TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
|
||||
TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sum, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
output_data[i] = static_cast<T>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,40 +91,40 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
uint8 input1_data, const uint8* input2_data,
uint8* output_data) {
uint8_t input1_data, const uint8_t* input2_data,
uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);

const int32 input1_val = params.input1_offset + input1_data;
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
for (int i = 0; i < size; ++i) {
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input2_val =
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}

inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8* input1_data,
const RuntimeShape& input2_shape, const uint8* input2_data,
const RuntimeShape& output_shape, uint8* output_data) {
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
@ -132,24 +137,53 @@ inline void Add(const ArithmeticParams& params,
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);

int max_value = std::numeric_limits<int16_t>::max();

TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16* input1_data,
const RuntimeShape& input2_shape, const int16* input2_data,
const RuntimeShape& output_shape, int16* output_data) {
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}

TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);

const int input1_shift = params.input1_shift;
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int16 output_activation_min = params.quantized_activation_min;
const int16 output_activation_max = params.quantized_activation_max;
const int16_t output_activation_min = params.quantized_activation_min;
const int16_t output_activation_max = params.quantized_activation_max;

TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
TFLITE_DCHECK_LE(input1_shift, 0);
TFLITE_DCHECK_LE(params.input2_shift, 0);
const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data;
const int16* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int16_t* not_shift_input =
input1_shift == 0 ? input1_data : input2_data;
const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int input_right_shift =
input1_shift == 0 ? -params.input2_shift : -input1_shift;

@ -161,8 +195,8 @@ inline void Add(const ArithmeticParams& params,
F0 scaled_input = F0::FromRaw(
gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
const int16 raw_output = result.raw();
const int16 clamped_output = std::min(
const int16_t raw_output = result.raw();
const int16_t clamped_output = std::min(
output_activation_max, std::max(output_activation_min, raw_output));
output_data[i] = clamped_output;
}
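The int16_t path above relies on pure power-of-two rescaling: one input keeps shift 0 while the other is divided by 2^n with rounding before the saturating fixed-point add. A minimal sketch of that rounding divide (simplified from gemmlowp::RoundingDivideByPOT, which rounds half away from zero):

// Sketch only: divide by 2^exponent with round-half-up, as applied to the
// shifted int16_t input before gemmlowp::SaturatingAdd.
inline int16_t RoundingDivideByPOTSketch(int16_t x, int exponent /* >= 0 */) {
  const int32_t rounding = exponent > 0 ? (1 << (exponent - 1)) : 0;
  return static_cast<int16_t>((static_cast<int32_t>(x) + rounding) >> exponent);
}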
@ -218,11 +252,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@ -257,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
}
}

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8* input1_data,
const RuntimeShape& input2_shape,
const uint8* input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@ -286,34 +321,34 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32 shifted_input1_val =
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32 shifted_input2_val =
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8>(clamped_output);
static_cast<T>(clamped_output);
}
}
}
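For readers unfamiliar with the NdArrayDesc/SubscriptToIndex machinery used by BroadcastAdd4DSlow: broadcasting is realized by giving any axis of extent 1 a stride of 0, so the same element is reused along that axis. A simplified stand-in (hypothetical helper, not the TFLite code):

// Hypothetical illustration of broadcast indexing for a dense 4-D tensor:
// axes of size 1 get stride 0 and are therefore broadcast.
inline int BroadcastIndexSketch(const int dims[4], int b, int y, int x, int c) {
  const int s3 = dims[3] == 1 ? 0 : 1;
  const int s2 = dims[2] == 1 ? 0 : dims[3];
  const int s1 = dims[1] == 1 ? 0 : dims[2] * dims[3];
  const int s0 = dims[0] == 1 ? 0 : dims[1] * dims[2] * dims[3];
  return b * s0 + y * s1 + x * s2 + c * s3;
}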
@ -322,11 +357,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,

inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const RuntimeShape& unswitched_input1_shape,
const uint8* unswitched_input1_data,
const uint8_t* unswitched_input1_data,
const RuntimeShape& unswitched_input2_shape,
const uint8* unswitched_input2_data,
const uint8_t* unswitched_input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
ArithmeticParams switched_params = unswitched_params;
switched_params.input1_offset = unswitched_params.input2_offset;
switched_params.input1_multiplier = unswitched_params.input2_multiplier;
@ -341,18 +376,18 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,

const ArithmeticParams& params =
use_unswitched ? unswitched_params : switched_params;
const uint8* input1_data =
const uint8_t* input1_data =
use_unswitched ? unswitched_input1_data : unswitched_input2_data;
const uint8* input2_data =
const uint8_t* input2_data =
use_unswitched ? unswitched_input2_data : unswitched_input1_data;

// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
// beginning of the fourth loop. The innermost loop is an elementwise add of
// sections of the arrays.
uint8* output_data_ptr = output_data;
const uint8* input1_data_ptr = input1_data;
const uint8* input2_data_reset = input2_data;
uint8_t* output_data_ptr = output_data;
const uint8_t* input1_data_ptr = input1_data;
const uint8_t* input2_data_reset = input2_data;
// In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
// between input shapes. y3 for input 1 is always broadcast, and so the
// dimension there is 1, whereas optionally y1 might be broadcast for input 2.
@ -368,7 +403,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
// dimension.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
@ -397,7 +432,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// for y4 == 1 and the loop over y3 is contained within the
// AddScalarBroadcast function.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {

@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"

namespace tflite {

@ -51,18 +50,6 @@ inline bool LessEqualFn(T lhs, T rhs) {
return lhs <= rhs;
}

inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
if (lhs.len != rhs.len) return false;
for (int i = 0; i < lhs.len; ++i) {
if (lhs.str[i] != rhs.str[i]) return false;
}
return true;
}

inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
return !StringRefEqualFn(lhs, rhs);
}

template <typename T>
using ComparisonFn = bool (*)(T, T);

@ -78,22 +65,6 @@ inline void ComparisonImpl(
}
}

inline void ComparisonStringImpl(bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& input1_shape,
const TfLiteTensor* input1,
const RuntimeShape& input2_shape,
const TfLiteTensor* input2,
const RuntimeShape& output_shape,
bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const auto lhs = GetString(input1, i);
const auto rhs = GetString(input2, i);
output_data[i] = F(lhs, rhs);
}
}

template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@ -105,30 +76,30 @@ inline void Comparison(const ComparisonParams& op_params,
input2_data, output_shape, output_data);
}

template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void ComparisonWithScaling(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;

const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const int32 input1_val = input1_offset + input1_data[i];
const int32 input2_val = input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = input1_offset + input1_data[i];
const int32_t input2_val = input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
@ -180,31 +151,6 @@ inline void BroadcastComparison4DSlowImpl(
}
}

inline void BroadcastComparison4DSlowStringImpl(
bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);

for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const auto lhs =
GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
const auto rhs =
GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
}
}
}
}
}

template <ComparisonFn<float> F>
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@ -218,7 +164,7 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
output_shape, output_data);
}

template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void BroadcastComparison4DSlowWithScaling(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
@ -230,29 +176,29 @@ inline void BroadcastComparison4DSlowWithScaling(
unextended_output_shape);

int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;

for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
input1_offset +
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
input2_offset +
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(dims.output_shape, b, y, x, c)] =

@ -74,14 +74,14 @@ inline void Concatenation(const ConcatenationParams& params,
// when optimizng this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8* const* input_data,
const uint8_t* const* input_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
int axis = params.axis;
const int32* input_zeropoint = params.input_zeropoint;
const int32_t* input_zeropoint = params.input_zeropoint;
const float* input_scale = params.input_scale;
int inputs_count = params.inputs_count;
const int32 output_zeropoint = params.output_zeropoint;
const int32_t output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;

const int concat_dimensions = output_shape.DimensionsCount();
@ -110,11 +110,11 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
}

const float inverse_output_scale = 1.f / output_scale;
uint8* output_ptr = output_data;
uint8_t* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const uint8* input_ptr = input_data[i] + k * copy_size;
const uint8_t* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
memcpy(output_ptr, input_ptr, copy_size);

@ -99,11 +99,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
}

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data, const RuntimeShape& im2col_shape,
uint8* im2col_data, void* cpu_backend_context) {
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data, const RuntimeShape& im2col_shape,
uint8_t* im2col_data, void* cpu_backend_context) {
(void)cpu_backend_context; // only used in optimized code.
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
@ -113,13 +113,13 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -143,7 +143,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@ -154,9 +154,9 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc +=
@ -174,7 +174,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@ -220,7 +220,7 @@ inline void HybridConvPerChannel(
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@ -231,9 +231,9 @@ inline void HybridConvPerChannel(
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc += filter_val * (input_val - input_offset[batch]);

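Worked note on the quantized Conv accumulation above (a sketch of the standard affine-quantization relation, not text from the upstream sources): each tensor stores q with real ≈ scale * (q - zero_point), and the kernels use offset = -zero_point, so acc += (filter_val + filter_offset) * (input_val + input_offset) accumulates the integer image of real_filter * real_input up to the combined scale; the final MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift) plus output_offset then folds that back into the output's own scale and zero point. For example, with an input zero_point of 128 (input_offset = -128), a stored byte q = 200 contributes input_val + input_offset = 72, i.e. real ≈ input_scale * 72.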
@ -62,21 +62,21 @@ namespace reference_ops {
|
||||
namespace depthwise_conv {
|
||||
|
||||
template <DepthwiseConvOutputRounding output_rounding>
|
||||
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
|
||||
int shift) {
|
||||
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
|
||||
int shift) {
|
||||
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
|
||||
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
|
||||
int32 x, int32 quantized_multiplier, int shift) {
|
||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
|
||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
||||
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
|
||||
int32 x, int32 quantized_multiplier, int shift) {
|
||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
|
||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
const int left_shift = shift > 0 ? shift : 0;
|
||||
const int right_shift = shift > 0 ? 0 : -shift;
|
||||
@ -89,13 +89,12 @@ inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
|
||||
|
||||
template <DepthwiseConvOutputRounding output_rounding>
|
||||
struct DepthwiseConvBasicKernel {
|
||||
static inline void Run(const DepthwiseParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data,
|
||||
const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data,
|
||||
const RuntimeShape& bias_shape, const int32* bias_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
static inline void Run(
|
||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
const int dilation_width_factor = params.dilation_width_factor;
|
||||
@ -103,12 +102,12 @@ struct DepthwiseConvBasicKernel {
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
@ -135,7 +134,7 @@ struct DepthwiseConvBasicKernel {
|
||||
const int oc = m + ic * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x =
|
||||
@ -146,9 +145,9 @@ struct DepthwiseConvBasicKernel {
|
||||
// use zero as a default value.
|
||||
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height)) {
|
||||
int32 input_val =
|
||||
int32_t input_val =
|
||||
input_data[Offset(input_shape, b, in_y, in_x, ic)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, oc)];
|
||||
acc += (filter_val + filter_offset) *
|
||||
(input_val + input_offset);
|
||||
@ -164,7 +163,7 @@ struct DepthwiseConvBasicKernel {
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
|
||||
static_cast<uint8>(acc);
|
||||
static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -176,10 +175,10 @@ struct DepthwiseConvBasicKernel {
|
||||
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
|
||||
static inline void RunPerChannel(
|
||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
// Get parameters.
|
||||
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
|
||||
const int stride_width = params.stride_width;
|
||||
@ -189,12 +188,12 @@ struct DepthwiseConvBasicKernel {
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32* output_multiplier = params.output_multiplier_per_channel;
|
||||
const int32* output_shift = params.output_shift_per_channel;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
const int32_t* output_multiplier = params.output_multiplier_per_channel;
|
||||
const int32_t* output_shift = params.output_shift_per_channel;
|
||||
|
||||
// Check dimensions of the tensors.
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -222,7 +221,7 @@ struct DepthwiseConvBasicKernel {
|
||||
const int output_channel = m + in_channel * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x =
|
||||
@ -234,17 +233,18 @@ struct DepthwiseConvBasicKernel {
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
// Accumulate with 32 bits accumulator.
|
||||
// In the nudging process during model quantization, we
|
||||
// force real value of 0.0 be represented by a quantized
|
||||
// value. This guarantees that the input_offset is a int8,
|
||||
// even though it is represented using int32. int32 += int8
|
||||
// * (int8 - int8) so the highest value we can get from each
|
||||
// accumulation is [-127, 127] * ([-128, 127] -
|
||||
// value. This guarantees that the input_offset is a int8_t,
|
||||
// even though it is represented using int32_t. int32_t +=
|
||||
// int8_t
|
||||
// * (int8_t - int8_t) so the highest value we can get from
|
||||
// each accumulation is [-127, 127] * ([-128, 127] -
|
||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
||||
// = 14.98, which means we can accumulate at least 2^16
|
||||
// multiplications without overflow. The accumulator is
|
||||
@ -279,10 +279,10 @@ struct DepthwiseConvBasicKernel {
|
||||
|
||||
inline void DepthwiseConv(
|
||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
return depthwise_conv::DepthwiseConvBasicKernel<
|
||||
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
|
||||
input_data, filter_shape,
|
||||
|
||||
@ -32,12 +32,12 @@ inline void Dequantize(const tflite::DequantizationParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const InputT* input_data,
|
||||
const RuntimeShape& output_shape, OutputT* output_data) {
|
||||
int32 zero_point = op_params.zero_point;
|
||||
int32_t zero_point = op_params.zero_point;
|
||||
const double scale = op_params.scale;
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
const int32 val = input_data[i];
|
||||
const int32_t val = input_data[i];
|
||||
const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
|
||||
output_data[i] = result;
|
||||
}
|
||||
@ -52,11 +52,11 @@ inline void PerChannelDequantize(
|
||||
// Ensure flat size is same.
|
||||
MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
const int32* zero_point = op_params.zero_point;
|
||||
const int32_t* zero_point = op_params.zero_point;
|
||||
const float* scale = op_params.scale;
|
||||
const int32 quantized_dimension = op_params.quantized_dimension;
|
||||
const int32 num_dims = input_shape.DimensionsCount();
|
||||
const int32* dims_data = input_shape.DimsData();
|
||||
const int32_t quantized_dimension = op_params.quantized_dimension;
|
||||
const int32_t num_dims = input_shape.DimensionsCount();
|
||||
const int32_t* dims_data = input_shape.DimsData();
|
||||
std::vector<int> current_dim(num_dims, 0);
|
||||
|
||||
do {
|
||||
@ -64,7 +64,7 @@ inline void PerChannelDequantize(
|
||||
ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
|
||||
current_dim.data(), 0, nullptr);
|
||||
const int channel = current_dim[quantized_dimension];
|
||||
const int32 val = input_data[offset];
|
||||
const int32_t val = input_data[offset];
|
||||
const float result =
|
||||
static_cast<float>(scale[channel] * (val - zero_point[channel]));
|
||||
output_data[offset] = result;
|
||||
|
||||
@ -61,17 +61,17 @@ inline void FullyConnected(
|
||||
|
||||
inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
||||
|
||||
@ -89,10 +89,10 @@ inline void FullyConnected(
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
||||
}
|
||||
if (bias_data) {
|
||||
@ -102,24 +102,24 @@ inline void FullyConnected(
|
||||
acc += output_offset;
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
|
||||
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(output_offset, 0);
|
||||
@ -138,20 +138,21 @@ inline void FullyConnected(
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum = bias_data[out_c];
|
||||
int32_t accum = bias_data[out_c];
|
||||
// Accumulation loop.
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int16 input_val = input_data[b * accum_depth + d] + input_offset;
|
||||
int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
|
||||
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
|
||||
int16_t filter_val =
|
||||
filter_data[out_c * accum_depth + d] + filter_offset;
|
||||
accum += filter_val * input_val;
|
||||
}
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
||||
// multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
accum =
|
||||
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
accum = std::max(accum, output_activation_min - output_offset);
|
||||
accum = std::min(accum, output_activation_max - output_offset);
|
||||
accum += output_offset;
|
||||
@ -162,14 +163,14 @@ inline void FullyConnected(
|
||||
|
||||
inline void ShuffledFullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& weights_shape,
|
||||
const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data, uint8* shuffled_input_workspace_data) {
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& weights_shape,
|
||||
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
|
||||
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
|
||||
@ -190,7 +191,7 @@ inline void ShuffledFullyConnected(
|
||||
TFLITE_DCHECK((output_depth % 4) == 0);
|
||||
|
||||
// Shuffling and xoring of input activations into the workspace buffer
|
||||
uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
|
||||
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
|
||||
if (batches == 1) {
|
||||
for (int i = 0; i < accum_depth; i++) {
|
||||
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
|
||||
@ -198,13 +199,13 @@ inline void ShuffledFullyConnected(
|
||||
} else if (batches == 4) {
|
||||
for (int c = 0; c < accum_depth; c += 16) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
const uint8* src_data_ptr = input_data + b * accum_depth + c;
|
||||
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
|
||||
for (int j = 0; j < 16; j++) {
|
||||
uint8 src_val = *src_data_ptr++;
|
||||
uint8_t src_val = *src_data_ptr++;
|
||||
// Flip the sign bit, so that the kernel will only need to
|
||||
// reinterpret these uint8 values as int8, getting for free the
|
||||
// reinterpret these uint8_t values as int8_t, getting for free the
|
||||
// subtraction of the zero_point value 128.
|
||||
uint8 dst_val = src_val ^ 0x80;
|
||||
uint8_t dst_val = src_val ^ 0x80;
|
||||
*shuffled_input_workspace_ptr++ = dst_val;
|
||||
}
|
||||
}
|
||||
@ -216,62 +217,62 @@ inline void ShuffledFullyConnected(
|
||||
|
||||
// Actual computation
|
||||
if (batches == 1) {
|
||||
int16* output_ptr = output_data;
|
||||
int16_t* output_ptr = output_data;
|
||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
||||
// so that just reinterpreting them as int8 values is equivalent to
|
||||
// so that just reinterpreting them as int8_t values is equivalent to
|
||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
||||
// the zero_point value 128.
|
||||
const int8* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8*>(shuffled_weights_data);
|
||||
const int8_t* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
||||
// Likewise, we preshuffled and pre-xored the input data above.
|
||||
const int8* shuffled_input_data =
|
||||
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
|
||||
const int8_t* shuffled_input_data =
|
||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
||||
for (int c = 0; c < output_depth; c += 4) {
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum[4] = {0};
|
||||
int32_t accum[4] = {0};
|
||||
// Accumulation loop.
|
||||
for (int d = 0; d < accum_depth; d += 16) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
int8 input_val = shuffled_input_data[d + j];
|
||||
int8 weights_val = *shuffled_weights_ptr++;
|
||||
int8_t input_val = shuffled_input_data[d + j];
|
||||
int8_t weights_val = *shuffled_weights_ptr++;
|
||||
accum[i] += weights_val * input_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
// Add bias value
|
||||
int32 acc = accum[i] + bias_data[c + i];
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
int32_t acc = accum[i] + bias_data[c + i];
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
||||
// multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
acc =
|
||||
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_ptr[c + i] = acc;
|
||||
}
|
||||
}
|
||||
} else if (batches == 4) {
|
||||
int16* output_ptr = output_data;
|
||||
int16_t* output_ptr = output_data;
|
||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
||||
// so that just reinterpreting them as int8 values is equivalent to
|
||||
// so that just reinterpreting them as int8_t values is equivalent to
|
||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
||||
// the zero_point value 128.
|
||||
const int8* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8*>(shuffled_weights_data);
|
||||
const int8_t* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
||||
// Likewise, we preshuffled and pre-xored the input data above.
|
||||
const int8* shuffled_input_data =
|
||||
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
|
||||
const int8_t* shuffled_input_data =
|
||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
||||
for (int c = 0; c < output_depth; c += 4) {
|
||||
const int8* shuffled_input_ptr = shuffled_input_data;
|
||||
const int8_t* shuffled_input_ptr = shuffled_input_data;
|
||||
// Accumulation loop.
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum[4][4];
|
||||
int32_t accum[4][4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
accum[i][b] = 0;
|
||||
@ -281,8 +282,8 @@ inline void ShuffledFullyConnected(
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
int8 input_val = shuffled_input_ptr[16 * b + j];
|
||||
int8 weights_val = shuffled_weights_ptr[16 * i + j];
|
||||
int8_t input_val = shuffled_input_ptr[16 * b + j];
|
||||
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
|
||||
accum[i][b] += weights_val * input_val;
|
||||
}
|
||||
}
|
||||
@ -293,14 +294,14 @@ inline void ShuffledFullyConnected(
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
// Add bias value
|
||||
int32 acc = accum[i][b] + bias_data[c + i];
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
int32_t acc = accum[i][b] + bias_data[c + i];
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The
|
||||
// quantized multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
|
||||
output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_ptr[b * output_depth + c + i] = acc;
|
||||
|
||||
@ -23,34 +23,41 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void CheckArithmeticParams(const ArithmeticParams& params) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
// Input offset is negative input zero point. Activation tensors are
|
||||
// asymmetric quantized so they span the full int8 range.
|
||||
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
|
||||
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
|
||||
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
|
||||
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
|
||||
}
|
||||
|
||||
// Element-wise add that can often be used for inner loop of broadcast add as
|
||||
// well as the non-broadcast add.
|
||||
inline void AddElementwise(int size, const ArithmeticParams& params,
|
||||
const int8_t* input1_data, const int8_t* input2_data,
|
||||
int8_t* output_data) {
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
CheckArithmeticParams(params);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sum, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<int8_t>(clamped_output);
|
||||
@ -61,16 +68,11 @@ inline void Add(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
CheckArithmeticParams(params);
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,27 +22,27 @@ namespace reference_integer_ops {

// Fixed-point per-channel-quantization convolution reference kernel.
inline void ConvPerChannel(
const ConvParams& params, const int32* output_multiplier,
const int32* output_shift, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int8* output_data) {
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
const int32 input_offset = params.input_offset;  // r = s(q - Z)
const int32_t input_offset = params.input_offset;  // r = s(q - Z)
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32 output_offset = params.output_offset;
const int32_t output_offset = params.output_offset;

// Set min and max value of the output.
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;

// Sanity check.
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
@@ -67,7 +67,7 @@ inline void ConvPerChannel(
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@@ -79,18 +79,18 @@ inline void ConvPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
// Accumulate with 32 bits accumulator.
// In the nudging process during model quantization, we force
// real value of 0.0 be represented by a quantized value. This
// guarantees that the input_offset is a int8, even though it
// is represented using int32.
// int32 += int8 * (int8 - int8) so the highest value we can
// get from each accumulation is [-127, 127] * ([-128, 127] -
// guarantees that the input_offset is a int8_t, even though
// it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
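The comment above bounds each multiply-accumulate term at 32512; the short standalone check below (illustrative only, not part of the TFLite sources) confirms that a 32-bit accumulator then safely absorbs more than 2^16 such terms.

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  // Worst-case magnitude of one term, as stated in the comment above:
  // [-127, 127] * ([-128, 127] - [-128, 127]) stays within [-32512, 32512].
  const int32_t kMaxTerm = 32512;
  // Number of worst-case terms a 32-bit accumulator can hold before it can
  // overflow (about 66052, comfortably above 2^16 = 65536).
  const int32_t safe_terms = std::numeric_limits<int32_t>::max() / kMaxTerm;
  std::printf("safe accumulations: %d (2^16 = %d)\n", safe_terms, 1 << 16);
  return 0;
}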
@ -125,12 +125,12 @@ inline void ConvPerChannel(
|
||||
// Fixed-point per-channel-quantization convolution reference kernel.
|
||||
// 16-bit data and 8-bit filter
|
||||
inline void ConvPerChannel(
|
||||
const ConvParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const ConvParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const std::int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
int16_t* output_data) {
|
||||
// Get parameters.
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
@ -140,10 +140,10 @@ inline void ConvPerChannel(
|
||||
const int pad_height = params.padding_values.height;
|
||||
|
||||
// Set min and max value of the output.
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Sanity check.
|
||||
// Consistency check.
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
@ -180,13 +180,13 @@ inline void ConvPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val =
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val =
|
||||
filter_data[Offset(filter_shape, out_channel, filter_y,
|
||||
filter_x, in_channel)];
|
||||
// Accumulate with 64 bits accumulator.
|
||||
// int64 += int8 * int16 so the highest value we can
|
||||
// int64_t += int8_t * int16_t so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-32768,
|
||||
// 32767] -
|
||||
// [-32768, 32767]), which is [-8322945, 8322945].
|
||||
|
||||
@ -20,12 +20,12 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
inline void DepthwiseConvPerChannel(
|
||||
const DepthwiseParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
// Get parameters.
|
||||
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
|
||||
const int stride_width = params.stride_width;
|
||||
@ -35,10 +35,10 @@ inline void DepthwiseConvPerChannel(
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Check dimensions of the tensors.
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -66,7 +66,7 @@ inline void DepthwiseConvPerChannel(
|
||||
const int output_channel = m + in_channel * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
@ -77,17 +77,17 @@ inline void DepthwiseConvPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
// Accumulate with 32 bits accumulator.
|
||||
// In the nudging process during model quantization, we force
|
||||
// real value of 0.0 be represented by a quantized value. This
|
||||
// guarantees that the input_offset is a int8, even though it
|
||||
// is represented using int32.
|
||||
// int32 += int8 * (int8 - int8) so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-128, 127] -
|
||||
// guarantees that the input_offset is a int8_t, even though
|
||||
// it is represented using int32_t. int32_t += int8_t *
|
||||
// (int8_t - int8_t) so the highest value we can get from each
|
||||
// accumulation is [-127, 127] * ([-128, 127] -
|
||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
||||
// = 14.98, which means we can accumulate at least 2^16
|
||||
// multiplications without overflow. The accumulator is
|
||||
@ -120,12 +120,12 @@ inline void DepthwiseConvPerChannel(
|
||||
}
|
||||
|
||||
inline void DepthwiseConvPerChannel(
|
||||
const DepthwiseParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const std::int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
int16_t* output_data) {
|
||||
// Get parameters.
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
@ -134,8 +134,8 @@ inline void DepthwiseConvPerChannel(
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Check dimensions of the tensors.
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -174,9 +174,9 @@ inline void DepthwiseConvPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
// Accumulate with 64 bits accumulator.
|
||||
// We assume maximum of 2^16 accumulations as with the 8-bit
|
||||
@ -190,7 +190,7 @@ inline void DepthwiseConvPerChannel(
|
||||
if (bias_data) {
|
||||
acc += bias_data[output_channel];
|
||||
}
|
||||
int32 scaled_acc = MultiplyByQuantizedMultiplier(
|
||||
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
|
||||
acc, output_multiplier[output_channel],
|
||||
output_shift[output_channel]);
|
||||
scaled_acc = std::max(scaled_acc, output_activation_min);
|
||||
@ -207,8 +207,8 @@ inline void DepthwiseConvPerChannel(
|
||||
|
||||
inline void DepthwiseConvHybridPerChannel(
|
||||
const DepthwiseParams& params, float* scaling_factors_ptr,
|
||||
const RuntimeShape& input_shape, const int8* input_data,
|
||||
const RuntimeShape& filter_shape, const int8* filter_data,
|
||||
const RuntimeShape& input_shape, const int8_t* input_data,
|
||||
const RuntimeShape& filter_shape, const int8_t* filter_data,
|
||||
const RuntimeShape& bias_shape, const float* bias_data,
|
||||
const RuntimeShape& output_shape, float* output_data,
|
||||
const float* per_channel_scale, int32_t* input_offset) {
|
||||
@ -247,7 +247,7 @@ inline void DepthwiseConvHybridPerChannel(
|
||||
const int output_channel = m + in_channel * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
@ -258,9 +258,9 @@ inline void DepthwiseConvHybridPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
acc += filter_val * (input_val - input_offset[batch]);
|
||||
}
|
||||
|
||||
@ -24,15 +24,15 @@ inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
|
||||
@ -44,10 +44,10 @@ inline void FullyConnected(
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
||||
}
|
||||
if (bias_data) {
|
||||
@ -68,11 +68,11 @@ inline void FullyConnected(
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
|
||||
@ -86,8 +86,8 @@ inline void FullyConnected(
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int64_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * input_val;
|
||||
}
|
||||
if (bias_data) {
|
||||
|
||||
@ -21,8 +21,8 @@ namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
int32_t depth, const int8* input_data,
|
||||
int8* output_data) {
|
||||
int32_t depth, const int8_t* input_data,
|
||||
int8_t* output_data) {
|
||||
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
|
||||
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
|
||||
// The output scale must be in sync with Prepare().
|
||||
@ -30,7 +30,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
// to [-1, 127/128].
|
||||
static constexpr int32_t kOutputScale = 7;
|
||||
for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
|
||||
// int32 = (int8 - int8) ^ 2.
|
||||
// int32_t = (int8_t - int8_t) ^ 2.
|
||||
// ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
|
||||
// safe from overflowing in at least 2^16 steps.
|
||||
int32_t acc = 0;
|
||||
@ -55,7 +55,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
std::min(static_cast<int32_t>(kMaxInt8),
|
||||
std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
|
||||
output_data[depth * outer_index + inner_index] =
|
||||
static_cast<int8>(output_in_q24);
|
||||
static_cast<int8_t>(output_in_q24);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,14 +27,14 @@ inline void MulElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 unclamped_result =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
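For reference, the sketch below reproduces what MulElementwise above computes, but in the real-number domain rather than in fixed point. All scales, zero points and input values are made up for illustration; the fixed-point output_multiplier/output_shift pair in the kernel encodes the combined scale s1*s2/s_out that is applied directly here.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Example quantization parameters (assumed, not taken from any model).
  const float s1 = 0.02f, s2 = 0.05f, s_out = 0.1f;  // tensor scales
  const int32_t z1 = -3, z2 = 5, z_out = 1;          // zero points
  const int8_t q1 = 57, q2 = -18;                    // quantized inputs

  // The kernel works on (value - zero_point); its precomputed input offsets
  // serve the same purpose as the subtractions below.
  const int32_t v1 = q1 - z1;
  const int32_t v2 = q2 - z2;

  // Apply the real-valued combined scale instead of the fixed-point form.
  const float real_scale = s1 * s2 / s_out;
  const int32_t unclamped =
      z_out + static_cast<int32_t>(std::lround(v1 * v2 * real_scale));
  // Clamp to the int8_t range; the kernel clamps to
  // params.quantized_activation_min/max instead.
  const int32_t q_out =
      std::min<int32_t>(127, std::max<int32_t>(-128, unclamped));
  std::printf("q_out = %d\n", q_out);
  return 0;
}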
@ -57,13 +57,13 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
// Mul with 16 bit inputs and int8_t outputs.
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int16* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16* input2_data,
|
||||
const RuntimeShape& input1_shape, const int16_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
|
||||
int32 output_offset = params.output_offset;
|
||||
int32 output_activation_min = params.quantized_activation_min;
|
||||
int32 output_activation_max = params.quantized_activation_max;
|
||||
int32_t output_offset = params.output_offset;
|
||||
int32_t output_activation_min = params.quantized_activation_min;
|
||||
int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
|
||||
const int flat_size =
|
||||
@ -75,12 +75,12 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
F0 unclamped_result =
|
||||
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
|
||||
int16 rescaled_result =
|
||||
int16_t rescaled_result =
|
||||
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
|
||||
int16 clamped_result =
|
||||
std::min<int16>(output_activation_max - output_offset, rescaled_result);
|
||||
clamped_result =
|
||||
std::max<int16>(output_activation_min - output_offset, clamped_result);
|
||||
int16_t clamped_result = std::min<int16_t>(
|
||||
output_activation_max - output_offset, rescaled_result);
|
||||
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
|
||||
clamped_result);
|
||||
output_data[i] = output_offset + clamped_result;
|
||||
}
|
||||
}
|
||||
@ -104,18 +104,18 @@ inline void BroadcastMul4DSlow(
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset +
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset +
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
const int32 unclamped_result =
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output = std::min(
|
||||
const int32_t clamped_output = std::min(
|
||||
params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
||||
|
||||
@ -22,8 +22,9 @@ namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape, const int8* input_data,
|
||||
const RuntimeShape& output_shape, int8* output_data) {
|
||||
const RuntimeShape& input_shape,
|
||||
const int8_t* input_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -52,7 +53,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@ -71,7 +72,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<int8>(acc);
|
||||
static_cast<int8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -79,8 +80,8 @@ inline void AveragePool(const PoolParams& params,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const int8_t* input_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min,
|
||||
@ -137,8 +138,9 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int16* input_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
const int16_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -167,7 +169,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@ -186,7 +188,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<int16>(acc);
|
||||
static_cast<int16_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -194,8 +196,8 @@ inline void AveragePool(const PoolParams& params,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
const int16_t* input_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min,
|
||||
|
||||
@ -52,40 +52,39 @@ inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
|
||||
|
||||
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data,
|
||||
const uint8_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
uint8_t* output_data) {
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int32 input_zero_point = op_params.input_zero_point;
|
||||
const int32_t input_zero_point = op_params.input_zero_point;
|
||||
|
||||
for (int i = 0; i < outer_size; ++i) {
|
||||
int32 square_l2_norm = 0;
|
||||
int32_t square_l2_norm = 0;
|
||||
for (int c = 0; c < depth; c++) {
|
||||
int32 diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32_t diff = input_data[depth * i + c] - input_zero_point;
|
||||
square_l2_norm += diff * diff;
|
||||
}
|
||||
int32 inv_l2norm_multiplier;
|
||||
int32_t inv_l2norm_multiplier;
|
||||
int inv_l2norm_shift;
|
||||
GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
|
||||
&inv_l2norm_multiplier, &inv_l2norm_shift);
|
||||
for (int c = 0; c < depth; c++) {
|
||||
int32 diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
int32_t diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
|
||||
int32 unclamped_output_val = 128 + rescaled_diff;
|
||||
int32 output_val =
|
||||
std::min(static_cast<int32>(255),
|
||||
std::max(static_cast<int32>(0), unclamped_output_val));
|
||||
output_data[depth * i + c] = static_cast<uint8>(output_val);
|
||||
int32_t unclamped_output_val = 128 + rescaled_diff;
|
||||
int32_t output_val =
|
||||
std::min(static_cast<int32_t>(255),
|
||||
std::max(static_cast<int32_t>(0), unclamped_output_val));
|
||||
output_data[depth * i + c] = static_cast<uint8_t>(output_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
|
||||
|
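As a plain-float reference for the uint8_t L2Normalization kernel above: it approximates out = clamp(128 + 128 * (q - zero_point) / ||q - zero_point||, 0, 255), with the inverse square root realised through a quantized multiplier. The sketch below uses made-up input values and does the rescaling in float; it is not TFLite code.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int zero_point = 128;  // assumed input zero point
  // Example row to normalize (chosen so the L2 norm is non-zero).
  const std::vector<uint8_t> input = {200, 90, 128, 255};

  float sum_sq = 0.0f;
  for (uint8_t q : input) {
    const float d = static_cast<float>(q) - zero_point;
    sum_sq += d * d;  // the kernel accumulates this as square_l2_norm
  }
  const float inv_norm = 1.0f / std::sqrt(sum_sq);

  for (uint8_t q : input) {
    const float d = static_cast<float>(q) - zero_point;
    // The kernel performs this rescaling with a quantized inverse-sqrt
    // multiplier; here it is done directly in float.
    const int out = std::min(
        255,
        std::max(0, 128 + static_cast<int>(std::lround(128.0f * d * inv_norm))));
    std::printf("%d ", out);
  }
  std::printf("\n");
  return 0;
}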
||||
@ -66,8 +66,8 @@ inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
|
||||
}
|
||||
|
||||
inline void Logistic(const LogisticParams& params,
|
||||
const RuntimeShape& input_shape, const int16* input_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
const RuntimeShape& input_shape, const int16_t* input_data,
|
||||
const RuntimeShape& output_shape, int16_t* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
@ -84,12 +84,12 @@ inline void Logistic(const LogisticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized int8 logistic activation. Cheats by dequantizing and requantizing
|
||||
// around the floating point logistic method. This implementation is slow on
|
||||
// platforms without a floating point unit.
|
||||
// Quantized int8_t logistic activation. Cheats by dequantizing and
|
||||
// requantizing around the floating point logistic method. This implementation
|
||||
// is slow on platforms without a floating point unit.
|
||||
|
||||
// TODO(b/141211002): Delete this int8 implementation once we can reuse the
|
||||
// approach used in TFLite for int8 Logistic.
|
||||
// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
|
||||
// approach used in TFLite for int8_t Logistic.
|
||||
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
|
||||
float input_scale, int input_zero_point,
|
||||
const RuntimeShape& output_shape, int8_t* output_data,
|
||||
|
||||
@ -24,20 +24,20 @@ namespace reference_ops {
|
||||
// Element-wise mul that can often be used for inner loop of broadcast Mul as
|
||||
// well as the non-broadcast Mul.
|
||||
inline void MulElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input1_data,
|
||||
const uint8_t* input2_data, uint8_t* output_data) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 unclamped_result =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
output_data[i] = static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,9 +60,9 @@ inline void Mul(const ArithmeticParams& params,
|
||||
}
|
||||
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
@ -73,11 +73,11 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
inline void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const uint8* input1_data,
|
||||
const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const uint8* input2_data,
|
||||
const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
uint8_t* output_data) {
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
@ -89,22 +89,22 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset +
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset +
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
const int32 unclamped_result =
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output = std::min(
|
||||
const int32_t clamped_output = std::min(
|
||||
params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
||||
static_cast<uint8>(clamped_output);
|
||||
static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -32,8 +32,8 @@ constexpr int PadKernelMaxDimensionCount() { return 4; }
|
||||
// equivalent to a simple input1_data. For Pad, it should point to a zero
|
||||
// value.
|
||||
//
|
||||
// Note that two typenames are required, so that T=P=int32 is considered a
|
||||
// specialization distinct from P=int32.
|
||||
// Note that two typenames are required, so that T=P=int32_t is considered a
|
||||
// specialization distinct from P=int32_t.
|
||||
template <typename T, typename P>
|
||||
inline void PadImpl(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
@ -116,11 +116,11 @@ inline void Pad(const tflite::PadParams& op_params,
|
||||
output_data);
|
||||
}
|
||||
|
||||
// The second (pad-value) input can be int32 when, say, the first is uint8.
|
||||
// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
|
||||
template <typename T>
|
||||
inline void Pad(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
const int32* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
const T converted_pad_value = static_cast<T>(*pad_value_ptr);
|
||||
PadImpl(op_params, input_shape, input_data, &converted_pad_value,
|
||||
@ -130,40 +130,18 @@ inline void Pad(const tflite::PadParams& op_params,
|
||||
// This version avoids conflicting template matching.
|
||||
template <>
|
||||
inline void Pad(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const int32* input_data,
|
||||
const int32* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
const RuntimeShape& input_shape, const int32_t* input_data,
|
||||
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
int32_t* output_data) {
|
||||
PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
// One could make all PadImageStyle calls simply delegate the work to the
|
||||
// ordinary Pad. However, it is better that the reference code asserts false in
|
||||
// similar cases.
|
||||
template <typename T, typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
TFLITE_ASSERT_FALSE;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
@ -78,8 +78,9 @@ inline void AveragePool(const PoolParams& params,
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const uint8_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@ -108,7 +109,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@ -125,7 +126,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<uint8>(acc);
|
||||
static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -237,8 +238,8 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
|
||||
@ -269,7 +270,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
uint8 max = 0;
|
||||
uint8_t max = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
for (int filter_x = filter_x_start; filter_x < filter_x_end;
|
||||
@ -281,10 +282,10 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
|
||||
}
|
||||
}
|
||||
max = std::max<uint8>(max, params.quantized_activation_min);
|
||||
max = std::min<uint8>(max, params.quantized_activation_max);
|
||||
max = std::max<uint8_t>(max, params.quantized_activation_min);
|
||||
max = std::min<uint8_t>(max, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<uint8>(max);
|
||||
static_cast<uint8_t>(max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -23,7 +23,7 @@ namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
// Broadcast prelu to output_shape for quantized uint8/int8 data.
|
||||
// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
|
||||
template <typename T>
|
||||
inline void BroadcastPrelu4DSlow(
|
||||
const PreluParams& params, const RuntimeShape& input_shape,
|
||||
@ -44,15 +44,15 @@ inline void BroadcastPrelu4DSlow(
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
int output_index = Offset(extended_output_shape, b, y, x, c);
|
||||
int input_index = SubscriptToIndex(desc1, b, y, x, c);
|
||||
const int32 input_value =
|
||||
const int32_t input_value =
|
||||
params.input_offset + input_data[input_index];
|
||||
int32 output_value;
|
||||
int32_t output_value;
|
||||
if (input_value >= 0) {
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value, params.output_multiplier_1, params.output_shift_1);
|
||||
} else {
|
||||
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
|
||||
const int32 alpha_value =
|
||||
const int32_t alpha_value =
|
||||
params.alpha_offset + alpha_data[alpha_index];
|
||||
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
@ -61,9 +61,9 @@ inline void BroadcastPrelu4DSlow(
|
||||
}
|
||||
output_value += params.output_offset;
|
||||
|
||||
const int32 quantized_min = std::numeric_limits<T>::min();
|
||||
const int32 quantized_max = std::numeric_limits<T>::max();
|
||||
const int32 clamped_output =
|
||||
const int32_t quantized_min = std::numeric_limits<T>::min();
|
||||
const int32_t quantized_max = std::numeric_limits<T>::max();
|
||||
const int32_t clamped_output =
|
||||
std::min(quantized_max, std::max(quantized_min, output_value));
|
||||
output_data[output_index] = static_cast<T>(clamped_output);
|
||||
}
|
||||
@ -72,6 +72,37 @@ inline void BroadcastPrelu4DSlow(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
|
||||
const T* input_data, const RuntimeShape& alpha_shape,
|
||||
const T* alpha_data, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
const int32_t quantized_min = std::numeric_limits<T>::min();
|
||||
const int32_t quantized_max = std::numeric_limits<T>::max();
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input_shape, alpha_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const int32_t input_value = params.input_offset + input_data[i];
|
||||
int32_t output_value;
|
||||
if (input_value >= 0) {
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value, params.output_multiplier_1, params.output_shift_1);
|
||||
} else {
|
||||
const int32_t alpha_value = params.alpha_offset + alpha_data[i];
|
||||
|
||||
output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
|
||||
params.output_multiplier_2,
|
||||
params.output_shift_2);
|
||||
}
|
||||
output_value += params.output_offset;
|
||||
|
||||
const int32_t clamped_output =
|
||||
std::min(quantized_max, std::max(quantized_min, output_value));
|
||||
output_data[i] = static_cast<T>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
||||
@ -76,6 +76,10 @@ inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
|
||||
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
|
||||
params->broadcast_category !=
|
||||
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
|
||||
// This is unreachable because at least one else clause in the above loop
|
||||
// must be reached.
|
||||
TFLITE_DCHECK(false);
|
||||
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,11 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_

#include <algorithm>
#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"

@@ -29,18 +33,18 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,
const InputT* input_data,
const RuntimeShape& output_shape,
OutputT* output_data) {
const int32 zero_point = op_params.zero_point;
const int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
static constexpr int32 min_val = std::numeric_limits<OutputT>::min();
static constexpr int32 max_val = std::numeric_limits<OutputT>::max();
static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();

for (int i = 0; i < flat_size; i++) {
const InputT val = input_data[i];
int32 unclamped =
static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
int32_t unclamped =
static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
zero_point;
int32 clamped = std::min(std::max(unclamped, min_val), max_val);
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
output_data[i] = clamped;
}
}

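The AffineQuantize routine above implements q = clamp(round(x / scale) + zero_point, min, max). A minimal standalone illustration follows, specialised to int8_t, with std::lround standing in for TfLiteRound; the function name and the scale/zero-point values are invented for the example.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

int8_t QuantizeToInt8(float x, float scale, int32_t zero_point) {
  const int32_t min_val = std::numeric_limits<int8_t>::min();
  const int32_t max_val = std::numeric_limits<int8_t>::max();
  const int32_t unclamped =
      static_cast<int32_t>(std::lround(x / scale)) + zero_point;
  return static_cast<int8_t>(std::min(std::max(unclamped, min_val), max_val));
}

int main() {
  const float scale = 0.05f;      // example scale
  const int32_t zero_point = 10;  // example zero point
  std::printf("%d %d %d\n",
              QuantizeToInt8(0.0f, scale, zero_point),     // -> 10
              QuantizeToInt8(1.0f, scale, zero_point),     // -> 30
              QuantizeToInt8(100.0f, scale, zero_point));  // clamps to 127
  return 0;
}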
@ -18,6 +18,8 @@ limitations under the License.
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@ -249,9 +251,9 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
|
||||
inline void Mean(const tflite::MeanParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape,
|
||||
const uint8_t* input_data, int32 input_zero_point,
|
||||
const uint8_t* input_data, int32_t input_zero_point,
|
||||
float input_scale, const RuntimeShape& unextended_output_shape,
|
||||
uint8_t* output_data, int32 output_zero_point,
|
||||
uint8_t* output_data, int32_t output_zero_point,
|
||||
float output_scale) {
|
||||
ruy::profiler::ScopeLabel label("Mean4D/Uint8");
|
||||
|
||||
@ -280,9 +282,9 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
|
||||
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
|
||||
|
||||
int32 bias =
|
||||
int32_t bias =
|
||||
output_zero_point -
|
||||
static_cast<int32>(input_zero_point * input_scale / output_scale);
|
||||
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
|
||||
double real_scale =
|
||||
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
|
||||
|
||||
@ -291,7 +293,7 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
QuantizeMultiplier(real_scale, &multiplier, &shift);
|
||||
for (int out_b = 0; out_b < output_batch; ++out_b) {
|
||||
for (int out_d = 0; out_d < output_depth; ++out_d) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int in_h = 0; in_h < input_height; ++in_h) {
|
||||
for (int in_w = 0; in_w < input_width; ++in_w) {
|
||||
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
|
||||
@ -310,18 +312,21 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
// It does so in two stages, first calculates the sum of elements along the axis
|
||||
// then divides it by the number of element in axis for quantized values.
|
||||
template <typename T, typename U>
|
||||
inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
|
||||
float input_scale, const int* input_dims,
|
||||
const int input_num_dims, T* output_data,
|
||||
int32 output_zero_point, float output_scale,
|
||||
int32_t output_zero_point, float output_scale,
|
||||
const int* output_dims,
|
||||
const int output_num_dims, const int* axis,
|
||||
const int num_axis_dimensions, bool keep_dims,
|
||||
int* temp_index, int* resolved_axis, U* temp_sum,
|
||||
bool compute_sum) {
|
||||
const bool uint8_case = std::is_same<T, int8_t>::value;
|
||||
const bool uint8_case = std::is_same<T, uint8_t>::value;
|
||||
const bool int16_case = std::is_same<T, int16_t>::value;
|
||||
if (uint8_case) {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
|
||||
} else if (int16_case) {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
|
||||
} else {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
|
||||
}
|
||||
@ -381,11 +386,11 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
float float_mean = static_cast<float>(temp_sum[idx]) /
|
||||
static_cast<float>(num_elements_in_axis);
|
||||
float result =
|
||||
std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point,
|
||||
static_cast<float>(std::numeric_limits<T>::max()));
|
||||
result =
|
||||
std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
|
||||
float result = TfLiteMin(
|
||||
TfLiteRound(float_mean * scale + bias) + output_zero_point,
|
||||
static_cast<float>(std::numeric_limits<T>::max()));
|
||||
result = TfLiteMax(result,
|
||||
static_cast<float>(std::numeric_limits<T>::min()));
|
||||
output_data[idx] = static_cast<T>(result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,28 +17,30 @@ limitations under the License.

#include <cmath>

#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline int32 GetNearestNeighbor(const int input_value, const int32 input_size,
const int32 output_size,
const bool align_corners,
const bool half_pixel_centers) {
inline int32_t GetNearestNeighbor(const int input_value,
const int32_t input_size,
const int32_t output_size,
const bool align_corners,
const bool half_pixel_centers) {
const float scale =
(align_corners && output_size > 1)
? (input_size - 1) / static_cast<float>(output_size - 1)
: input_size / static_cast<float>(output_size);
const float offset = half_pixel_centers ? 0.5f : 0.0f;
int32 output_value = std::min(
int32_t output_value = std::min(
align_corners
? static_cast<int32>(std::round((input_value + offset) * scale))
: static_cast<int32>(std::floor((input_value + offset) * scale)),
? static_cast<int32_t>(TfLiteRound((input_value + offset) * scale))
: static_cast<int32_t>(std::floor((input_value + offset) * scale)),
input_size - 1);
if (half_pixel_centers) {
output_value = std::max(static_cast<int32_t>(0), output_value);
output_value = std::max(static_cast<int32_t>(0), output_value);
}
return output_value;
}
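The index mapping computed by GetNearestNeighbor above can be exercised on its own. The standalone copy below mirrors that logic (with std::round standing in for TfLiteRound) and shows the resulting column mapping for a 4-to-8 upscale; the function name is invented for the example.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int32_t NearestNeighborIndex(int input_value, int32_t input_size,
                             int32_t output_size, bool align_corners,
                             bool half_pixel_centers) {
  const float scale =
      (align_corners && output_size > 1)
          ? (input_size - 1) / static_cast<float>(output_size - 1)
          : input_size / static_cast<float>(output_size);
  const float offset = half_pixel_centers ? 0.5f : 0.0f;
  int32_t out = std::min(
      align_corners
          ? static_cast<int32_t>(std::round((input_value + offset) * scale))
          : static_cast<int32_t>(std::floor((input_value + offset) * scale)),
      input_size - 1);
  if (half_pixel_centers) {
    out = std::max(static_cast<int32_t>(0), out);
  }
  return out;
}

int main() {
  // Upscale a 4-wide row to 8 output columns (no align_corners, no half-pixel
  // centers): each input column is simply repeated twice.
  for (int x = 0; x < 8; ++x) {
    std::printf("%d ", NearestNeighborIndex(x, 4, 8, false, false));
  }
  std::printf("\n");  // prints: 0 0 1 1 2 2 3 3
  return 0;
}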
@ -47,7 +49,7 @@ template <typename T>
|
||||
inline void ResizeNearestNeighbor(
|
||||
const tflite::ResizeNearestNeighborParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape, const T* input_data,
|
||||
const RuntimeShape& output_size_shape, const int32* output_size_data,
|
||||
const RuntimeShape& output_size_shape, const int32_t* output_size_data,
|
||||
const RuntimeShape& unextended_output_shape, T* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
@ -57,16 +59,16 @@ inline void ResizeNearestNeighbor(
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
int32 input_height = input_shape.Dims(1);
|
||||
int32 input_width = input_shape.Dims(2);
|
||||
int32 depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
int32_t input_height = input_shape.Dims(1);
|
||||
int32_t input_width = input_shape.Dims(2);
|
||||
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
|
||||
// The Tensorflow version of this op allows resize on the width and height
|
||||
// axis only.
|
||||
TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
|
||||
int32 output_height = output_size_data[0];
|
||||
int32 output_width = output_size_data[1];
|
||||
int32_t output_height = output_size_data[0];
|
||||
int32_t output_width = output_size_data[1];
|
||||
|
||||
const int col_offset = input_shape.Dims(3);
|
||||
const int row_offset = input_shape.Dims(2) * col_offset;
|
||||
@ -76,14 +78,14 @@ inline void ResizeNearestNeighbor(
|
||||
T* output_ptr = output_data;
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int y = 0; y < output_height; ++y) {
|
||||
int32 in_y = GetNearestNeighbor(y, input_height, output_height,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* y_input_ptr = input_ptr + in_y * row_offset;
|
||||
for (int x = 0; x < output_width; ++x) {
|
||||
int32 in_x = GetNearestNeighbor(x, input_width, output_width,
|
||||
int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* y_input_ptr = input_ptr + in_y * row_offset;
|
||||
for (int x = 0; x < output_width; ++x) {
|
||||
int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* x_input_ptr = y_input_ptr + in_x * col_offset;
|
||||
memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
|
||||
output_ptr += depth;
|
||||
|
||||
@ -62,13 +62,14 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized softmax with int8/uint8 input and int8/uint8/int16 output.
|
||||
// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
|
||||
// output.
|
||||
template <typename InputT, typename OutputT>
|
||||
inline void Softmax(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape, const InputT* input_data,
|
||||
const RuntimeShape& output_shape, OutputT* output_data) {
|
||||
const int32 input_beta_multiplier = params.input_multiplier;
|
||||
const int32 input_beta_left_shift = params.input_left_shift;
|
||||
const int32_t input_beta_multiplier = params.input_multiplier;
|
||||
const int32_t input_beta_left_shift = params.input_left_shift;
|
||||
const int diff_min = params.diff_min;
|
||||
// The representation chosen for the input to the exp() function is Q5.26.
|
||||
// We need to leave extra space since values that we skip might be as large as
|
||||
@ -78,9 +79,10 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
static const int kAccumulationIntegerBits = 12;
|
||||
using FixedPointScaledDiff =
|
||||
gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
|
||||
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
|
||||
using FixedPointAccum =
|
||||
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
|
||||
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
@ -96,10 +98,10 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
|
||||
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
int32_t input_diff =
|
||||
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
const int32_t input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
@ -114,28 +116,28 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
|
||||
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
int32_t input_diff =
|
||||
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
const int32_t input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
|
||||
|
||||
FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
|
||||
int32 unsat_output = gemmlowp::RoundingDivideByPOT(
|
||||
int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
|
||||
(shifted_scale * exp_in_0).raw(),
|
||||
num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
|
||||
|
||||
const int32 shifted_output =
|
||||
const int32_t shifted_output =
|
||||
unsat_output +
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min());
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::min());
|
||||
|
||||
output_data[i * depth + c] = static_cast<OutputT>(std::max(
|
||||
std::min(shifted_output,
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::max())),
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min())));
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
|
||||
} else {
|
||||
output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
|
||||
}
|
||||
@ -143,7 +145,7 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized softmax with int16 input and int16 output.
|
||||
// Quantized softmax with int16_t input and int16_t output.
|
||||
inline void SoftmaxInt16(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int16_t* input_data,
|
||||
|
||||
@ -16,8 +16,10 @@ limitations under the License.
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
@ -15,9 +15,15 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -41,11 +47,11 @@ inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const int32_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const int32_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
int32_t* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
@ -106,12 +112,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const uint8* input1_data,
|
||||
const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const uint8* input2_data,
|
||||
const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8");
|
||||
uint8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
@ -134,28 +140,28 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
static_cast<uint8>(clamped_output);
|
||||
static_cast<uint8_t>(clamped_output);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
@ -163,12 +169,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const int32_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const int32_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32");
|
||||
int32_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
@ -208,7 +214,7 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8");
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
@ -254,6 +260,45 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int64_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int64_t* input2_data,
|
||||
const RuntimeShape& output_shape, int64_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] -
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
params.int64_activation_min, params.int64_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <typename T, int N = 5>
|
||||
void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const T* input1_data,
|
||||
@ -294,33 +339,33 @@ void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
// Element-wise Sub that can often be used for inner loop of broadcast sub as
|
||||
// well as the non-broadcast sub.
|
||||
inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input1_data,
|
||||
const uint8_t* input2_data, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -256);
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -256);
|
||||
TFLITE_DCHECK_LT(params.input1_offset, 256);
|
||||
TFLITE_DCHECK_LT(params.input2_offset, 256);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
output_data[i] = static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
@ -336,22 +381,22 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<int8_t>(clamped_output);
|
||||
@ -359,9 +404,9 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
}
|
||||
|
||||
inline void Sub(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
@ -428,40 +473,43 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
int32_t* activation_min,
|
||||
int32_t* activation_max) {
|
||||
*activation_min = params.quantized_activation_min;
|
||||
*activation_max = params.quantized_activation_max;
|
||||
}
|
||||
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
float* activation_min, float* activation_max) {
|
||||
*activation_min = params.float_activation_min;
|
||||
*activation_max = params.float_activation_max;
|
||||
}
|
||||
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
int64_t* activation_min,
|
||||
int64_t* activation_max) {
|
||||
*activation_min = params.int64_activation_min;
|
||||
*activation_max = params.int64_activation_max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void SubWithActivation(
|
||||
const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
||||
const T* input1_data, const RuntimeShape& input2_shape,
|
||||
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("SubWithActivation");
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
T activation_min, activation_max;
|
||||
SetActivationMinMax(params, &activation_min, &activation_max);
|
||||
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
input1_data[i] - input2_data[i], activation_min, activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.float_activation_min,
|
||||
params.float_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
||||
@ -24,24 +24,29 @@ limitations under the License.
|
||||
|
||||
namespace tflite {
|
||||
|
||||
enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
|
||||
enum class PaddingType : uint8 { kNone, kSame, kValid };
|
||||
enum class FusedActivationFunctionType : uint8_t {
|
||||
kNone,
|
||||
kRelu6,
|
||||
kRelu1,
|
||||
kRelu
|
||||
};
|
||||
enum class PaddingType : uint8_t { kNone, kSame, kValid };
|
||||
|
||||
struct PaddingValues {
|
||||
int16 width;
|
||||
int16 height;
|
||||
int16_t width;
|
||||
int16_t height;
|
||||
// offset is used for calculating "remaining" padding, for example, `width`
|
||||
// is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
|
||||
// 1 + 1 = 2.
|
||||
int16 width_offset;
|
||||
int16_t width_offset;
|
||||
// Same as width_offset except it's over the height dimension.
|
||||
int16 height_offset;
|
||||
int16_t height_offset;
|
||||
};
|
||||
|
||||
// This enumeration allows for non-default formats for the weights array
|
||||
// of a fully-connected operator, allowing the use of special optimized
|
||||
// runtime paths.
|
||||
enum class FullyConnectedWeightsFormat : uint8 {
|
||||
enum class FullyConnectedWeightsFormat : uint8_t {
|
||||
// Default format (flat 2D layout, the inner contiguous dimension
|
||||
// is input_depth, the outer non-contiguous dimension is output_depth)
|
||||
kDefault,
|
||||
@ -88,11 +93,11 @@ enum class FullyConnectedWeightsFormat : uint8 {
|
||||
// maximize arithmetic throughput.
|
||||
//
|
||||
// Finally, the 'Int8' part in the name refers to the fact that this
|
||||
// weights format has each weights value encoded as a signed int8 value,
|
||||
// even if the data type of the weights buffer is uint8. This is intended
|
||||
// weights format has each weights value encoded as a signed int8_t value,
|
||||
// even if the data type of the weights buffer is uint8_t. This is intended
|
||||
// to save runtime kernels the effort to have to XOR the top bit of these
|
||||
// bytes before using them in signed arithmetic, see this file for more
|
||||
// explanations on the 'signed int8 trick' in matrix multiplication kernels:
|
||||
// explanations on the 'signed int8_t trick' in matrix multiplication kernels:
|
||||
//
|
||||
// tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
|
||||
//
|
||||
@ -111,7 +116,7 @@ enum class FullyConnectedWeightsFormat : uint8 {
|
||||
// the real 0 value, and scale designates the difference between the real values
|
||||
// corresponding to consecutive quantized values differing by 1.
|
||||
struct QuantizationParams {
|
||||
int32 zero_point = 0;
|
||||
int32_t zero_point = 0;
|
||||
double scale = 0.0;
|
||||
};
|
||||
|
||||
@ -140,20 +145,20 @@ class RuntimeShape {
|
||||
if (dimensions_count > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32[dimensions_count];
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32_t[dimensions_count];
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeShape(int shape_size, int32 value) : size_(0) {
|
||||
RuntimeShape(int shape_size, int32_t value) : size_(0) {
|
||||
Resize(shape_size);
|
||||
for (int i = 0; i < shape_size; ++i) {
|
||||
SetDim(i, value);
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) {
|
||||
RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) {
|
||||
ReplaceWith(dimensions_count, dims_data);
|
||||
}
|
||||
|
||||
@ -165,33 +170,34 @@ class RuntimeShape {
|
||||
// rolls out.
|
||||
RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
dims_pointer_ = new int32[size_];
|
||||
dims_pointer_ = new int32_t[size_];
|
||||
}
|
||||
std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_);
|
||||
std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_);
|
||||
}
|
||||
|
||||
bool operator==(const RuntimeShape& comp) const {
|
||||
return this->size_ == comp.size_ &&
|
||||
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0;
|
||||
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
|
||||
0;
|
||||
}
|
||||
|
||||
~RuntimeShape() {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
delete[] dims_pointer_;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
inline int32 DimensionsCount() const { return size_; }
|
||||
inline int32 Dims(int i) const {
|
||||
inline int32_t DimensionsCount() const { return size_; }
|
||||
inline int32_t Dims(int i) const {
|
||||
TFLITE_DCHECK_GE(i, 0);
|
||||
TFLITE_DCHECK_LT(i, size_);
|
||||
return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
|
||||
}
|
||||
inline void SetDim(int i, int32 val) {
|
||||
inline void SetDim(int i, int32_t val) {
|
||||
TFLITE_DCHECK_GE(i, 0);
|
||||
TFLITE_DCHECK_LT(i, size_);
|
||||
if (size_ > kMaxSmallSize) {
|
||||
@ -201,20 +207,20 @@ class RuntimeShape {
|
||||
}
|
||||
}
|
||||
|
||||
inline int32* DimsData() {
|
||||
inline int32_t* DimsData() {
|
||||
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
|
||||
}
|
||||
inline const int32* DimsData() const {
|
||||
inline const int32_t* DimsData() const {
|
||||
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
|
||||
}
|
||||
// The caller must ensure that the shape is no bigger than 5-D.
|
||||
inline const int32* DimsDataUpTo5D() const { return dims_; }
|
||||
inline const int32_t* DimsDataUpTo5D() const { return dims_; }
|
||||
|
||||
inline void Resize(int dimensions_count) {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
delete[] dims_pointer_;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
@ -222,16 +228,16 @@ class RuntimeShape {
|
||||
if (dimensions_count > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32[dimensions_count];
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32_t[dimensions_count];
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
inline void ReplaceWith(int dimensions_count, const int32* dims_data) {
|
||||
inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
|
||||
Resize(dimensions_count);
|
||||
int32* dst_dims = DimsData();
|
||||
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32));
|
||||
int32_t* dst_dims = DimsData();
|
||||
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -239,7 +245,7 @@ class RuntimeShape {
|
||||
const int dimensions_count =
|
||||
std::distance(src_iterable.begin(), src_iterable.end());
|
||||
Resize(dimensions_count);
|
||||
int32* data = DimsData();
|
||||
int32_t* data = DimsData();
|
||||
for (auto it : src_iterable) {
|
||||
*data = it;
|
||||
++data;
|
||||
@ -288,13 +294,13 @@ class RuntimeShape {
|
||||
SetDim(i, pad_value);
|
||||
}
|
||||
std::memcpy(DimsData() + size_increase, shape.DimsData(),
|
||||
sizeof(int32) * shape.DimensionsCount());
|
||||
sizeof(int32_t) * shape.DimensionsCount());
|
||||
}
|
||||
|
||||
int32 size_;
|
||||
int32_t size_;
|
||||
union {
|
||||
int32 dims_[kMaxSmallSize];
|
||||
int32* dims_pointer_;
|
||||
int32_t dims_[kMaxSmallSize];
|
||||
int32_t* dims_pointer_;
|
||||
};
|
||||
};
|
||||
|
||||
@ -713,7 +719,7 @@ void ComputeStrides(Dims<N>* dims) {
|
||||
}
|
||||
}
|
||||
|
||||
enum class BroadcastableOpCategory : uint8 {
|
||||
enum class BroadcastableOpCategory : uint8_t {
|
||||
kNone,
|
||||
kNonBroadcast, // Matching input shapes.
|
||||
kFirstInputBroadcastsFast, // Fivefold nested loops.
|
||||
@ -729,21 +735,21 @@ static_assert(sizeof(MinMax) == 8, "");
|
||||
|
||||
struct ActivationParams {
|
||||
FusedActivationFunctionType activation_type;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
};
|
||||
|
||||
struct ReluParams : public ActivationParams {
|
||||
int32 input_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32 output_shift;
|
||||
int32_t input_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
};
|
||||
|
||||
// Styles of resizing op usages. For example, kImageStyle can be used with a Pad
|
||||
// op for pattern-specific optimization.
|
||||
enum class ResizingCategory : uint8 {
|
||||
enum class ResizingCategory : uint8_t {
|
||||
kNone,
|
||||
kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}.
|
||||
kGenericResize,
|
||||
@ -753,24 +759,29 @@ enum class ResizingCategory : uint8 {
|
||||
struct ArithmeticParams {
|
||||
// Shape dependent / common to data / op types.
|
||||
BroadcastableOpCategory broadcast_category;
|
||||
// uint8 inference params.
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
// uint8_t inference params.
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// Add / Sub, not Mul, uint8 inference params.
|
||||
// Add / Sub, not Mul, uint8_t inference params.
|
||||
int left_shift;
|
||||
int32 input1_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
int32 input2_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
|
||||
// TODO(b/158622529): Union the following activation params.
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
// int64_t activation params.
|
||||
int64_t int64_activation_min;
|
||||
int64_t int64_activation_max;
|
||||
|
||||
// Processed output dimensions.
|
||||
// Let input "a" be the one that broadcasts in the faster-changing dimension.
|
||||
@ -785,22 +796,22 @@ struct ArithmeticParams {
|
||||
};
|
||||
|
||||
struct ConcatenationParams {
|
||||
int8 axis;
|
||||
const int32* input_zeropoint;
|
||||
int8_t axis;
|
||||
const int32_t* input_zeropoint;
|
||||
const float* input_scale;
|
||||
uint16 inputs_count;
|
||||
int32 output_zeropoint;
|
||||
uint16_t inputs_count;
|
||||
int32_t output_zeropoint;
|
||||
float output_scale;
|
||||
};
|
||||
|
||||
struct ComparisonParams {
|
||||
// uint8 inference params.
|
||||
// uint8_t inference params.
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input1_multiplier;
|
||||
int32_t input1_offset;
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
int32 input2_offset;
|
||||
int32 input2_multiplier;
|
||||
int32_t input2_offset;
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
// Shape dependent / common to inference types.
|
||||
bool is_broadcast;
|
||||
@ -810,81 +821,81 @@ struct ConvParams {
|
||||
PaddingType padding_type;
|
||||
PaddingValues padding_values;
|
||||
// TODO(starka): This was just "stride", so check that width+height is OK.
|
||||
int16 stride_width;
|
||||
int16 stride_height;
|
||||
int16 dilation_width_factor;
|
||||
int16 dilation_height_factor;
|
||||
// uint8 inference params.
|
||||
int16_t stride_width;
|
||||
int16_t stride_height;
|
||||
int16_t dilation_width_factor;
|
||||
int16_t dilation_height_factor;
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
};
|
||||
|
||||
struct DepthToSpaceParams {
|
||||
int32 block_size;
|
||||
int32_t block_size;
|
||||
};
|
||||
|
||||
struct DepthwiseParams {
|
||||
PaddingType padding_type;
|
||||
PaddingValues padding_values;
|
||||
int16 stride_width;
|
||||
int16 stride_height;
|
||||
int16 dilation_width_factor;
|
||||
int16 dilation_height_factor;
|
||||
int16 depth_multiplier;
|
||||
// uint8 inference params.
|
||||
int16_t stride_width;
|
||||
int16_t stride_height;
|
||||
int16_t dilation_width_factor;
|
||||
int16_t dilation_height_factor;
|
||||
int16_t depth_multiplier;
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
const int32* output_multiplier_per_channel;
|
||||
const int32* output_shift_per_channel;
|
||||
const int32_t* output_multiplier_per_channel;
|
||||
const int32_t* output_shift_per_channel;
|
||||
};
|
||||
|
||||
struct DequantizationParams {
|
||||
double scale;
|
||||
int32 zero_point;
|
||||
int32_t zero_point;
|
||||
};
|
||||
|
||||
struct PerChannelDequantizationParams {
|
||||
const float* scale;
|
||||
const int32* zero_point;
|
||||
int32 quantized_dimension;
|
||||
const int32_t* zero_point;
|
||||
int32_t quantized_dimension;
|
||||
};
|
||||
|
||||
struct FakeQuantParams {
|
||||
MinMax minmax;
|
||||
int32 num_bits;
|
||||
int32_t num_bits;
|
||||
};
|
||||
|
||||
struct FullyConnectedParams {
|
||||
// uint8 inference params.
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
@ -895,16 +906,16 @@ struct FullyConnectedParams {
|
||||
};
|
||||
|
||||
struct GatherParams {
|
||||
int16 axis;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct L2NormalizationParams {
|
||||
// uint8 inference params.
|
||||
int32 input_zero_point;
|
||||
// uint8_t inference params.
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
struct LocalResponseNormalizationParams {
|
||||
int32 range;
|
||||
int32_t range;
|
||||
double bias;
|
||||
double alpha;
|
||||
double beta;
|
||||
@ -932,50 +943,50 @@ struct HardSwishParams {
|
||||
};
|
||||
|
||||
struct LogisticParams {
|
||||
// uint8 inference params.
|
||||
int32 input_zero_point;
|
||||
int32 input_range_radius;
|
||||
int32 input_multiplier;
|
||||
// uint8_t inference params.
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
struct LstmCellParams {
|
||||
int32 weights_zero_point;
|
||||
int32 accum_multiplier;
|
||||
int32_t weights_zero_point;
|
||||
int32_t accum_multiplier;
|
||||
int accum_shift;
|
||||
int state_integer_bits;
|
||||
};
|
||||
|
||||
struct MeanParams {
|
||||
int8 axis_count;
|
||||
int16 axis[4];
|
||||
int8_t axis_count;
|
||||
int16_t axis[4];
|
||||
};
|
||||
|
||||
struct PackParams {
|
||||
int8 axis;
|
||||
const int32* input_zeropoint;
|
||||
int8_t axis;
|
||||
const int32_t* input_zeropoint;
|
||||
const float* input_scale;
|
||||
uint16 inputs_count;
|
||||
int32 output_zeropoint;
|
||||
uint16_t inputs_count;
|
||||
int32_t output_zeropoint;
|
||||
float output_scale;
|
||||
};
|
||||
|
||||
struct PadParams {
|
||||
int8 left_padding_count;
|
||||
int32 left_padding[4];
|
||||
int8 right_padding_count;
|
||||
int32 right_padding[4];
|
||||
int8_t left_padding_count;
|
||||
int32_t left_padding[4];
|
||||
int8_t right_padding_count;
|
||||
int32_t right_padding[4];
|
||||
ResizingCategory resizing_category;
|
||||
};
|
||||
|
||||
struct PreluParams {
|
||||
int32 input_offset;
|
||||
int32 alpha_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier_1;
|
||||
int32 output_shift_1;
|
||||
int32 output_multiplier_2;
|
||||
int32 output_shift_2;
|
||||
int32_t input_offset;
|
||||
int32_t alpha_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier_1;
|
||||
int output_shift_1;
|
||||
int32_t output_multiplier_2;
|
||||
int output_shift_2;
|
||||
};
|
||||
|
||||
struct PoolParams {
|
||||
@ -986,17 +997,17 @@ struct PoolParams {
|
||||
int stride_width;
|
||||
int filter_height;
|
||||
int filter_width;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
};
|
||||
|
||||
struct ReshapeParams {
|
||||
int8 shape_count;
|
||||
int32 shape[4];
|
||||
int8_t shape_count;
|
||||
int32_t shape[4];
|
||||
};
|
||||
|
||||
struct ResizeBilinearParams {
|
||||
@ -1013,91 +1024,93 @@ struct ResizeNearestNeighborParams {
|
||||
};
|
||||
|
||||
struct SliceParams {
|
||||
int8 begin_count;
|
||||
int32 begin[4];
|
||||
int8 size_count;
|
||||
int32 size[4];
|
||||
int8_t begin_count;
|
||||
int32_t begin[4];
|
||||
int8_t size_count;
|
||||
int32_t size[4];
|
||||
};
|
||||
|
||||
struct SoftmaxParams {
|
||||
// beta is not really used (not a Tensorflow parameter) and not implemented
|
||||
// for LogSoftmax.
|
||||
double beta;
|
||||
// uint8 inference params. Used even when beta defaults to 1.0.
|
||||
int32 input_multiplier;
|
||||
int32 input_left_shift;
|
||||
// uint8_t inference params. Used even when beta defaults to 1.0.
|
||||
int32_t input_multiplier;
|
||||
int32_t input_left_shift;
|
||||
// Reverse scaling is only used by LogSoftmax.
|
||||
int32 reverse_scaling_divisor;
|
||||
int32 reverse_scaling_right_shift;
|
||||
int32_t reverse_scaling_divisor;
|
||||
int32_t reverse_scaling_right_shift;
|
||||
int diff_min;
|
||||
int32_t zero_point;
|
||||
float scale;
|
||||
float* table;
|
||||
int16_t* exp_lut;
|
||||
int16_t* one_over_one_plus_x_lut;
|
||||
uint8_t* uint8_table1;
|
||||
uint8_t* uint8_table2;
|
||||
};
|
||||
|
||||
struct SpaceToBatchParams {
|
||||
// "Zero" padding for uint8 means padding with the output offset.
|
||||
int32 output_offset;
|
||||
// "Zero" padding for uint8_t means padding with the output offset.
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
struct SpaceToDepthParams {
|
||||
int32 block_size;
|
||||
int32_t block_size;
|
||||
};
|
||||
|
||||
struct SplitParams {
|
||||
// Graphs that split into, say, 2000 nodes are encountered. The indices in
|
||||
// OperatorEdges are of type uint16.
|
||||
uint16 num_split;
|
||||
int16 axis;
|
||||
// OperatorEdges are of type uint16_t.
|
||||
uint16_t num_split;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct SqueezeParams {
|
||||
int8 squeeze_dims_count;
|
||||
int32 squeeze_dims[4];
|
||||
int8_t squeeze_dims_count;
|
||||
int32_t squeeze_dims[4];
|
||||
};
|
||||
|
||||
struct StridedSliceParams {
|
||||
int8 start_indices_count;
|
||||
int32 start_indices[5];
|
||||
int8 stop_indices_count;
|
||||
int32 stop_indices[5];
|
||||
int8 strides_count;
|
||||
int32 strides[5];
|
||||
int8_t start_indices_count;
|
||||
int32_t start_indices[5];
|
||||
int8_t stop_indices_count;
|
||||
int32_t stop_indices[5];
|
||||
int8_t strides_count;
|
||||
int32_t strides[5];
|
||||
|
||||
int16 begin_mask;
|
||||
int16 ellipsis_mask;
|
||||
int16 end_mask;
|
||||
int16 new_axis_mask;
|
||||
int16 shrink_axis_mask;
|
||||
int16_t begin_mask;
|
||||
int16_t ellipsis_mask;
|
||||
int16_t end_mask;
|
||||
int16_t new_axis_mask;
|
||||
int16_t shrink_axis_mask;
|
||||
};
|
||||
|
||||
struct TanhParams {
|
||||
int32 input_zero_point;
|
||||
int32 input_range_radius;
|
||||
int32 input_multiplier;
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
struct TransposeParams {
|
||||
int8 perm_count;
|
||||
int32 perm[5];
|
||||
int8_t perm_count;
|
||||
int32_t perm[5];
|
||||
};
|
||||
|
||||
struct UnpackParams {
|
||||
uint16 num_split;
|
||||
int16 axis;
|
||||
uint16_t num_split;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct LeakyReluParams {
|
||||
float alpha;
|
||||
int32 input_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier_alpha;
|
||||
int32 output_shift_alpha;
|
||||
int32 output_multiplier_identity;
|
||||
int32 output_shift_identity;
|
||||
int32_t input_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier_alpha;
|
||||
int32_t output_shift_alpha;
|
||||
int32_t output_multiplier_identity;
|
||||
int32_t output_shift_identity;
|
||||
};
|
||||
|
||||
template <typename P>
|
||||
@ -1107,13 +1120,19 @@ inline void SetActivationParams(float min, float max, P* params) {
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void SetActivationParams(int32 min, int32 max, P* params) {
|
||||
inline void SetActivationParams(int32_t min, int32_t max, P* params) {
|
||||
params->quantized_activation_min = min;
|
||||
params->quantized_activation_max = max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int32* min, int32* max) {
|
||||
inline void SetActivationParams(int64_t min, int64_t max, P* params) {
|
||||
params->int64_activation_min = min;
|
||||
params->int64_activation_max = max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) {
|
||||
*min = params.quantized_activation_min;
|
||||
*max = params.quantized_activation_max;
|
||||
}
|
||||
@ -1124,6 +1143,11 @@ inline void GetActivationParams(const P& params, float* min, float* max) {
|
||||
*max = params.float_activation_max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
|
||||
*min = params.int64_activation_min;
|
||||
*max = params.int64_activation_max;
|
||||
}
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_
|
||||
|
||||
@ -14,15 +14,63 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
|
||||
inline TfLiteTensor* GetMutableInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
if (context->tensors != nullptr) {
|
||||
return &context->tensors[node->inputs->data[index]];
|
||||
} else {
|
||||
return context->GetTensor(context, node->inputs->data[index]);
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace.
|
||||
|
||||
const TfLiteTensor* GetInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetMutableInput(context, node, index);
|
||||
}
|
||||
|
||||
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
TfLiteTensor* tensor = GetMutableInput(context, node, index);
|
||||
return tensor->is_variable ? tensor : nullptr;
|
||||
}
|
||||
|
||||
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
if (context->tensors != nullptr) {
|
||||
return &context->tensors[node->outputs->data[index]];
|
||||
} else {
|
||||
return context->GetTensor(context, node->outputs->data[index]);
|
||||
}
|
||||
}
|
||||
|
||||
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
const bool use_tensor = index < node->inputs->size &&
|
||||
node->inputs->data[index] != kTfLiteOptionalTensor;
|
||||
if (use_tensor) {
|
||||
return GetMutableInput(context, node, index);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Per-axis
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
@ -126,11 +174,27 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
// pipeline.
|
||||
if (bias) {
|
||||
const double bias_scale = static_cast<double>(bias->params.scale);
|
||||
// Here we're making sure the input_product_scale & bias_scale the same.
|
||||
// Normally this should be guaranteed by the training pipeline, we are
|
||||
// setting the threshold to be 2e-6 to allow some numeric stability
|
||||
// difference.
|
||||
TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 2e-6);
|
||||
// Here we're making sure the input_product_scale & bias_scale are about the
|
||||
// same. Since we have:
|
||||
// (output - output_zp) * output_scale =
|
||||
// input_product_scale * input_product + bias * bias_scale ---- (0)
|
||||
//
|
||||
// (0) equals:
|
||||
// (input_product + bias) * input_product_scale ----- (1)
|
||||
// +
|
||||
// bias * (bias_scale - input_product_scale) ------ (2)
|
||||
//
|
||||
// For the real kernel computation, we're doing (1), so we really need to
|
||||
// make sure (2) has minimum impact on the output, so:
|
||||
// bias * (bias_scale - input_product_scale) / output_scale should be
|
||||
// a small number for an integer.
|
||||
// Since normally bias should be within a small range.
|
||||
// We should expect (bias_scale - input_product_scale) / output_scale to
|
||||
// be a small number like 0.02.
|
||||
const double scale_diff = std::abs(input_product_scale - bias_scale);
|
||||
const double output_scale = static_cast<double>(output->params.scale);
|
||||
|
||||
TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
|
||||
}
|
||||
return GetQuantizedConvolutionMultipler(context, input, filter, output,
|
||||
multiplier);
|
||||
@ -167,7 +231,7 @@ void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*act_min = std::max(qmin, quantize(0.0));
|
||||
*act_max = std::min(qmax, quantize(6.0));
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
} else if (activation == kTfLiteActReluN1To1) {
|
||||
*act_min = std::max(qmin, quantize(-1.0));
|
||||
*act_max = std::min(qmax, quantize(1.0));
|
||||
} else {
|
||||
|
||||
@ -15,52 +15,55 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// A fair number of functions in this header have historically been inline.
|
||||
// It is ok to change functions to not be inline if the latency with
|
||||
// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is
|
||||
// made, move the newly non-inlined function declarations to the top of this
|
||||
// header file.
|
||||
const TfLiteTensor* GetInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index);
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index);
|
||||
|
||||
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index);
|
||||
|
||||
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index);
|
||||
|
||||
inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
|
||||
inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
|
||||
return t->dims->data[dim];
|
||||
}
|
||||
inline const TfLiteTensor* GetInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
// Note: You must check if result is not null:
|
||||
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
inline TfLiteTensor* GetVariableInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TfLiteTensor* tensor =
|
||||
&context->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
return (tensor->is_variable) ? tensor : nullptr;
|
||||
}
|
||||
inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->outputs->data[index])];
|
||||
}
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
inline TfLiteTensor* GetTemporary(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[flatbuffers::EndianScalar(
|
||||
node->temporaries->data[index])];
|
||||
return &context->tensors[node->temporaries->data[index]];
|
||||
}
|
||||
inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[node->intermediates->data[index]];
|
||||
}
|
||||
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
|
||||
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
|
||||
inline int NumIntermediates(const TfLiteNode* node) {
|
||||
return node->intermediates->size;
|
||||
}
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
|
||||
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
|
||||
|
||||
inline int64_t NumElements(const TfLiteIntArray* dims) {
|
||||
int64_t count = 1;
|
||||
@ -74,19 +77,11 @@ inline int64_t NumElements(const TfLiteTensor* t) {
|
||||
return NumElements(t->dims);
|
||||
}
|
||||
|
||||
inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
const bool use_tensor = index < node->inputs->size &&
|
||||
node->inputs->data[index] != kTfLiteOptionalTensor;
|
||||
if (use_tensor) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Determines whether tensor is constant.
|
||||
// TODO(b/138199592): Introduce new query which checks for constant OR
|
||||
// persistent-read-only, which would be useful for most tensor kernels that
|
||||
// are potentially dynamic based on the input tensor value availability at the
|
||||
// time of prepare.
|
||||
inline bool IsConstantTensor(const TfLiteTensor* tensor) {
|
||||
return tensor->allocation_type == kTfLiteMmapRo;
|
||||
}
|
||||
@ -105,6 +100,14 @@ inline void SetTensorToDynamic(TfLiteTensor* tensor) {
|
||||
}
|
||||
}
|
||||
|
||||
// Sets tensor to persistent and read-only.
|
||||
inline void SetTensorToPersistentRo(TfLiteTensor* tensor) {
|
||||
if (tensor->allocation_type != kTfLitePersistentRo) {
|
||||
tensor->allocation_type = kTfLitePersistentRo;
|
||||
tensor->data.raw = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Determines whether it is a hybrid op - one that has float inputs and
|
||||
// quantized weights.
|
||||
inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) {
|
||||
@ -162,7 +165,7 @@ void CalculateActivationRange(TfLiteFusedActivation activation,
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*activation_min = 0;
|
||||
*activation_max = 6;
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
} else if (activation == kTfLiteActReluN1To1) {
|
||||
*activation_min = -1;
|
||||
*activation_max = 1;
|
||||
} else {
|
||||
|
||||
@ -19,7 +19,7 @@ limitations under the License.
|
||||
// non-portable function.
|
||||
#ifdef TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
#define DEBUG_LOG(x) \
|
||||
do { \
|
||||
@ -36,7 +36,6 @@ inline void InfiniteLoop() {
|
||||
|
||||
#else // TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
@ -45,6 +44,15 @@ inline void InfiniteLoop() {
|
||||
fprintf(stderr, "%s", (x)); \
|
||||
} while (0)
|
||||
|
||||
// Report Error for unsupported type by op 'op_name' and returns kTfLiteError.
|
||||
#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name) \
|
||||
do { \
|
||||
TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
|
||||
__FILE__, __LINE__, TfLiteTypeGetName(type), \
|
||||
(op_name)); \
|
||||
return kTfLiteError; \
|
||||
} while (0)
|
||||
|
||||
#define TFLITE_ABORT abort()
#endif // TF_LITE_MCU_DEBUG_LOG

@ -15,10 +15,10 @@ limitations under the License.

#include "tensorflow/lite/micro/examples/hello_world/main_functions.h"

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/examples/hello_world/constants.h"
#include "tensorflow/lite/micro/examples/hello_world/model.h"
#include "tensorflow/lite/micro/examples/hello_world/output_handler.h"
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -34,8 +34,12 @@ TfLiteTensor* output = nullptr;
int inference_count = 0;

// Create an area of memory to use for input, output, and intermediate arrays.
// Finding the minimum value for your model may require some trial and error.
constexpr int kTensorArenaSize = 2 * 1024;
// Minimum arena size, at the time of writing. After allocating tensors
// you can retrieve this value by invoking interpreter.arena_used_bytes().
const int kModelArenaSize = 2468;
// Extra headroom for model + alignment + future interpreter changes.
const int kExtraArenaSize = 560 + 16 + 100;
const int kTensorArenaSize = kModelArenaSize + kExtraArenaSize;
uint8_t tensor_arena[kTensorArenaSize];
} // namespace

@ -60,7 +64,7 @@ void setup() {

// This pulls in all the operation implementations we need.
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::ops::micro::AllOpsResolver resolver;
static tflite::AllOpsResolver resolver;

// Build an interpreter to run the model with.
static tflite::MicroInterpreter static_interpreter(

@ -24,19 +24,8 @@ limitations under the License.

#include "tensorflow/lite/micro/examples/hello_world/model.h"

// We need to keep the data array aligned on some architectures.
#ifdef __has_attribute
#define HAVE_ATTRIBUTE(x) __has_attribute(x)
#else
#define HAVE_ATTRIBUTE(x) 0
#endif
#if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
#define DATA_ALIGN_ATTRIBUTE __attribute__((aligned(4)))
#else
#define DATA_ALIGN_ATTRIBUTE
#endif

const unsigned char g_model[] DATA_ALIGN_ATTRIBUTE = {
// Keep model aligned to 8 bytes to guarantee aligned 64-bit accesses.
alignas(8) const unsigned char g_model[] = {
0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x12, 0x00,
0x1c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
0x00, 0x00, 0x18, 0x00, 0x12, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,

@ -21,6 +21,8 @@ limitations under the License.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"

namespace tflite {
namespace ops {
@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return std::max(0.0f, a);
case kTfLiteActRelu1:
return std::max(-1.0f, std::min(a, 1.0f));
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return std::max(0.0f, std::min(a, 6.0f));
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:

@ -18,30 +18,82 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {

struct ReluOpData {
ReluParams params;
};

struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};

} // namespace

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

template <typename Q>
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
const Q* input_data, const RuntimeShape& output_shape,
Q* output_data) {
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val < lower ? lower : val;
output_data[i] = clamped;
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}

template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);

const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);

QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);

data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}

inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
@ -77,33 +129,57 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
}
}

void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}

TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);

const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}

return kTfLiteOk;
}

TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));

const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
ReluFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));

return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<int8_t>(input),
GetTensorShape(output),
GetTensorData<int8_t>(output));
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<uint8_t>(input),
GetTensorShape(output),
GetTensorData<uint8_t>(output));
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@ -114,37 +190,62 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
}
}

void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}

TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);

const TfLiteTensor* input = GetInput(context, node, kInputTensor);

if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToAsymmetricQuantizedUInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}

return kTfLiteOk;
}

TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));

const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
Relu6Float(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));

return kTfLiteOk;
}
case kTfLiteInt8: {
const int8_t six = FloatToAsymmetricQuantizedInt8(
6.0f, input->params.scale, input->params.zero_point);
const int8_t zero = input->params.zero_point;
Relu6Quantized<int8_t>(
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
6.0f, input->params.scale, input->params.zero_point);
const uint8_t zero = input->params.zero_point;
Relu6Quantized<uint8_t>(
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@ -157,28 +258,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace activations

TfLiteRegistration* Register_RELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

TfLiteRegistration* Register_RELU6() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -23,6 +23,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {
namespace ops {
@ -40,18 +42,22 @@ struct OpData {
// and the special 16-bit -> 16bit quantized path
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
int32_t output_activation_min;
int32_t output_activation_max;

// These fields are used only in the general 8-bit -> 8bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;

// Used only for float evals:
float output_activation_min_f32;
float output_activation_max_f32;
};

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
@ -89,24 +95,28 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
} else if (output->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}

return kTfLiteOk;
}

void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output))
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<float>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<float>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
} else {
@ -117,9 +127,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,

TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
@ -135,12 +145,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
@ -160,21 +173,45 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);

const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

OpData data;
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
CalculateOpData(context, params, input1, input2, output, data));

return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, &data, input1, input2, output);
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -187,16 +224,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace add

TfLiteRegistration* Register_ADD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ADD() {
return {/*init=*/add::Init,
/*free=*/nullptr,
/*prepare=*/add::Prepare,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"

namespace tflite {
@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxis);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis =
tflite::micro::GetEvalInput(context, node, kAxis);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
GetTensorData<axis_type>(axis), GetTensorShape(output), \
GetTensorData<output_type>(output), is_arg_max)
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<data_type>(input), \
tflite::micro::GetTensorData<axis_type>(axis), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_type>(output), \
is_arg_max)
if (axis->type == kTfLiteInt32) {
if (output->type == kTfLiteInt32) {
switch (input->type) {
@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32, uint8 and int8 are "
"Only float32, uint8_t and int8_t are "
"supported currently, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context,
"Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(axis->type));
return kTfLiteError;
}
@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {

} // namespace arg_min_max

TfLiteRegistration* Register_ARG_MAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

TfLiteRegistration* Register_ARG_MIN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -32,8 +33,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@ -43,26 +44,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));

return kTfLiteOk;
}
} // namespace ceil

TfLiteRegistration* Register_CEIL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CEIL() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

/*
* The circular buffer custom operator is used to implement strided streaming
@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);

TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

// The circular buffer custom operator currently only supports int8.
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
// The circular buffer custom operator currently only supports int8_t.
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);

// TODO(b/132070898): Use statically slotted OpData structures until a
// scratch memory API is ready.
@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

OpData* data = reinterpret_cast<OpData*>(node->user_data);

@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int depth = output->dims->data[3];

if (input->type == kTfLiteInt8) {
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
GetTensorData<int8_t>(output));
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -25,103 +26,109 @@ namespace micro {
namespace comparisons {
namespace {

struct OpData {
ComparisonParams params;
};

constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;

// TODO(ruic): optimize macros below to using template functions.
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
template <typename input_dtype> \
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
const TfLiteTensor* input1, \
const TfLiteTensor* input2, TfLiteTensor* output, \
bool requires_broadcast) { \
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
auto input1_offset = -input1->params.zero_point; \
auto input2_offset = -input2->params.zero_point; \
const int left_shift = 8; \
\
int32 input1_multiplier; \
int input1_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input1->params.scale), &input1_multiplier, \
&input1_shift); \
int32 input2_multiplier; \
int input2_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input2->params.scale), &input2_multiplier, \
&input2_shift); \
\
ComparisonParams op_params; \
op_params.left_shift = left_shift; \
op_params.input1_offset = input1_offset; \
op_params.input1_multiplier = input1_multiplier; \
op_params.input1_shift = input1_shift; \
op_params.input2_offset = input2_offset; \
op_params.input2_multiplier = input2_multiplier; \
op_params.input2_shift = input2_shift; \
if (requires_broadcast) { \
reference_ops::Broadcast4DSlow##opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} else { \
reference_ops::opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} \
} \
}
TF_LITE_QUANTIZE_COMPARISON(Equal);
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
TF_LITE_QUANTIZE_COMPARISON(Greater);
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
TF_LITE_QUANTIZE_COMPARISON(Less);
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
#undef TF_LITE_QUANTIZE_COMPARISON

#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
{ \
ComparisonParams op_params; \
requires_broadcast \
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)) \
: reference_ops::opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)); \
}

TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {

// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);

bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
|
||||
: reference_ops::LessEqualNoScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
requires_broadcast
|
||||
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data)
|
||||
: reference_ops::LessEqualWithScaling(
|
||||
data->params, input1_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
|
||||
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
|
||||
output_data);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@ -291,78 +608,113 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
} // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);

  if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
    auto input1_offset = -input1->params.zero_point;
    auto input2_offset = -input2->params.zero_point;
    const int kLeftShift = 8;

    int32_t input1_multiplier;
    int input1_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input1->params.scale), &input1_multiplier,
        &input1_shift);
    int32_t input2_multiplier;
    int input2_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input2->params.scale), &input2_multiplier,
        &input2_shift);

    data->params.left_shift = kLeftShift;
    data->params.input1_offset = input1_offset;
    data->params.input1_multiplier = input1_multiplier;
    data->params.input1_shift = input1_shift;
    data->params.input2_offset = input2_offset;
    data->params.input2_multiplier = input2_multiplier;
    data->params.input2_shift = input2_shift;
  }

  return kTfLiteOk;
}

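For reference, QuantizeMultiplierSmallerThanOneExp folds each floating-point scale into a 32-bit fixed-point multiplier plus a shift so the quantized comparison needs no float math at invoke time. The sketch below is only a conceptual illustration of that decomposition; the function name ScaleToMultiplierAndShift is made up here and is not the library routine.

#include <cmath>
#include <cstdint>

// Conceptual sketch, not the TFLite implementation: express `scale` as
// multiplier * 2^shift with the multiplier held in Q31 fixed point.
void ScaleToMultiplierAndShift(double scale, int32_t* multiplier, int* shift) {
  int exponent = 0;
  const double mantissa = std::frexp(scale, &exponent);  // scale = mantissa * 2^exponent
  int64_t q = static_cast<int64_t>(std::round(mantissa * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding hit 2^31: halve and bump the exponent
    q /= 2;
    ++exponent;
  }
  *multiplier = static_cast<int32_t>(q);
  *shift = exponent;  // a negative shift means a right shift at runtime
}
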
} // namespace comparisons

TfLiteRegistration* Register_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::EqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::EqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

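The same change repeats for the remaining comparison ops below: each Register_* now returns the TfLiteRegistration by value and wires in the shared Init/Prepare hooks instead of leaving them null. A minimal usage sketch, illustrative only (the interpreter normally fetches these through the op resolver rather than calling them directly):

// Illustrative only: the caller now receives its own copy of the registration,
// so there is no pointer to a function-local static to keep alive.
const TfLiteRegistration equal_op = tflite::ops::micro::Register_EQUAL();
// init and prepare are populated, so per-node OpData is allocated and the
// quantized comparison parameters are precomputed before the first invoke.
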
|
||||
TfLiteRegistration* Register_NOT_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_NOT_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_GREATER() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_GREATER_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LESS() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LESS_EQUAL() {
|
||||
return {/*init=*/comparisons::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/comparisons::Prepare,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -18,10 +18,11 @@ limitations under the License.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -31,6 +32,104 @@ namespace concatenation {
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
constexpr int kOutputTensor = 0;

struct OpData {
  ConcatenationParams params;
};

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}

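A quick worked example of the axis coercion above, since concat axes are commonly given as negative indices. This is a hypothetical standalone copy of the same rule so it can be read without the TFLite headers:

// Standalone copy of the coercion rule, for illustration only.
inline int CoercedAxis(int axis, int num_dims) {
  return axis >= 0 ? axis : num_dims + axis;
}
// e.g. for a 4-D NHWC output: CoercedAxis(-1, 4) == 3 (concatenate along channels),
//      CoercedAxis(1, 4)  == 1 (concatenate along height).
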
// The following functions are helpers to get tensor data in the format that the
|
||||
// reference op implementation expects. They provide the same functionality as
|
||||
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
|
||||
|
||||
// Gets shapes from a list of tensors.
|
||||
inline void GetAllInputTensorShapes(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
RuntimeShape all_shapes[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
RuntimeShape shape = tflite::micro::GetTensorShape(t);
|
||||
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
|
||||
}
|
||||
}
|
||||
|
||||
// Get shape pointers from a list of shapes.
|
||||
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
|
||||
const RuntimeShape* pointers[]) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
pointers[i] = &shapes[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Gets data pointers from a list of tensors.
|
||||
template <typename T>
|
||||
inline void GetAllInputTensorData(const TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
T* all_data[kMaxInputNum]) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(node != nullptr);
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
all_data[i] = tflite::micro::GetTensorData<T>(t);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const data_type* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<data_type>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const uint8_t* inputs_data[kMaxInputNum];
|
||||
GetAllInputTensorShapes(context, node, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllInputTensorData(context, node, inputs_data);
|
||||
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
reference_ops::ConcatenationWithScaling(
|
||||
data->params, inputs_shape_ptr, inputs_data,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// This function only checks the types. Additional shape validations are
|
||||
// performed in the reference implementation called during Eval().
|
||||
@ -63,125 +162,63 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context,
|
||||
"Op Concatenation does not currently support num dimensions >4 "
|
||||
"Tensor '%s' has %d dimensions.",
|
||||
input->name, num_dimensions);
|
||||
"Tensor has %d dimensions.",
|
||||
num_dimensions);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate OpData.
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (output_type) { // Already know in/outtypes are same.
|
||||
case kTfLiteFloat32:
|
||||
case kTfLiteInt32:
|
||||
case kTfLiteInt64: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
break;
|
||||
}
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8: {
|
||||
data->params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
data->params.inputs_count = node->inputs->size;
|
||||
|
||||
float* input_scales =
|
||||
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(float)));
|
||||
|
||||
int32_t* input_zero_points =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, node->inputs->size * sizeof(int32_t)));
|
||||
|
||||
// Allocate persistent scale and zeropoint buffers.
|
||||
// Store input scale and zero point values in OpParams:
|
||||
for (int i = 0; i < node->inputs->size; ++i) {
|
||||
const TfLiteTensor* t = GetInput(context, node, i);
|
||||
input_scales[i] = t->params.scale;
|
||||
input_zero_points[i] = t->params.zero_point;
|
||||
}
|
||||
|
||||
data->params.input_scale = input_scales;
|
||||
data->params.input_zeropoint = input_zero_points;
|
||||
data->params.output_zeropoint = output->params.zero_point;
|
||||
data->params.output_scale = output->params.scale;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Op Concatenation does not currently support Type '%s'.",
|
||||
TfLiteTypeGetName(output_type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Handles negative axis index, coerces to positive index value.
|
||||
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
|
||||
if (axis >= 0) {
|
||||
return axis;
|
||||
} else {
|
||||
return NumDimensions(output_tensor) + axis;
|
||||
}
|
||||
}
|
||||
|
||||
// The following functions are helpers to get tensor data in the format that the
|
||||
// reference op implementation expects. They provide the same functionality as
|
||||
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
|
||||
|
||||
// Gets shapes from a list of tensors.
|
||||
inline void GetAllTensorShapes(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
RuntimeShape all_shapes[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
RuntimeShape shape = GetTensorShape(t);
|
||||
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
|
||||
}
|
||||
}
|
||||
|
||||
// Get shape pointers from a list of shapes.
|
||||
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
|
||||
const RuntimeShape* pointers[]) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
pointers[i] = &shapes[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Gets data pointers from a list of tensors.
|
||||
template <typename T>
|
||||
inline void GetAllTensorData(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
T* all_data[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
all_data[i] = GetTensorData<T>(t);
|
||||
}
|
||||
}
|
||||
|
||||
// Gets scale and zero point from a list of tensors
|
||||
inline void GetAllQuantizationParam(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list,
|
||||
float scales[kMaxInputNum],
|
||||
int32 zero_points[kMaxInputNum]) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
scales[i] = t->params.scale;
|
||||
zero_points[i] = t->params.zero_point;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_type>
|
||||
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const data_type* inputs_data[kMaxInputNum];
|
||||
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllTensorData(*context, *node->inputs, inputs_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
ConcatenationParams op_params;
|
||||
op_params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
op_params.inputs_count = NumInputs(node);
|
||||
|
||||
reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
|
||||
GetTensorShape(output),
|
||||
GetTensorData<data_type>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Collect the shapes and data pointer of input tensors
|
||||
RuntimeShape inputs_shape[kMaxInputNum];
|
||||
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
|
||||
const uint8_t* inputs_data[kMaxInputNum];
|
||||
float inputs_scale[kMaxInputNum];
|
||||
int32 inputs_zero_point[kMaxInputNum];
|
||||
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
|
||||
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
|
||||
GetAllTensorData(*context, *node->inputs, inputs_data);
|
||||
GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
|
||||
inputs_zero_point);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
const TfLiteConcatenationParams* params =
|
||||
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
|
||||
|
||||
ConcatenationParams op_params;
|
||||
op_params.axis = CalculatePositiveAxis(params->axis, output);
|
||||
op_params.inputs_count = NumInputs(node);
|
||||
op_params.input_zeropoint = inputs_zero_point;
|
||||
op_params.input_scale = inputs_scale;
|
||||
op_params.output_zeropoint = output->params.zero_point;
|
||||
op_params.output_scale = output->params.scale;
|
||||
|
||||
reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
|
||||
inputs_data, GetTensorShape(output),
|
||||
GetTensorData<uint8>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
|
||||
|
||||
@ -214,16 +251,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace concatenation
|
||||
|
||||
TfLiteRegistration* Register_CONCATENATION() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/concatenation::Prepare,
|
||||
/*invoke=*/concatenation::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CONCATENATION() {
|
||||
return {/*init=*/concatenation::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/concatenation::Prepare,
|
||||
/*invoke=*/concatenation::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -42,6 +43,12 @@ constexpr int kConvQuantizedDimension = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
@ -109,12 +116,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
      kTfLiteError) {
    return nullptr;
  }
  return data;
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@ -137,12 +139,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

  // Dynimically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
      context, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
      context, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&data->per_channel_output_shift)));
  data->per_channel_output_multiplier =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
@ -163,19 +165,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                    affine_quantization->zero_point->size);
  }

  return CalculateOpData(context, node, params, input_width, input_height,
                         filter_width, filter_height, output_width,
                         output_height, input->type, data);
  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
} // namespace conv

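The two persistent buffers allocated above are later filled with one fixed-point multiplier and shift per output channel; that happens inside CalculateOpData via library helpers. The sketch below only illustrates the idea, reusing the hypothetical ScaleToMultiplierAndShift from the earlier comparison-kernel note; names and signature are illustrative, not the real API.

#include <cstdint>

// Conceptual sketch: derive per-channel output multipliers/shifts from the
// input, per-channel filter, and output scales. Not the library implementation.
void FillPerChannelParams(float input_scale, const float* filter_scales,
                          float output_scale, int num_channels,
                          int32_t* per_channel_multiplier,
                          int32_t* per_channel_shift) {
  for (int c = 0; c < num_channels; ++c) {
    const double effective_scale =
        static_cast<double>(input_scale) * filter_scales[c] / output_scale;
    int32_t multiplier = 0;
    int shift = 0;
    ScaleToMultiplierAndShift(effective_scale, &multiplier, &shift);  // see earlier sketch
    per_channel_multiplier[c] = multiplier;
    per_channel_shift[c] = shift;
  }
}
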
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
@ -193,24 +202,29 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input), GetTensorShape(filter),
|
||||
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32_t>(bias), GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
|
||||
GetTensorData<uint8_t>(im2col), nullptr);
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
TfLiteTensor* im2col) {
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output,
|
||||
TfLiteEvalTensor* im2col) {
|
||||
// TODO(b/154032858): Investigate removing extra copies.
|
||||
ConvParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
@ -222,18 +236,21 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
reference_integer_ops::ConvPerChannel(
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteConvParams* params, const OpData& data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* im2col,
|
||||
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
|
||||
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
@ -249,21 +266,31 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
reference_ops::Conv(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(filter),
|
||||
GetTensorData<float>(filter), GetTensorShape(bias),
|
||||
GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output), GetTensorShape(im2col),
|
||||
GetTensorData<float>(im2col));
|
||||
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
tflite::micro::GetTensorShape(im2col),
|
||||
tflite::micro::GetTensorData<float>(im2col));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kFilterTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 3)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
@ -291,16 +318,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace conv
|
||||
|
||||
TfLiteRegistration* Register_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/conv::Prepare,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CONV_2D() {
|
||||
return {/*init=*/conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/conv::Prepare,
|
||||
/*invoke=*/conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -24,6 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -42,6 +43,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
@ -95,12 +102,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@ -111,6 +113,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
|
||||
@ -120,16 +123,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
int filter_width = SizeOfDimension(filter, 2);
|
||||
int filter_height = SizeOfDimension(filter, 1);
|
||||
|
||||
// Per channel quantization is only needed for int8 inference. For other
|
||||
// Per channel quantization is only needed for int8_t inference. For other
|
||||
// quantized types, only a single scale and zero point is needed.
|
||||
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
// Dynimically allocate per-channel quantization parameters.
|
||||
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t),
|
||||
reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
|
||||
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t),
|
||||
reinterpret_cast<void**>(&data->per_channel_output_shift)));
|
||||
data->per_channel_output_multiplier =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
data->per_channel_output_shift =
|
||||
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
|
||||
context, num_channels * sizeof(int32_t)));
|
||||
|
||||
// All per-channel quantized tensors need valid zero point and scale arrays.
|
||||
if (input->type == kTfLiteInt8) {
|
||||
@ -150,14 +153,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
affine_quantization->zero_point->size);
|
||||
}
|
||||
|
||||
return CalculateOpData(context, node, params, width, height, filter_width,
|
||||
filter_height, data_type, data);
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
|
||||
filter_width, filter_height, data_type,
|
||||
data));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
@ -165,8 +175,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
@ -176,74 +186,87 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params,
|
||||
const OpData* data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
op_params, data.per_channel_output_multiplier,
|
||||
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
TfLiteDepthwiseConvParams* params, const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data.padding.width;
|
||||
op_params.padding_values.height = data.padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data->output_shift;
|
||||
op_params.output_shift = -data.output_shift;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<uint8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@ -254,24 +277,29 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) { // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, output);
      EvalFloat(context, node, params, data, input, filter, bias, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, output);
      EvalQuantized(context, node, params, data, input, filter, bias, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -283,16 +311,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace depthwise_conv
|
||||
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/depthwise_conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/depthwise_conv::Prepare,
|
||||
/*invoke=*/depthwise_conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
|
||||
return {/*init=*/depthwise_conv::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/depthwise_conv::Prepare,
|
||||
/*invoke=*/depthwise_conv::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -29,20 +30,17 @@ namespace micro {
namespace dequantize {

struct OpData {
  tflite::DequantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  int32_t output_zero_point;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
      kTfLiteError) {
    return nullptr;
  }
  return data;
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

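The added output_multiplier, output_shift, and output_zero_point fields feed the int32 requantize path handled further down in Eval. As a rough scalar illustration of what that path computes (floating-point here for clarity; the kernel itself uses the cached fixed-point multiplier and shift):

#include <cmath>
#include <cstdint>

// Conceptual sketch only: map one quantized input value onto a new
// scale/zero-point pair, which is what the requantize branch does per element.
int32_t RequantizeOne(int8_t in, float in_scale, int32_t in_zero_point,
                      float out_scale, int32_t out_zero_point) {
  const float real_value = (static_cast<int32_t>(in) - in_zero_point) * in_scale;
  return static_cast<int32_t>(std::lround(real_value / out_scale)) + out_zero_point;
}
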
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@ -69,6 +67,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = input->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(input->params.scale);
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@ -76,28 +78,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
tflite::DequantizationParams op_params;
|
||||
op_params.zero_point = input->params.zero_point;
|
||||
op_params.scale = static_cast<double>(input->params.scale);
|
||||
switch (input->type) {
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Dequantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Dequantize(data->quantization_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@ -106,21 +111,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (output->type == kTfLiteInt32) {
|
||||
int flat_size =
|
||||
MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
|
||||
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output));
|
||||
switch (input->type) {
|
||||
case kTfLiteInt16: {
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int16_t>(input), flat_size, data->output_multiplier,
|
||||
data->output_shift, input->params.zero_point,
|
||||
output->params.zero_point, GetTensorData<int32_t>(output));
|
||||
tflite::micro::GetTensorData<int16_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int8_t>(input), flat_size, data->output_multiplier,
|
||||
data->output_shift, input->params.zero_point,
|
||||
output->params.zero_point, GetTensorData<int32_t>(output));
|
||||
tflite::micro::GetTensorData<int8_t>(input), flat_size,
|
||||
data->output_multiplier, data->output_shift,
|
||||
data->quantization_params.zero_point, data->output_zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -141,16 +148,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace dequantize
|
||||
|
||||
TfLiteRegistration* Register_DEQUANTIZE() {
|
||||
static TfLiteRegistration r = {/*init=*/dequantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/dequantize::Prepare,
|
||||
/*invoke=*/dequantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_DEQUANTIZE() {
|
||||
return {/*init=*/dequantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/dequantize::Prepare,
|
||||
/*invoke=*/dequantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
@ -40,7 +42,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
TfLiteTypeGetName(input->type), input->type);
@ -52,13 +54,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
T func(T), TfLiteType expected_type) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
const int64_t num_elements = NumElements(input);
const T* in_data = GetTensorData<T>(input);
T* out_data = GetTensorData<T>(output);
for (int64_t i = 0; i < num_elements; ++i) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
const size_t num_elements = ElementCount(*input->dims);
const T* in_data = tflite::micro::GetTensorData<T>(input);
T* out_data = tflite::micro::GetTensorData<T>(output);
for (size_t i = 0; i < num_elements; ++i) {
out_data[i] = func(in_data[i]);
}
return kTfLiteOk;
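The EvalImpl rewrite above shows the Eval-time pattern the commit moves to: kernels read TfLiteEvalTensor objects through tflite::micro::GetEvalInput/GetEvalOutput instead of full TfLiteTensor objects. A rough self-contained sketch of a float element-wise kernel written against that API; the op itself and its absolute-value body are invented for illustration, while ElementCount comes from micro_utils.h as used in the hunk:

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace example_abs {

// Eval-time tensors only expose type, dims and data, which is all an
// element-wise float kernel needs.
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);

  const size_t num_elements = ElementCount(*input->dims);
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  for (size_t i = 0; i < num_elements; ++i) {
    out[i] = in[i] < 0.f ? -in[i] : in[i];  // |x|, purely illustrative
  }
  return kTfLiteOk;
}

}  // namespace example_abs
}  // namespace micro
}  // namespace ops
}  // namespace tflite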
@ -106,137 +108,103 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalLogical(context, node, [](bool v) { return !v; });
|
||||
}
|
||||
|
||||
TfLiteStatus TANHEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::tanh);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace elementwise
|
||||
|
||||
TfLiteRegistration* Register_ABS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ABS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SIN() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_COS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_COS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOG() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOG() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RSQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RSQRT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQUARE() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SQUARE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_NOT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_TANH() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::TANHEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOGICAL_NOT() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -17,7 +17,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
reference_ops::Floor(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace floor
|
||||
|
||||
TfLiteRegistration* Register_FLOOR() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/floor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_FLOOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/floor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -40,6 +41,10 @@ struct OpData {
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
// Cached zero point values of tensors.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
@ -64,6 +69,10 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
@ -72,12 +81,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

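Several Init hooks in this diff shrink the same way: TfLiteContext::AllocatePersistentBuffer now hands back the buffer pointer directly instead of reporting success through an out-parameter. A small sketch against the newer signature; MyOpData is a placeholder struct, not something from this commit:

#include <cstdint>

#include "tensorflow/lite/c/common.h"

namespace {

// Placeholder per-node scratch data; real kernels cache quantization
// parameters and activation ranges here.
struct MyOpData {
  int32_t output_multiplier;
  int output_shift;
};

// Allocations made here live in the interpreter's arena for its whole
// lifetime and surface to Prepare/Eval as node->user_data.
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(MyOpData));
}

}  // namespace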
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@ -93,7 +97,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
@ -102,13 +106,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = -filter->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
@ -116,20 +122,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<int8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias,
|
||||
TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
@ -141,12 +152,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
|
||||
GetTensorShape(output), GetTensorData<output_data_type>(output))
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<uint8_t>(input), \
|
||||
tflite::micro::GetTensorShape(filter), \
|
||||
tflite::micro::GetTensorData<uint8_t>(filter), \
|
||||
tflite::micro::GetTensorShape(bias), \
|
||||
tflite::micro::GetTensorData<int32_t>(bias), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
@ -165,8 +180,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
@ -174,10 +190,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@ -186,10 +206,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
tflite::micro::GetEvalInput(context, node, kBiasTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
@ -216,16 +240,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace fully_connected
|
||||
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED() {
|
||||
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_FULLY_CONNECTED() {
|
||||
return {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -14,16 +14,19 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace l2norm {
|
||||
|
||||
namespace {
|
||||
|
||||
// This file has two implementation of L2Norm.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
@ -33,9 +36,15 @@ enum KernelType {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
||||
L2NormalizationParams* data =
|
||||
static_cast<L2NormalizationParams*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
@ -48,29 +57,36 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
|
||||
output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
|
||||
}
|
||||
if (output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
}
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
data->input_zero_point = 0;
|
||||
}
|
||||
|
||||
// TODO(ahentz): For some reason our implementations don't support
|
||||
// activations.
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
#endif
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(L2NormalizationParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const L2NormalizationParams& data =
|
||||
*(static_cast<const L2NormalizationParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
|
||||
// from tensorflow, i.e., adding a params.
|
||||
@ -87,39 +103,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
// So we don't even need to do handle the epsilon for quantized kernel case.
|
||||
const float epsilon = 1e-6f;
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = 0; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<float>(input), GetTensorShape(output), \
|
||||
GetTensorData<float>(output), epsilon)
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
epsilon);
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = input->params.zero_point; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<uint8>(input), GetTensorShape(output), \
|
||||
GetTensorData<uint8>(output))
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(
|
||||
data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
const auto input_shape = GetTensorShape(input);
|
||||
const auto output_shape = GetTensorShape(output);
|
||||
const auto input_shape = tflite::micro::GetTensorShape(input);
|
||||
const auto output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
|
||||
depth, GetTensorData<int8>(input),
|
||||
GetTensorData<int8>(output));
|
||||
reference_integer_ops::L2Normalization(
|
||||
data.input_zero_point, outer_size, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
|
||||
output->type);
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
@ -128,22 +137,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace l2norm
|
||||
|
||||
TfLiteRegistration* Register_L2NORM_REF() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
|
||||
return &r;
|
||||
TfLiteRegistration Register_L2NORM_REF() {
|
||||
return {/*init=*/l2norm::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION() {
|
||||
return Register_L2NORM_REF();
|
||||
}
|
||||
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
bool (*func)(bool, bool)) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (HaveSameShapes(input1, input2)) {
|
||||
if (tflite::micro::HaveSameShapes(input1, input2)) {
|
||||
reference_ops::BinaryFunction<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
} else {
|
||||
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace logical
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_OR() {
|
||||
TfLiteRegistration Register_LOGICAL_OR() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_AND() {
|
||||
TfLiteRegistration Register_LOGICAL_AND() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -44,7 +45,7 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
std::numeric_limits<int8_t>::min());
|
||||
@ -54,6 +55,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
@ -64,18 +67,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
}
} // namespace

void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);

return CalculateArithmeticOpData(context, node, data);
}

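The LOGISTIC hunk illustrates the lifecycle change made throughout these kernels: scale-dependent parameters that used to be recomputed on every invoke are now derived once in Prepare and parked in node->user_data, so Eval only reads the cached OpData. A rough skeleton of that split; the field names and trivial bodies are illustrative assumptions, not the commit's code:

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace example_op {

struct OpData {               // assumed fields, for illustration only
  int32_t input_zero_point;
  int32_t input_multiplier;
  int input_left_shift;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // One persistent allocation per node; it becomes node->user_data.
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  // The full TfLiteTensor, with quantization params, is only read here.
  const TfLiteTensor* input = GetInput(context, node, 0);
  data->input_zero_point = input->params.zero_point;
  data->input_multiplier = 0;   // real kernels derive this from params.scale
  data->input_left_shift = 0;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const OpData* data = static_cast<const OpData*>(node->user_data);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  // Arithmetic elided; only the cached data and eval tensors are touched.
  (void)data; (void)input; (void)output;
  return kTfLiteOk;
}

}  // namespace example_op
}  // namespace micro
}  // namespace ops
}  // namespace tflite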
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
CalculateArithmeticOpData(context, node, &data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);

if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
default:
@ -88,10 +107,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Logistic(
|
||||
input->params.zero_point, data.input_range_radius,
|
||||
data.input_multiplier, data.input_left_shift,
|
||||
NumElements(input->dims), GetTensorData<int8_t>(input),
|
||||
GetTensorData<int8_t>(output));
|
||||
data->input_zero_point, data->input_range_radius,
|
||||
data->input_multiplier, data->input_left_shift,
|
||||
NumElements(input->dims),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@ -113,16 +133,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_LOGISTIC() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOGISTIC() {
|
||||
return {/*init=*/activations::LogisticInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::LogisticPrepare,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpContext {
|
||||
OpContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input1 = GetInput(context, node, kInputTensor1);
|
||||
input2 = GetInput(context, node, kInputTensor2);
|
||||
output = GetOutput(context, node, kOutputTensor);
|
||||
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
}
|
||||
const TfLiteTensor* input1;
|
||||
const TfLiteTensor* input2;
|
||||
TfLiteTensor* output;
|
||||
const TfLiteEvalTensor* input1;
|
||||
const TfLiteEvalTensor* input2;
|
||||
TfLiteEvalTensor* output;
|
||||
};
|
||||
|
||||
struct MaximumOp {
|
||||
@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
|
||||
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpContext& op_context) {
|
||||
reference_ops::MaximumMinimumBroadcastSlow(
|
||||
GetTensorShape(op_context.input1),
|
||||
GetTensorData<data_type>(op_context.input1),
|
||||
GetTensorShape(op_context.input2),
|
||||
GetTensorData<data_type>(op_context.input2),
|
||||
GetTensorShape(op_context.output),
|
||||
GetTensorData<data_type>(op_context.output),
|
||||
tflite::micro::GetTensorShape(op_context.input1),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input1),
|
||||
tflite::micro::GetTensorShape(op_context.input2),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input2),
|
||||
tflite::micro::GetTensorShape(op_context.output),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.output),
|
||||
op_type::template op<data_type>);
|
||||
}
|
||||
|
||||
@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace maximum_minimum
|
||||
|
||||
TfLiteRegistration* Register_MAXIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MAXIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MINIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MINIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -29,59 +29,61 @@ namespace micro {
|
||||
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
|
||||
// registration in turn allows the linker to strip unused kernels.
|
||||
|
||||
TfLiteRegistration* Register_ABS();
|
||||
TfLiteRegistration* Register_ADD();
|
||||
TfLiteRegistration* Register_ARG_MAX();
|
||||
TfLiteRegistration* Register_ARG_MIN();
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration* Register_CEIL();
|
||||
TfLiteRegistration Register_ABS();
|
||||
TfLiteRegistration Register_ADD();
|
||||
TfLiteRegistration Register_ARG_MAX();
|
||||
TfLiteRegistration Register_ARG_MIN();
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration Register_CEIL();
|
||||
// TODO(b/160234179): Change custom OPs to also return by value.
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER();
|
||||
TfLiteRegistration* Register_CONV_2D();
|
||||
TfLiteRegistration* Register_CONCATENATION();
|
||||
TfLiteRegistration* Register_COS();
|
||||
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration* Register_DEQUANTIZE();
|
||||
TfLiteRegistration* Register_EQUAL();
|
||||
TfLiteRegistration* Register_FLOOR();
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration* Register_GREATER();
|
||||
TfLiteRegistration* Register_GREATER_EQUAL();
|
||||
TfLiteRegistration* Register_LESS();
|
||||
TfLiteRegistration* Register_LESS_EQUAL();
|
||||
TfLiteRegistration* Register_LOG();
|
||||
TfLiteRegistration* Register_LOGICAL_AND();
|
||||
TfLiteRegistration* Register_LOGICAL_NOT();
|
||||
TfLiteRegistration* Register_LOGICAL_OR();
|
||||
TfLiteRegistration* Register_LOGISTIC();
|
||||
TfLiteRegistration* Register_MAXIMUM();
|
||||
TfLiteRegistration* Register_MAX_POOL_2D();
|
||||
TfLiteRegistration* Register_MEAN();
|
||||
TfLiteRegistration* Register_MINIMUM();
|
||||
TfLiteRegistration* Register_MUL();
|
||||
TfLiteRegistration* Register_NEG();
|
||||
TfLiteRegistration* Register_NOT_EQUAL();
|
||||
TfLiteRegistration* Register_PACK();
|
||||
TfLiteRegistration* Register_PAD();
|
||||
TfLiteRegistration* Register_PADV2();
|
||||
TfLiteRegistration* Register_PRELU();
|
||||
TfLiteRegistration* Register_QUANTIZE();
|
||||
TfLiteRegistration* Register_RELU();
|
||||
TfLiteRegistration* Register_RELU6();
|
||||
TfLiteRegistration* Register_RESHAPE();
|
||||
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration* Register_ROUND();
|
||||
TfLiteRegistration* Register_RSQRT();
|
||||
TfLiteRegistration* Register_SIN();
|
||||
TfLiteRegistration* Register_SOFTMAX();
|
||||
TfLiteRegistration* Register_SPLIT();
|
||||
TfLiteRegistration* Register_SQRT();
|
||||
TfLiteRegistration* Register_SQUARE();
|
||||
TfLiteRegistration* Register_STRIDED_SLICE();
|
||||
TfLiteRegistration* Register_SUB();
|
||||
TfLiteRegistration* Register_SVDF();
|
||||
TfLiteRegistration* Register_UNPACK();
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration* Register_TANH();
|
||||
TfLiteRegistration Register_CONV_2D();
|
||||
TfLiteRegistration Register_CONCATENATION();
|
||||
TfLiteRegistration Register_COS();
|
||||
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration Register_DEQUANTIZE();
|
||||
TfLiteRegistration Register_EQUAL();
|
||||
TfLiteRegistration Register_FLOOR();
|
||||
TfLiteRegistration Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration Register_GREATER();
|
||||
TfLiteRegistration Register_GREATER_EQUAL();
|
||||
TfLiteRegistration Register_HARD_SWISH();
|
||||
TfLiteRegistration Register_LESS();
|
||||
TfLiteRegistration Register_LESS_EQUAL();
|
||||
TfLiteRegistration Register_LOG();
|
||||
TfLiteRegistration Register_LOGICAL_AND();
|
||||
TfLiteRegistration Register_LOGICAL_NOT();
|
||||
TfLiteRegistration Register_LOGICAL_OR();
|
||||
TfLiteRegistration Register_LOGISTIC();
|
||||
TfLiteRegistration Register_MAXIMUM();
|
||||
TfLiteRegistration Register_MAX_POOL_2D();
|
||||
TfLiteRegistration Register_MEAN();
|
||||
TfLiteRegistration Register_MINIMUM();
|
||||
TfLiteRegistration Register_MUL();
|
||||
TfLiteRegistration Register_NEG();
|
||||
TfLiteRegistration Register_NOT_EQUAL();
|
||||
TfLiteRegistration Register_PACK();
|
||||
TfLiteRegistration Register_PAD();
|
||||
TfLiteRegistration Register_PADV2();
|
||||
TfLiteRegistration Register_PRELU();
|
||||
TfLiteRegistration Register_QUANTIZE();
|
||||
TfLiteRegistration Register_RELU();
|
||||
TfLiteRegistration Register_RELU6();
|
||||
TfLiteRegistration Register_RESHAPE();
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration Register_ROUND();
|
||||
TfLiteRegistration Register_RSQRT();
|
||||
TfLiteRegistration Register_SIN();
|
||||
TfLiteRegistration Register_SOFTMAX();
|
||||
TfLiteRegistration Register_SPLIT();
|
||||
TfLiteRegistration Register_SQRT();
|
||||
TfLiteRegistration Register_SQUARE();
|
||||
TfLiteRegistration Register_STRIDED_SLICE();
|
||||
TfLiteRegistration Register_SUB();
|
||||
TfLiteRegistration Register_SVDF();
|
||||
TfLiteRegistration Register_UNPACK();
|
||||
TfLiteRegistration Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_TANH();
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@ -21,22 +21,31 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
namespace {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input1_zero_point;
|
||||
int32_t input2_zero_point;
|
||||
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
int32_t output_zero_point;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
@ -48,105 +57,155 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
double real_multiplier = static_cast<double>(input1->params.scale) *
|
||||
static_cast<double>(input2->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
|
||||
data->input1_zero_point = input1->params.zero_point;
|
||||
data->input2_zero_point = input2->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
} else {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
op_params.input1_offset = -input1->params.zero_point;
|
||||
op_params.input2_offset = -input2->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
} // namespace
|
||||
|
||||
#define TF_LITE_MUL(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
op_params.input1_offset = -data->input1_zero_point;
|
||||
op_params.input2_offset = -data->input2_zero_point;
|
||||
op_params.output_offset = data->output_zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_ops, Mul, uint8_t);
|
||||
}
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
TfLiteMulParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.float_activation_min = data->output_activation_min_f32;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_MUL(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(BroadcastMul4DSlow);
|
||||
reference_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_MUL(Mul);
|
||||
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);

return CalculateOpData(context, node, params, data);
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
OpData data;

const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);

CalculateOpData(context, node, params, &data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, params, &data, input1, input2, output);
EvalQuantized(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input1, input2, output);
EvalFloat(context, node, params, data, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -158,16 +217,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace mul

TfLiteRegistration* Register_MUL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MUL() {
return {/*init=*/mul::Init,
/*free=*/nullptr,
/*prepare=*/mul::Prepare,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -17,7 +17,7 @@ limitations under the License.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
// TODO(wangtz): handle for kTfLiteInt8
case kTfLiteFloat32:
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output),
GetTensorData<float>(output));
reference_ops::Negate(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace neg

TfLiteRegistration* Register_NEG() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_NEG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -16,7 +16,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;

template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteTensor* output, int values_count, int axis) {
TfLiteEvalTensor* output, int values_count, int axis) {
const TfLiteEvalTensor* input0 =
tflite::micro::GetEvalInput(context, node, 0);

const int dimensions = output->dims->size;
const TfLiteTensor* input0 = GetInput(context, node, 0);
const TfLiteIntArray* input_dims = input0->dims;
const TfLiteIntArray* output_dims = output->dims;

@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
}
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);

T* output_data = GetTensorData<T>(output);
T* output_data = tflite::micro::GetTensorData<T>(output);

for (int i = 0; i < values_count; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
const T* input_data = GetTensorData<T>(t);
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
const T* input_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
const T* input_ptr = input_data + copy_size * k;
int loc = k * values_count * copy_size + i * copy_size;
@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLitePackParams* data =
reinterpret_cast<TfLitePackParams*>(node->builtin_data);

TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

switch (output->type) {
case kTfLiteFloat32: {
@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace pack

TfLiteRegistration* Register_PACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -16,189 +16,205 @@ limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
#ifdef MEMORY_SANITIZER
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#else
|
||||
#define __msan_check_mem_is_initialized(ptr, size)
|
||||
#endif
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pad {
|
||||
namespace {
|
||||
|
||||
struct PadContext {
|
||||
PadContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input = GetInput(context, node, 0);
|
||||
paddings = GetInput(context, node, 1);
|
||||
constant_values = nullptr;
|
||||
if (NumInputs(node) == 3) {
|
||||
constant_values = GetOptionalInputTensor(context, node, 2);
|
||||
} else {
|
||||
constant_values = nullptr;
|
||||
}
|
||||
output = GetOutput(context, node, 0);
|
||||
dims = NumDimensions(input);
|
||||
|
||||
resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
const int32* paddings_data = GetTensorData<int32>(paddings);
|
||||
// Paddings will be a n,2 array, and we need to detect 4D arrays with the
|
||||
// pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
|
||||
if (IsConstantTensor(paddings) && paddings_total == 8 &&
|
||||
(paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
resizing_category = ResizingCategory::kImageStyle;
|
||||
}
|
||||
}
|
||||
const TfLiteTensor* constant_values;
|
||||
const TfLiteTensor* input;
|
||||
const TfLiteTensor* paddings;
|
||||
TfLiteTensor* output;
|
||||
int dims;
|
||||
ResizingCategory resizing_category;
|
||||
struct OpData {
|
||||
PadParams params;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
PadContext op_context(context, node);
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
|
||||
if (op_context.constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type,
|
||||
op_context.constant_values->type);
|
||||
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
|
||||
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
|
||||
const TfLiteTensor* constant_values =
|
||||
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
|
||||
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <=
|
||||
reference_ops::PadKernelMaxDimensionCount());
|
||||
|
||||
if (constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
|
||||
}
|
||||
|
||||
// There must be a pair of paddings for each output dimension.
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
|
||||
op_context.output->dims->size * 2);
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
|
||||
output->dims->size * 2);
|
||||
|
||||
// On Micro, outputs must be properly sized by the converter.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
for (int i = 0; i < op_context.output->dims->size; i++) {
|
||||
int output_dim = op_context.output->dims->data[i];
|
||||
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
|
||||
paddings_data[i * 2 + 1];
|
||||
// NOTE: This data is only available because the paddings buffer is stored in
|
||||
// the flatbuffer:
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
|
||||
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
|
||||
for (int i = 0; i < output->dims->size; i++) {
|
||||
int output_dim = output->dims->data[i];
|
||||
int expected_dim =
|
||||
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
|
||||
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
|
||||
}
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(
|
||||
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
|
||||
// Calculate OpData:
|
||||
data->params.resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
data->params.resizing_category = ResizingCategory::kImageStyle;
|
||||
}
|
||||
|
||||
const int num_input_dimensions = NumDimensions(input);
|
||||
data->params.left_padding_count = num_input_dimensions;
|
||||
data->params.right_padding_count = num_input_dimensions;
|
||||
|
||||
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
|
||||
data->params.left_padding[idx] = paddings_data[idx * 2];
|
||||
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
if (constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
} else {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
}
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
|
||||
static_cast<double>(constant_values->params.scale));
|
||||
}
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
PadContext op_context(context, node);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
if (op_context.constant_values != nullptr) {
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
|
||||
}
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, /*index=*/0);
|
||||
const TfLiteEvalTensor* constant_values =
|
||||
NumInputs(node) == 3
|
||||
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
|
||||
|
||||
// Create before and after padding arrays that are accepted by the kernel.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
|
||||
tflite::PadParams op_params;
|
||||
memset(&op_params, 0, sizeof(PadParams));
|
||||
op_params.left_padding_count = op_context.dims;
|
||||
op_params.right_padding_count = op_context.dims;
|
||||
|
||||
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
|
||||
op_params.left_padding[idx] = paddings_data[idx * 2];
|
||||
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
|
||||
const scalar pad_value_copy = pad_value; \
|
||||
\
|
||||
type::op_name(op_params, GetTensorShape(op_context.input), \
|
||||
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
|
||||
GetTensorShape(op_context.output), \
|
||||
GetTensorData<scalar>(op_context.output))
|
||||
switch (op_context.input->type) {
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
float pad_value = op_context.constant_values == nullptr
|
||||
? 0.f
|
||||
: *GetTensorData<float>(op_context.constant_values);
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
|
||||
float pad_value =
|
||||
constant_values == nullptr
|
||||
? 0.f
|
||||
: *tflite::micro::GetTensorData<float>(constant_values);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
uint8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context, static_cast<double>(op_context.output->params.scale),
|
||||
static_cast<double>(op_context.constant_values->params.scale));
|
||||
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
int8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE(context, op_context.output->params.scale ==
|
||||
op_context.constant_values->params.scale);
|
||||
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt32: {
|
||||
int32_t pad_value =
|
||||
op_context.constant_values == nullptr
|
||||
constant_values == nullptr
|
||||
? 0
|
||||
: *GetTensorData<int32_t>(op_context.constant_values);
|
||||
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
|
||||
: *tflite::micro::GetTensorData<int32_t>(constant_values);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} break;
|
||||
default:
|
||||
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
|
||||
TfLiteTypeGetName(op_context.input->type));
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_PAD
|
||||
@ -207,29 +223,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace pad
|
||||
|
||||
TfLiteRegistration* Register_PAD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PAD() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
// Also register Pad as PadV2.
|
||||
TfLiteRegistration* Register_PADV2() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PADV2() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
int32_t activation_min;
|
||||
int32_t activation_max;
|
||||
float activation_min_f32;
|
||||
float activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
|
||||
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::MaxPool(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
||||
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// Inputs and outputs share the same type, guaranteed by the converter.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
AverageEvalFloat(context, node, params, &data, input, output);
|
||||
AverageEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
AverageEvalQuantized(context, node, params, &data, input, output);
|
||||
AverageEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
|
||||
@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
MaxEvalFloat(context, node, params, &data, input, output);
|
||||
MaxEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
MaxEvalQuantized(context, node, params, &data, input, output);
|
||||
MaxEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
@ -207,30 +209,57 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MAX_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation, &data->activation_min_f32,
|
||||
&data->activation_max_f32);
|
||||
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&data->activation_min,
|
||||
&data->activation_max);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MAX_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -15,20 +15,45 @@ limitations under the License.

#include "tensorflow/lite/kernels/internal/reference/prelu.h"

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {

TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
&params->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
&params->output_shift_2);

params->input_offset = -input->params.zero_point;
params->alpha_offset = -alpha->params.zero_point;
params->output_offset = output->params.zero_point;
}

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}

} // namespace

inline void BroadcastPrelu4DSlowFloat(
|
||||
const RuntimeShape& unextended_input1_shape, const float* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const float* input2_data,
|
||||
@ -60,51 +85,64 @@ inline void BroadcastPrelu4DSlowFloat(
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
PreluParams* params = static_cast<PreluParams*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* alpha = GetInput(context, node, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
int32_t output_multiplier_1 = 0;
|
||||
int output_shift_1 = 0;
|
||||
int32_t output_multiplier_2 = 0;
|
||||
int output_shift_2 = 0;
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
|
||||
double real_multiplier_1 = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(output->params.scale);
|
||||
double real_multiplier_2 = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier_1, &output_multiplier_1,
|
||||
&output_shift_1);
|
||||
QuantizeMultiplier(real_multiplier_2, &output_multiplier_2,
|
||||
&output_shift_2);
|
||||
}
|
||||
|
||||
return CalculatePreluParams(input, alpha, output, params);
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const PreluParams& params =
|
||||
*(static_cast<const PreluParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
BroadcastPrelu4DSlowFloat(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(alpha), GetTensorData<float>(alpha),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<float>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
PreluParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.alpha_offset = -alpha->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier_1 = output_multiplier_1;
|
||||
op_params.output_shift_1 = output_shift_1;
|
||||
op_params.output_multiplier_2 = output_multiplier_2;
|
||||
op_params.output_shift_2 = output_shift_2;
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<uint8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<int8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Only float32 and uint8 are supported currently, got %d.",
|
||||
context, "Only float32 and uint8_t are supported currently, got %d.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
@ -112,16 +150,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_PRELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PRELU() {
|
||||
return {/*init=*/activations::PreluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -27,20 +28,18 @@ namespace micro {
|
||||
namespace quantize {
|
||||
|
||||
struct OpData {
|
||||
tflite::QuantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@ -63,18 +62,27 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE(context,
|
||||
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8 ||
|
||||
output->type == kTfLiteInt16);
|
||||
|
||||
if (input->type == kTfLiteInt16 && output->type == kTfLiteInt8) {
|
||||
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
|
||||
output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
|
||||
double effective_scale =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@ -82,25 +90,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
tflite::QuantizationParams op_params;
|
||||
op_params.zero_point = output->params.zero_point;
|
||||
op_params.scale = static_cast<double>(output->params.scale);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
@ -111,10 +126,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int16_t>(input), size, data->output_multiplier,
|
||||
data->output_shift, input->params.zero_point,
|
||||
output->params.zero_point, GetTensorData<int8_t>(output));
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@ -136,17 +177,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// This Op (QUANTIZE) quantizes the input and produces quantized output.
|
||||
// AffineQuantize takes scale and zero point and quantizes the float value to
|
||||
// quantized output, in int8 or uint8 format.
|
||||
TfLiteRegistration* Register_QUANTIZE() {
|
||||
static TfLiteRegistration r = {/*init=*/quantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/quantize::Prepare,
|
||||
/*invoke=*/quantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
// quantized output, in int8_t or uint8_t format.
|
||||
TfLiteRegistration Register_QUANTIZE() {
|
||||
return {/*init=*/quantize::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/quantize::Prepare,
|
||||
/*invoke=*/quantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -21,6 +21,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -50,7 +52,7 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
|
||||
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@ -58,7 +60,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
tflite::MeanParams* op_params) {
|
||||
int i = 0;
|
||||
for (; i < axis_count; ++i) {
|
||||
op_params->axis[i] = static_cast<int16>(axis_data[i]);
|
||||
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
|
||||
}
|
||||
for (; i < 4; ++i) {
|
||||
op_params->axis[i] = 1;
|
||||
@ -67,24 +69,25 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);

int num_axis = static_cast<int>(NumElements(axis));
int num_axis = static_cast<int>(ElementCount(*axis->dims));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];

switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,
&op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs =
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
(input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
@ -95,22 +98,24 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims) {
reference_ops::Mean(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_ENSURE(
context,
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
input->dims->size, GetTensorData<float>(output),
output->dims->data, output->dims->size,
GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
GetTensorData<float>(output)));
reference_ops::Mean(
tflite::micro::GetTensorData<float>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<float>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
tflite::micro::GetTensorData<float>(output)));
}
} break;
default:
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"is supported.");
@ -119,16 +124,15 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace reduce

TfLiteRegistration* Register_MEAN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MEAN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops

@ -18,6 +18,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
@ -61,7 +64,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
num_output_elements *= output_shape->data[stretch_dim];
}

TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
return kTfLiteOk;
}
@ -74,13 +77,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

// TODO(b/162522304): storing input bytes in OpData increases some models
// significantly, possibly due to alignment issues.
size_t input_bytes;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
input_bytes *= ElementCount(*input->dims);

// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input->bytes; ++i) {
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
}
@ -89,16 +100,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace reshape

TfLiteRegistration* Register_RESHAPE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESHAPE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -49,8 +50,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output->type = input->type;

if (!IsConstantTensor(size)) {
TF_LITE_KERNEL_LOG(context,
"Dynamic tensors are unsupported in tfmicro.");
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
#endif
@ -61,9 +61,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);

const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

tflite::ResizeNearestNeighborParams op_params;
op_params.align_corners = params->align_corners;
@ -71,22 +74,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

if (output->type == kTfLiteFloat32) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int32>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int32>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else if (output->type == kTfLiteUInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<uint8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context,
"Output type is %d, requires float, uint8 or int8.",
"Output type is %d, requires float, uint8_t or int8_t.",
output->type);
return kTfLiteError;
}
@ -95,16 +107,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace resize_nearest_neighbor

TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -32,8 +33,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@ -43,26 +44,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);

reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Round(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));

return kTfLiteOk;
}
} // namespace round

TfLiteRegistration* Register_ROUND() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ROUND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -42,7 +43,8 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
// NOTE: Current int16 softmax output does not require symmetric scaling
|
||||
// NOTE: Current int16_t softmax output does not require symmetric
|
||||
// scaling
|
||||
// - so no need to verify scale here.
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
@ -72,60 +74,75 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
|
||||
} // namespace
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, data);
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
SoftmaxParams op_data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateSoftmaxParams(context, input, output, params, &op_data));
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
SoftmaxFloat(input, output, op_data);
|
||||
SoftmaxFloat(input, output, *data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8: {
|
||||
SoftmaxQuantized(input, output, op_data);
|
||||
SoftmaxQuantized(input, output, *data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@ -136,16 +153,15 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_SOFTMAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SOFTMAX() {
|
||||
return {/*init=*/activations::SoftmaxInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@ -25,10 +26,11 @@ namespace split {

template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input, int axis_value) {
const TfLiteEvalTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteTensor* output0 = GetOutput(context, node, 0);
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;

const int split_dimensions = input_dims->size;
@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
base_inner_size *= input_dims->data[i];
}

const T* input_ptr = GetTensorData<T>(input);
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteTensor* t = GetOutput(context, node, i);
T* output_data = GetTensorData<T>(t);
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
@ -65,23 +67,28 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* axis = GetInput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 1);

// Dynamic output tensors are needed if axis tensor is not constant.
// But Micro doesn't support dynamic memory allocation, so we only support
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
return kTfLiteOk;
}

int axis_value = GetTensorData<int32_t>(axis)[0];
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);

int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += NumDimensions(input);
axis_value += input->dims->size;
}

TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
TF_LITE_ENSURE(context, axis_value < input->dims->size);

switch (input->type) {
case kTfLiteFloat32: {
@ -111,16 +118,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

} // namespace split

TfLiteRegistration* Register_SPLIT() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SPLIT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/split::Prepare,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}

} // namespace micro

@ -15,23 +15,20 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace strided_slice {
|
||||
|
||||
enum KernelType {
|
||||
kReference,
|
||||
// TODO(soroosh): add kGenericOptimized
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kBeginTensor = 1;
|
||||
constexpr int kEndTensor = 2;
|
||||
@ -120,64 +117,74 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
StridedSliceParams* op_params =
|
||||
static_cast<StridedSliceParams*>(node->user_data);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
StridedSliceContext op_context(context, node);
|
||||
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
|
||||
"input dim should not exceed 4");
|
||||
auto params = BuildStridedSliceParams(&op_context);
|
||||
memcpy(op_params, ¶ms, sizeof(StridedSliceParams));
|
||||
return CheckOutputSize(context, &op_context);
|
||||
}
|
||||
|
||||
template <KernelType kernel_type>
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
StridedSliceContext op_context(context, node);
|
||||
auto op_params = BuildStridedSliceParams(&op_context);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const StridedSliceParams& op_params =
|
||||
*(static_cast<const StridedSliceParams*>(node->user_data));
|
||||
|
||||
#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \
|
||||
kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \
|
||||
GetTensorData<data_type>(op_context.input), \
|
||||
GetTensorShape(op_context.output), \
|
||||
GetTensorData<data_type>(op_context.output))
|
||||
|
||||
switch (op_context.input->type) {
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, float);
|
||||
}
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, uint8_t);
|
||||
}
|
||||
reference_ops::StridedSlice(
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_STRIDED_SLICE(reference_ops, int8_t);
|
||||
}
|
||||
reference_ops::StridedSlice(op_params,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(op_context.input->type),
|
||||
op_context.input->type);
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_STRIDED_SLICE
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace strided_slice
|
||||
|
||||
TfLiteRegistration* Register_STRIDED_SLICE() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/strided_slice::Prepare,
|
||||
/*invoke=*/strided_slice::Eval<strided_slice::kReference>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_STRIDED_SLICE() {
|
||||
return {/*init=*/strided_slice::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/strided_slice::Prepare,
|
||||
/*invoke=*/strided_slice::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -21,8 +21,10 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -40,18 +42,18 @@ struct OpData {
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32 output_activation_min;
|
||||
int32 output_activation_max;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32 input1_multiplier;
|
||||
int32 input2_multiplier;
|
||||
int32 output_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
|
||||
@ -93,31 +95,59 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
|
||||
const OpData* data, const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_SUB(opname) \
|
||||
opname(op_params, GetTensorShape(input1), GetTensorData<float>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<float>(input2), \
|
||||
GetTensorShape(output), GetTensorData<float>(output))
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::SubWithActivation);
|
||||
tflite::reference_ops::SubWithActivation(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_SUB
|
||||
}
|
||||
|
||||
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteSubParams* params, const OpData* data,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
@ -133,25 +163,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_SUB(opname, dtype) \
|
||||
opname(op_params, GetTensorShape(input1), GetTensorData<dtype>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<dtype>(input2), \
|
||||
GetTensorShape(output), GetTensorData<dtype>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::Sub, int8_t);
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t);
|
||||
tflite::reference_ops::BroadcastSubSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t);
|
||||
tflite::reference_ops::Sub(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_SUB
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
@ -160,13 +211,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, &data));
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalSub(context, node, params, &data, input1, input2, output);
|
||||
@ -184,16 +237,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace sub
|
||||
|
||||
TfLiteRegistration* Register_SUB() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/sub::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SUB() {
|
||||
return {/*init=*/sub::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/sub::Prepare,
|
||||
/*invoke=*/sub::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/activation_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -32,14 +33,18 @@ namespace svdf {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
int32 effective_scale_1_a;
|
||||
int32 effective_scale_2_a;
|
||||
int32_t effective_scale_1_a;
|
||||
int32_t effective_scale_2_a;
|
||||
// b versions of each scale are kept at int since the numbers are just the
|
||||
// shift value - typically between [-32, 32].
|
||||
int effective_scale_1_b;
|
||||
int effective_scale_2_b;
|
||||
int scratch_tensor_index;
|
||||
int scratch_output_tensor_index;
|
||||
|
||||
// Cached tensor zero point values for quantized operations.
|
||||
int input_zero_point;
|
||||
int output_zero_point;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -114,11 +119,11 @@ static inline void ApplyTimeWeightsBiasAndActivation(
|
||||
}
|
||||
|
||||
inline void EvalFloatSVDF(
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
|
||||
const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
|
||||
const TfLiteTensor* bias, const TfLiteSVDFParams* params,
|
||||
int scratch_tensor_index, TfLiteTensor* activation_state,
|
||||
TfLiteTensor* output) {
|
||||
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* weights_feature,
|
||||
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
|
||||
const TfLiteSVDFParams* params, int scratch_tensor_index,
|
||||
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
|
||||
const int rank = params->rank;
|
||||
const int batch_size = input->dims->data[0];
|
||||
const int input_size = input->dims->data[1];
|
||||
@ -126,12 +131,14 @@ inline void EvalFloatSVDF(
|
||||
const int num_units = num_filters / rank;
|
||||
const int memory_size = weights_time->dims->data[1];
|
||||
|
||||
const float* weights_feature_ptr = GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr = GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = GetTensorData<float>(bias);
|
||||
const float* input_ptr = GetTensorData<float>(input);
|
||||
const float* weights_feature_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_feature);
|
||||
const float* weights_time_ptr =
|
||||
tflite::micro::GetTensorData<float>(weights_time);
|
||||
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
|
||||
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
|
||||
|
||||
float* state_ptr = GetTensorData<float>(activation_state);
|
||||
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
|
||||
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
|
||||
@ -139,7 +146,7 @@ inline void EvalFloatSVDF(
|
||||
float* scratch_ptr = static_cast<float*>(
|
||||
context->GetScratchBuffer(context, scratch_tensor_index));
|
||||
|
||||
float* output_ptr = GetTensorData<float>(output);
|
||||
float* output_ptr = tflite::micro::GetTensorData<float>(output);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
@ -185,14 +192,13 @@ inline void EvalFloatSVDF(
|
||||
}
|
||||
|
||||
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input_tensor,
|
||||
const TfLiteTensor* weights_feature_tensor,
|
||||
const TfLiteTensor* weights_time_tensor,
|
||||
const TfLiteTensor* bias_tensor,
|
||||
const TfLiteEvalTensor* input_tensor,
|
||||
const TfLiteEvalTensor* weights_feature_tensor,
|
||||
const TfLiteEvalTensor* weights_time_tensor,
|
||||
const TfLiteEvalTensor* bias_tensor,
|
||||
const TfLiteSVDFParams* params,
|
||||
TfLiteTensor* activation_state_tensor,
|
||||
TfLiteTensor* output_tensor, const OpData& data,
|
||||
int32_t input_zp, int32_t output_zp) {
|
||||
TfLiteEvalTensor* activation_state_tensor,
|
||||
TfLiteEvalTensor* output_tensor, const OpData& data) {
|
||||
const int n_rank = params->rank;
|
||||
const int n_batch = input_tensor->dims->data[0];
|
||||
const int n_input = input_tensor->dims->data[1];
|
||||
@ -209,7 +215,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
|
||||
|
||||
// Shift states.
|
||||
int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
|
||||
int16_t* const state_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
|
||||
// Left shift the activation_state.
|
||||
{
|
||||
@ -225,10 +232,11 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
// Feature matmul.
|
||||
{
|
||||
int16_t* state = GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = GetTensorData<int8_t>(input_tensor);
|
||||
int16_t* state =
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
|
||||
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
|
||||
const int8_t* weight_feature =
|
||||
GetTensorData<int8_t>(weights_feature_tensor);
|
||||
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
|
||||
const int32_t output_max = std::numeric_limits<int16_t>::max();
|
||||
const int32_t output_min = std::numeric_limits<int16_t>::min();
|
||||
int16_t* result_in_batch = state + (n_memory - 1);
|
||||
@ -238,7 +246,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t dot_prod = 0;
|
||||
const int8_t* vector_in_batch = input + b * n_input;
|
||||
for (int c = 0; c < n_input; c++) {
|
||||
dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
|
||||
dot_prod +=
|
||||
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
|
||||
}
|
||||
dot_prod = MultiplyByQuantizedMultiplier(
|
||||
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
|
||||
@ -261,9 +270,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
|
||||
|
||||
// Perform batched vector dot product:
|
||||
const int16_t* vector1_ptr = GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector1_ptr =
|
||||
tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
|
||||
const int16_t* vector2_ptr =
|
||||
GetTensorData<int16_t>(activation_state_tensor) +
|
||||
tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
|
||||
b * n_memory * n_filter;
|
||||
|
||||
for (int i = 0; i < n_filter; i++) {
|
||||
@ -281,7 +291,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
// Add bias.
|
||||
if (bias_tensor) {
|
||||
// Vector batch assign:
|
||||
const int32_t* bias_data = GetTensorData<int32_t>(bias_tensor);
|
||||
const int32_t* bias_data =
|
||||
tflite::micro::GetTensorData<int32_t>(bias_tensor);
|
||||
for (int i = 0; i < n_batch; ++i) {
|
||||
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
|
||||
const int32_t* bias_ptr = bias_data;
|
||||
@ -316,9 +327,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
|
||||
int32_t x1 = scratch_output_tensor[i];
|
||||
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
|
||||
data.effective_scale_2_b);
|
||||
int32_t x3 = x2 + output_zp;
|
||||
int32_t x3 = x2 + data.output_zero_point;
|
||||
int32_t x4 = std::min(std::max(output_min, x3), output_max);
|
||||
GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
|
||||
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
|
||||
static_cast<int8_t>(x4);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -338,12 +350,7 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@ -382,7 +389,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
|
||||
|
||||
// Validate Tensor Output:
|
||||
// [0] = float/int8, {2, batch_size, num_units}
|
||||
// [0] = float/int8_t, {2, batch_size, num_units}
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
|
||||
@ -408,9 +415,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
|
||||
memory_size * num_filters);
|
||||
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
|
||||
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
|
||||
@ -419,35 +431,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
|
||||
const auto* input_params =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
|
||||
const auto* weights_feature_params =
|
||||
static_cast<const TfLiteAffineQuantization*>(
|
||||
weights_feature->quantization.params);
|
||||
const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
|
||||
activation_state->quantization.params);
|
||||
const auto* weight_time_params =
|
||||
static_cast<const TfLiteAffineQuantization*>(
|
||||
weights_time->quantization.params);
|
||||
const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
|
||||
output->quantization.params);
|
||||
const double effective_scale_1 = static_cast<double>(
|
||||
input_params->scale->data[0] * weights_feature_params->scale->data[0] /
|
||||
state_params->scale->data[0]);
|
||||
const double effective_scale_2 = static_cast<double>(
|
||||
state_params->scale->data[0] * weight_time_params->scale->data[0] /
|
||||
output_params->scale->data[0]);
|
||||
input->params.scale * weights_feature->params.scale /
|
||||
activation_state->params.scale);
|
||||
const double effective_scale_2 =
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale / output->params.scale);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
std::abs(static_cast<double>(bias->params.scale) -
|
||||
static_cast<double>(activation_state->params.scale *
|
||||
weights_time->params.scale)) < 1e-5);
|
||||
|
||||
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
|
||||
&(data->effective_scale_1_b));
|
||||
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
|
||||
&(data->effective_scale_2_b));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
@ -467,10 +474,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
if (bias != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
|
||||
}
|
||||
TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
|
||||
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
|
||||
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
|
||||
@ -484,20 +488,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* weights_feature =
|
||||
GetInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteTensor* weights_time =
|
||||
GetInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* activation_state =
|
||||
GetVariableInput(context, node, kInputActivationStateTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* weights_feature =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
|
||||
const TfLiteEvalTensor* weights_time =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
(NumInputs(node) == 5)
|
||||
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
|
||||
context, node, kInputActivationStateTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (weights_feature->type) {
|
||||
case kTfLiteFloat32: {
|
||||
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
@ -508,11 +516,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
case kTfLiteInt8: {
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
|
||||
|
||||
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
|
||||
params, activation_state, output, data,
|
||||
input->params.zero_point, output->params.zero_point);
|
||||
params, activation_state, output, data);
|
||||
return kTfLiteOk;
|
||||
break;
|
||||
}
|
||||
@ -527,16 +532,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace svdf
|
||||
|
||||
TfLiteRegistration* Register_SVDF() {
|
||||
static TfLiteRegistration r = {/*init=*/svdf::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/svdf::Prepare,
|
||||
/*invoke=*/svdf::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SVDF() {
|
||||
return {/*init=*/svdf::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/svdf::Prepare,
|
||||
/*invoke=*/svdf::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -17,6 +17,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@ -28,14 +29,16 @@ constexpr int kInputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input, int output_count, int axis) {
|
||||
const TfLiteTensor* output0 = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input, int output_count,
|
||||
int axis) {
|
||||
const TfLiteEvalTensor* output0 =
|
||||
tflite::micro::GetEvalOutput(context, node, 0);
|
||||
const TfLiteIntArray* input_dims = input->dims;
|
||||
const TfLiteIntArray* output_dims = output0->dims;
|
||||
const int dimensions = input_dims->size;
|
||||
|
||||
if (axis < 0) {
|
||||
axis += NumDimensions(input);
|
||||
axis += input->dims->size;
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_LT(axis, dimensions);
|
||||
@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
|
||||
|
||||
const T* input_data = GetTensorData<T>(input);
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(input);
|
||||
|
||||
for (int i = 0; i < output_count; ++i) {
|
||||
TfLiteTensor* t = GetOutput(context, node, i);
|
||||
T* output_data = GetTensorData<T>(t);
|
||||
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
T* output_ptr = output_data + copy_size * k;
|
||||
int loc = k * output_count * copy_size + i * copy_size;
|
||||
@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TfLiteUnpackParams* data =
|
||||
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@ -101,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace unpack
|
||||
|
||||
TfLiteRegistration* Register_UNPACK() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/unpack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_UNPACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/unpack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@ -15,9 +15,15 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -40,8 +46,7 @@ size_t AlignSizeUp(size_t size, size_t alignment) {
|
||||
return aligned_size;
|
||||
}
|
||||
|
||||
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
|
||||
ErrorReporter* reporter) {
|
||||
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
|
||||
switch (type) {
|
||||
case kTfLiteFloat32:
|
||||
*size = sizeof(float);
|
||||
@ -67,9 +72,10 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
|
||||
case kTfLiteComplex64:
|
||||
*size = sizeof(float) * 2;
|
||||
break;
|
||||
case kTfLiteComplex128:
|
||||
*size = sizeof(double) * 2;
|
||||
break;
|
||||
default:
|
||||
reporter->Report("Type %s (%d) not is not supported",
|
||||
TfLiteTypeGetName(type), type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
@ -79,17 +85,71 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter) {
  int element_count = 1;
  for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
    element_count *= flatbuffer_tensor.shape()->Get(n);
  // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
  // so has 1 element.
  if (flatbuffer_tensor.shape() != nullptr) {
    for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
      element_count *= flatbuffer_tensor.shape()->Get(n);
    }
  }

  TfLiteType tf_lite_type;
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &tf_lite_type, error_reporter));
  TF_LITE_ENSURE_STATUS(
      TfLiteTypeSizeOf(tf_lite_type, type_size, error_reporter));
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
  *bytes = element_count * (*type_size);
  return kTfLiteOk;
}

TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes) {
  TFLITE_DCHECK(out_bytes != nullptr);

  int element_count = 1;
  // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
  if (eval_tensor->dims != nullptr) {
    for (int n = 0; n < eval_tensor->dims->size; ++n) {
      element_count *= eval_tensor->dims->data[n];
    }
  }
  size_t type_size;
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
  *out_bytes = element_count * type_size;
  return kTfLiteOk;
}

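To make the byte accounting concrete: a 2x3 kTfLiteFloat32 tensor has element_count = 2 * 3 = 6 and a 4-byte element size, so the helper reports 6 * 4 = 24 bytes. A hedged usage sketch, where eval_tensors and i are placeholders:

// Sketch only: querying the storage needed by one TfLiteEvalTensor.
size_t tensor_bytes = 0;
if (TfLiteEvalTensorByteLength(&eval_tensors[i], &tensor_bytes) == kTfLiteOk) {
  // tensor_bytes == element_count * TfLiteTypeSizeOf(type),
  // e.g. 6 * 4 = 24 for a 2x3 float32 tensor.
}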
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output) {
  const TfLiteTensor* input = nullptr;

  TF_LITE_ENSURE(context, input1->dims != nullptr);
  TF_LITE_ENSURE(context, input2->dims != nullptr);
  TF_LITE_ENSURE(context, output->dims->size == 0);

  input = input1->dims->size > input2->dims->size ? input1 : input2;
  TF_LITE_ENSURE(context, output->type == input->type);

  size_t size = 0;
  TfLiteTypeSizeOf(input->type, &size);
  const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
  for (int i = 0; i < dimensions_count; i++) {
    size *= input->dims->data[i];
  }

  output->bytes = size;

  output->dims =
      reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
          context, TfLiteIntArrayGetSizeInBytes(size)));

  output->dims->size = input->dims->size;
  for (int i = 0; i < dimensions_count; i++) {
    output->dims->data[i] = input->dims->data[i];
  }

  return kTfLiteOk;
}

}  // namespace tflite

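A sketch of how a two-input kernel's Prepare step might call the helper defined above; this is not part of the diff, and the tensor indices and the GetOutput helper are assumptions:

// Sketch only: output takes the larger input's shape; the helper expects
// output->dims->size == 0 on entry and allocates the new dims from the
// persistent arena via context->AllocatePersistentBuffer.
TfLiteStatus PrepareOutputShapeSketch(TfLiteContext* context,
                                      TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, 0);
  const TfLiteTensor* input2 = GetInput(context, node, 1);
  TfLiteTensor* output = GetOutput(context, node, 0);
  return AllocateOutputDimensionsFromInput(context, input1, input2, output);
}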
@ -15,6 +15,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -31,14 +34,26 @@ uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
size_t AlignSizeUp(size_t size, size_t alignment);

// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
                              ErrorReporter* reporter);
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);

// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter);

// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes);

// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

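The alignment helpers declared above are what the arena allocation paths lean on; a small sketch, assuming a 16-byte alignment and a hypothetical arena_end pointer:

// Sketch only: carve an aligned slot of padded size from the arena tail.
uint8_t* tail = AlignPointerDown(arena_end, 16);        // arena_end is assumed
size_t padded = AlignSizeUp(sizeof(TfLiteTensor), 16);  // round size up to 16
tail -= padded;  // the slot [tail, tail + padded) stays 16-byte aligned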
@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
|
||||
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
|
||||
next_free += sizeof(BufferRequirements) * max_buffer_count_;
|
||||
|
||||
buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
|
||||
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
|
||||
next_free += sizeof(int) * max_buffer_count_;
|
||||
|
||||
buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
|
||||
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
|
||||
next_free += sizeof(int) * max_buffer_count_;
|
||||
|
||||
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
|
||||
@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
|
||||
current->size = size;
|
||||
current->first_time_used = first_time_used;
|
||||
current->last_time_used = last_time_used;
|
||||
current->offline_offset = kOnlinePlannedBuffer;
|
||||
++buffer_count_;
|
||||
need_to_calculate_offsets_ = true;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
|
||||
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
|
||||
int last_time_used, int offline_offset) {
|
||||
BufferRequirements* current = &requirements_[buffer_count_];
|
||||
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
|
||||
kTfLiteOk) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
current->offline_offset = offline_offset;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
|
||||
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
|
||||
const int last_time_used) const {
|
||||
@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
|
||||
ListEntry* result = nullptr;
|
||||
ListEntry* candidate_next_entry;
|
||||
if (start == nullptr) {
|
||||
candidate_next_entry = &buffers_sorted_by_offset_[0];
|
||||
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
|
||||
} else {
|
||||
if (start->next_entry_index == -1) {
|
||||
return nullptr;
|
||||
@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
|
||||
// This helps find a more compact layout. Intuitively, you can think
|
||||
// about putting the large buffers in place first, and then the
|
||||
// smaller buffers can fit in the gaps, rather than fragmenting the
|
||||
// gaps with small buffers at the beginning.
|
||||
// gaps with small buffers at the beginning. Add offline planned offsets
|
||||
// first in the list, since they have a predetermined offset.
|
||||
int idx_from_tail = buffer_count_;
|
||||
int idx_from_head = 0;
|
||||
for (int i = 0; i < buffer_count_; ++i) {
|
||||
buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
|
||||
buffer_ids_sorted_by_size_[i] = i;
|
||||
buffer_offsets_[i] = -1;
|
||||
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
|
||||
idx_from_tail--;
|
||||
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
|
||||
buffer_ids_sorted_[idx_from_tail] = i;
|
||||
buffer_offsets_[i] = -1;
|
||||
} else {
|
||||
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
|
||||
buffer_ids_sorted_[idx_from_head] = i;
|
||||
buffer_offsets_[i] = requirements_[i].offline_offset;
|
||||
idx_from_head++;
|
||||
}
|
||||
}
|
||||
// This sorting algorithm is naive, and may end up taking a very long time
|
||||
// with hundreds of buffers.
|
||||
ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
|
||||
buffer_count_);
|
||||
|
||||
// Put the largest buffer at offset zero to start the process.
|
||||
ListEntry* first_entry = &buffers_sorted_by_offset_[0];
|
||||
first_entry->offset = 0;
|
||||
first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
|
||||
first_entry->next_entry_index = -1;
|
||||
// This sorting algorithm is naive, and may end up taking a very long time
|
||||
// with hundreds of buffers. Do not sort the offline planned offsets.
|
||||
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
|
||||
&buffer_ids_sorted_[idx_from_head],
|
||||
buffer_count_ - idx_from_head);
|
||||
|
||||
// Initialize the first entry to the first buffer in
|
||||
// buffer_ids_sorted_.
|
||||
// - If there are no offline planned offsets, the largest buffer will be
|
||||
// first, and the buffers will be handled in size order.
|
||||
// - If offline offsets are present, these will be handled first in order
|
||||
// for the greedy algorithm to utilized gaps in the offline plan.
|
||||
first_entry_index_ = 0;
|
||||
next_free_entry_ = 1;
|
||||
buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
|
||||
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
|
||||
first_entry->next_entry_index = -1; // to mark the entry as end of list
|
||||
int buffer_id = buffer_ids_sorted_[0];
|
||||
first_entry->requirements_index = buffer_id;
|
||||
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
|
||||
buffer_offsets_[buffer_id] = 0;
|
||||
}
|
||||
first_entry->offset = buffer_offsets_[buffer_id];
|
||||
|
||||
// Work through the rest of the buffers to find a good gap to place each one.
|
||||
for (int i = 1; i < buffer_count_; ++i) {
|
||||
// The id is the order the buffer was originally added by the client.
|
||||
const int buffer_id = buffer_ids_sorted_by_size_[i];
|
||||
buffer_id = buffer_ids_sorted_[i];
|
||||
// Look at what size and time range the buffer needs to be active.
|
||||
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
|
||||
const int wanted_size = wanted_requirements->size;
|
||||
@ -168,37 +203,43 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
|
||||
// so that it's easy to find the next buffer in memory, and so the gap.
|
||||
// The candidate_entry variable holds the buffer that we're considering
|
||||
// placing the current buffer after.
|
||||
ListEntry* prior_entry = nullptr;
|
||||
|
||||
int candidate_offset = 0;
|
||||
// Loop through the offset-ordered list of buffers, looking for gaps.
|
||||
while (true) {
|
||||
// Find out what the next active buffer is.
|
||||
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
|
||||
prior_entry, wanted_first_time_used, wanted_last_time_used);
|
||||
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
|
||||
ListEntry* prior_entry = nullptr;
|
||||
while (true) {
|
||||
// Find out what the next active buffer is.
|
||||
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
|
||||
prior_entry, wanted_first_time_used, wanted_last_time_used);
|
||||
|
||||
if (prior_entry) {
|
||||
BufferRequirements* candidate_requirements =
|
||||
&requirements_[prior_entry->requirements_index];
|
||||
const int prior_entry_offset =
|
||||
prior_entry->offset + candidate_requirements->size;
|
||||
if (prior_entry_offset > candidate_offset) {
|
||||
candidate_offset = prior_entry_offset;
|
||||
if (prior_entry) {
|
||||
BufferRequirements* candidate_requirements =
|
||||
&requirements_[prior_entry->requirements_index];
|
||||
const int prior_entry_offset =
|
||||
prior_entry->offset + candidate_requirements->size;
|
||||
if (prior_entry_offset > candidate_offset) {
|
||||
candidate_offset = prior_entry_offset;
|
||||
}
|
||||
}
|
||||
if (next_entry == nullptr) {
|
||||
// We're at the end of the list, so we can always append the buffer
|
||||
// here.
|
||||
break;
|
||||
}
|
||||
// Find out how much space there is between us and the next buffer.
|
||||
const int gap = next_entry->offset - candidate_offset;
|
||||
if (gap >= wanted_size) {
|
||||
// This entry has a big enough gap between it and the next, so
|
||||
// use it!
|
||||
break;
|
||||
}
|
||||
// The gap wasn't big enough, so move on to another candidate.
|
||||
prior_entry = next_entry;
|
||||
}
|
||||
if (next_entry == nullptr) {
|
||||
// We're at the end of the list, so we can always append the buffer
|
||||
// here.
|
||||
break;
|
||||
}
|
||||
// Find out how much space there is between us and the next buffer.
|
||||
const int gap = next_entry->offset - candidate_offset;
|
||||
if (gap >= wanted_size) {
|
||||
// This entry has a big enough gap between it and the next, so
|
||||
// use it!
|
||||
break;
|
||||
}
|
||||
// The gap wasn't big enough, so move on to another candidate.
|
||||
prior_entry = next_entry;
|
||||
} else {
|
||||
// Offline planned offset are to be considered constant
|
||||
candidate_offset = wanted_requirements->offline_offset;
|
||||
}
|
||||
// At this point, we've either found a gap (possibly at the end of the
|
||||
// list) and want to place the buffer there, or there are no other active
|
||||
@ -212,26 +253,36 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
|
||||
new_entry->requirements_index = buffer_id;
|
||||
const int new_entry_index = next_free_entry_;
|
||||
++next_free_entry_;
|
||||
ListEntry* current_entry = first_entry;
|
||||
// Make sure that we insert the buffer at the correct place in the ordered
|
||||
// list.
|
||||
while (true) {
|
||||
const int next_entry_index = current_entry->next_entry_index;
|
||||
if (next_entry_index == -1) {
|
||||
// We're at the end of the list, so just add the new entry here.
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
new_entry->next_entry_index = -1;
|
||||
break;
|
||||
|
||||
if (first_entry->offset > candidate_offset) {
|
||||
// The new entry offset is smaller than the first entry offset =>
|
||||
// replace the first entry
|
||||
first_entry = new_entry;
|
||||
first_entry->next_entry_index = first_entry_index_;
|
||||
first_entry_index_ = new_entry_index;
|
||||
} else {
|
||||
ListEntry* current_entry = first_entry;
|
||||
// Make sure that we insert the buffer at the correct place in the
|
||||
// buffer-offset-ordered list
|
||||
while (true) {
|
||||
const int next_entry_index = current_entry->next_entry_index;
|
||||
if (next_entry_index == -1) {
|
||||
// We're at the end of the list, so just add the new entry here.
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
new_entry->next_entry_index = -1;
|
||||
break;
|
||||
}
|
||||
// not at the end of the list -> take a look at next entry
|
||||
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
|
||||
if (next_entry->offset > candidate_offset) {
|
||||
// We're at the right spot to do an insertion and retain the sorting
|
||||
// order, so place the new entry here.
|
||||
new_entry->next_entry_index = current_entry->next_entry_index;
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
break;
|
||||
}
|
||||
current_entry = next_entry;
|
||||
}
|
||||
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
|
||||
if (next_entry->offset > candidate_offset) {
|
||||
// We're at the right spot to do an insertion and retain the sorting
|
||||
// order, so place the new entry here.
|
||||
new_entry->next_entry_index = current_entry->next_entry_index;
|
||||
current_entry->next_entry_index = new_entry_index;
|
||||
break;
|
||||
}
|
||||
current_entry = next_entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
|
||||
if (buffer_count_ == 0) {
|
||||
return 0;
|
||||
}
|
||||
ListEntry* entry = &buffers_sorted_by_offset_[0];
|
||||
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
|
||||
size_t max_size = 0;
|
||||
while (entry) {
|
||||
BufferRequirements* requirements =
|
||||
|
||||
@ -21,6 +21,8 @@ limitations under the License.

namespace tflite {

constexpr int kOnlinePlannedBuffer = -1;

// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Record details of an offline planned buffer offset we want to place.
  // offline_offset is the buffer offset from the start of the arena.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used,
                         int offline_offset);

  // Returns the high-water mark of used memory. This is the minimum size of a
  // memory arena you'd need to allocate to hold these buffers.
  size_t GetMaximumMemorySize() override;
@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  static size_t per_buffer_size() {
    const int per_buffer_size =
        sizeof(BufferRequirements) +  // requirements_
        sizeof(int) +                 // buffer_sizes_sorted_by_size_
        sizeof(int) +                 // buffer_ids_sorted_by_size_
        sizeof(int) +                 // buffer_sizes_sorted_
        sizeof(int) +                 // buffer_ids_sorted_
        sizeof(ListEntry) +           // buffers_sorted_by_offset_
        sizeof(int);                  // buffer_offsets_;
    return per_buffer_size;
@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  // Records the client-provided information about each buffer.
  struct BufferRequirements {
    int size;
    int offline_offset;
    int first_time_used;
    int last_time_used;
  };

  // Working arrays used during the layout algorithm.
  BufferRequirements* requirements_;
  int* buffer_sizes_sorted_by_size_;
  int* buffer_ids_sorted_by_size_;
  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
  // {
  //   offline planned buffers,
  //   online planned buffers sorted by size
  // }
  int* buffer_sizes_sorted_;
  int* buffer_ids_sorted_;
  ListEntry* buffers_sorted_by_offset_;
  int next_free_entry_;
  int next_free_entry_;    // Index of the next free entry of
                           // buffers_sorted_by_offset_
  int first_entry_index_;  // Index of the first entry (smallest offset) of
                           // buffers_sorted_by_offset_

  // Stores the outcome of the plan, the location of each buffer in the arena.
  int* buffer_offsets_;

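A sketch of driving the planner interface shown above; the constructor's scratch-size parameter, the error_reporter object, and the sizes/timesteps are all assumptions made for illustration:

// Sketch only: plan two buffers, one online and one pinned offline at offset 0.
unsigned char planner_scratch[1024];
GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));
planner.AddBuffer(error_reporter, /*size=*/256,
                  /*first_time_used=*/0, /*last_time_used=*/3);
planner.AddBuffer(error_reporter, /*size=*/128,
                  /*first_time_used=*/1, /*last_time_used=*/2,
                  /*offline_offset=*/0);
size_t arena_needed = planner.GetMaximumMemorySize();  // high-water mark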
File diff suppressed because it is too large
@ -1,5 +1,5 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

b/160894903
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -15,9 +15,14 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/simple_memory_allocator.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
@ -26,16 +31,21 @@ namespace tflite {
|
||||
// Namespace used for unittests.
|
||||
namespace internal {
|
||||
|
||||
// Sets up all of the data structure members for a runtime tensor
|
||||
// based on the contents of a serialized tensor.
|
||||
TfLiteStatus InitializeRuntimeTensor(
|
||||
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
|
||||
// Sets up all of the data structure members for a TfLiteTensor based on the
|
||||
// contents of a serialized tensor in the flatbuffer.
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new
|
||||
// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
|
||||
// flatbuffer quantization or dimension allocations to take place in either the
|
||||
// temp or tail section of the arena.
|
||||
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
|
||||
SimpleMemoryAllocator* allocator, bool allocate_temp,
|
||||
const tflite::Tensor& flatbuffer_tensor,
|
||||
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
|
||||
ErrorReporter* error_reporter, TfLiteTensor* result);
|
||||
|
||||
// A handle tracking scratch buffer allocation. This handle is created by
|
||||
// `RequestScratchBufferInArena`. `data` field is populated in
|
||||
// `FinishTensorAllocation` after static memory planning.
|
||||
// `FinishModelAllocation` after static memory planning.
|
||||
// TODO(b/150257460) As a future optimization, this struct could be replaced by
|
||||
// a union, since once `data` is populated, `bytes` and `node_idx` is not
|
||||
// needed.
|
||||
@ -59,7 +69,17 @@ typedef struct {
|
||||
|
||||
// Allocator responsible for allocating memory for all intermediate tensors
|
||||
// necessary to invoke a model.
|
||||
|
||||
//
|
||||
// The lifetime of the model, tensor arena and error reporter must be at
|
||||
// least as long as that of the allocator object, since the allocator needs
|
||||
// them to be accessible during its entire lifetime.
|
||||
//
|
||||
// The MicroAllocator simply plans out additional allocations that are required
|
||||
// to standup a model for inference in TF Micro. This class currently relies on
|
||||
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
|
||||
// arena. These allocations are divided into head (non-persistent) and tail
|
||||
// (persistent) regions:
|
||||
//
|
||||
// Memory layout to help understand how it works
|
||||
// This information could change in the future version.
|
||||
// ************** .memory_allocator->GetBuffer()
|
||||
@ -72,42 +92,73 @@ typedef struct {
|
||||
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
|
||||
class MicroAllocator {
|
||||
public:
|
||||
// The lifetime of the model, tensor allocator and error reporter must be at
|
||||
// least as long as that of the allocator object, since the allocator needs
|
||||
// them to be accessible during its entire lifetime.
|
||||
|
||||
// Creates a MicroAllocator instance from a given tensor arena. This arena
|
||||
// will be managed by the created instance.
|
||||
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
|
||||
// bytes aligned, otherwise some head room will be wasted.
|
||||
MicroAllocator(TfLiteContext* context, const Model* model,
|
||||
uint8_t* tensor_arena, size_t arena_size,
|
||||
ErrorReporter* error_reporter);
|
||||
// TODO(b/157615197): Cleanup constructor + factory usage.
|
||||
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// Runs through the model and allocates all necessary input, output and
|
||||
// intermediate tensors.
|
||||
// WARNING: doing any allocation after calling this method has the risk of
|
||||
// corrupting tensor data so this method should be the last non-const method
|
||||
// called in this class.
|
||||
TfLiteStatus FinishTensorAllocation();
|
||||
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
|
||||
// intance. This allocator instance will use the SimpleMemoryAllocator
|
||||
// instance to manage allocations internally.
|
||||
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
// Returns the arena usage in bytes, only available after
|
||||
// `FinishTensorAllocation`. Otherwise, it will return 0.
|
||||
size_t used_bytes() const;
|
||||
// Begin allocating internal resources required for model inference.
|
||||
// This method will run through the flatbuffer data supplied in the model to
|
||||
// properly allocate tensor, node, and op registration data. This method is
|
||||
// expected to be followed with a call to FinishModelAllocation() before
|
||||
// resuming allocation with another model. All persistent tensor buffers are
|
||||
// stored in the out-param eval_tensors. This value is allocated from the
|
||||
// persistent memory arena and will be used to host runtime tensor buffers.
|
||||
TfLiteStatus StartModelAllocation(
|
||||
const Model* model, const MicroOpResolver& op_resolver,
|
||||
NodeAndRegistration** node_and_registrations,
|
||||
TfLiteEvalTensor** eval_tensors);
|
||||
|
||||
// Run through the model to allocate nodes and registrations. We need to keep
|
||||
// them for the entire life time of the model to allow persistent tensors.
|
||||
// This method needs to be called before FinishTensorAllocation method.
|
||||
TfLiteStatus AllocateNodeAndRegistrations(
|
||||
const OpResolver& op_resolver,
|
||||
NodeAndRegistration** node_and_registrations);
|
||||
// Finish allocating internal resources required for model inference.
|
||||
// This method will plan non-persistent buffers and commit a memory plan to
|
||||
// the 'head' section of the memory arena. All variable tensor data will also
|
||||
// be allocated. This method should be called after assigning model resources
|
||||
// in StartModelAllocation(). The eval_tensors pointer should be the value
|
||||
// passed into this class during StartModelAllocation().
|
||||
TfLiteStatus FinishModelAllocation(const Model* model,
|
||||
TfLiteEvalTensor* eval_tensors);
|
||||
|
||||
// Allocates a TfLiteTensor struct and populates the returned value with
|
||||
// properties from the model flatbuffer. This struct is allocated from
|
||||
// persistent arena memory is only guaranteed for the lifetime of the
|
||||
// application. The eval_tensors pointer should be the value passed into this
|
||||
// class during StartModelAllocation() and contains the source-of-truth for
|
||||
// buffers.
|
||||
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
|
||||
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
|
||||
|
||||
// Allocates a TfLiteTensor struct and populates the returned value with
|
||||
// properties from the model flatbuffer. This struct is allocated from
|
||||
// temporary arena memory is only guaranteed until a call is made to
|
||||
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
|
||||
// into this class during StartModelAllocation() and contains the
|
||||
// source-of-truth for buffers.
|
||||
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
|
||||
TfLiteEvalTensor* eval_tensors,
|
||||
int tensor_index);
|
||||
|
||||
// Resets all temporary allocations. This method should be called after a
|
||||
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
|
||||
// AllocateTfLiteTensor()).
|
||||
virtual void ResetTempAllocations();
|
||||
|
||||
// Allocates persistent buffer which has the same life time as the allocator.
|
||||
// The memory is immediately available and is allocated from the tail of the
|
||||
// arena.
|
||||
TfLiteStatus AllocatePersistentBuffer(size_t bytes, void** ptr);
|
||||
void* AllocatePersistentBuffer(size_t bytes);
|
||||
|
||||
// Register a scratch buffer of size `bytes` for Node with `node_id`.
|
||||
// This method only allocates a BufferHandle holding information for memory
|
||||
// planning. The buffer ptr is ready after `FinishTensorAllocation` and can
|
||||
// planning. The buffer ptr is ready after `FinishModelAllocation` and can
|
||||
// be retrieved by `GetScratchBuffer` method using the returned buffer_idx.
|
||||
// Note that there should be no tail allocation between two consecutive
|
||||
// `RequestScratchBufferInArena` calls.
|
||||
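The comments above describe the two-phase flow that replaces FinishTensorAllocation(). A minimal sketch of that flow, assuming model, op_resolver and error_reporter already exist; kernel init and prepare would normally run between the two calls:

// Sketch only: arena-backed MicroAllocator lifecycle.
alignas(16) static uint8_t tensor_arena[8 * 1024];
MicroAllocator* allocator =
    MicroAllocator::Create(tensor_arena, sizeof(tensor_arena), error_reporter);
NodeAndRegistration* node_and_registrations = nullptr;
TfLiteEvalTensor* eval_tensors = nullptr;
if (allocator->StartModelAllocation(model, op_resolver,
                                    &node_and_registrations,
                                    &eval_tensors) == kTfLiteOk &&
    allocator->FinishModelAllocation(model, eval_tensors) == kTfLiteOk) {
  size_t used = allocator->used_bytes();  // only meaningful after Finish
  (void)used;
}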
@ -116,16 +167,74 @@ class MicroAllocator {
|
||||
// Returns the pointer to the planned scratch buffer.
|
||||
void* GetScratchBuffer(int buffer_idx) const;
|
||||
|
||||
private:
|
||||
TfLiteStatus Init();
|
||||
// Returns the arena usage in bytes, only available after
|
||||
// `FinishModelAllocation`. Otherwise, it will return 0.
|
||||
size_t used_bytes() const;
|
||||
|
||||
const Model* model_;
|
||||
// A simple memory allocator that always allocate from the arena tail.
|
||||
protected:
|
||||
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
|
||||
ErrorReporter* error_reporter);
|
||||
virtual ~MicroAllocator();
|
||||
|
||||
// Allocates an array in the arena to hold pointers to the node and
|
||||
// registration pointers required to represent the inference graph of the
|
||||
// model.
|
||||
virtual TfLiteStatus AllocateNodeAndRegistrations(
|
||||
const Model* model, NodeAndRegistration** node_and_registrations);
|
||||
|
||||
// Populates node and registration pointers representing the inference graph
|
||||
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
|
||||
// instance). Persistent data (e.g. operator data) is allocated from the
|
||||
// arena.
|
||||
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
|
||||
const Model* model, const MicroOpResolver& op_resolver,
|
||||
NodeAndRegistration* node_and_registrations);
|
||||
|
||||
// Allocates the list of persistent TfLiteEvalTensors that are used for the
|
||||
// "eval" phase of model inference. These structs will be the source of truth
|
||||
// for all tensor buffers. Allocation results are stored in the out-param
|
||||
// eval_tensors.
|
||||
virtual TfLiteStatus AllocateTfLiteEvalTensors(
|
||||
const Model* model, TfLiteEvalTensor** eval_tensors);
|
||||
|
||||
// Allocates persistent tensor buffers for variable tensors in the subgraph.
|
||||
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
|
||||
TfLiteEvalTensor* eval_tensors);
|
||||
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this method. It is only used to record TfLiteTensor persistent allocations.
|
||||
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
|
||||
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
|
||||
|
||||
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
|
||||
// quantization data is allocated from either the tail (persistent) or temp
|
||||
// sections of the arena based on the allocation flag.
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this function since all allocations for quantized data will take place in
|
||||
// the temp section.
|
||||
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
|
||||
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
|
||||
int tensor_index, bool allocate_temp);
|
||||
|
||||
ErrorReporter* error_reporter() const;
|
||||
|
||||
// Returns the first subgraph from the model.
|
||||
const SubGraph* GetSubGraphFromModel(const Model* model);
|
||||
|
||||
private:
|
||||
// Commits a memory plan for all non-persistent buffer allocations in the
|
||||
// 'head' section of the memory arena. The eval_tensors pointer is the list of
|
||||
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
|
||||
// will be allocated into the head section in this function call.
|
||||
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
|
||||
const SubGraph* subgraph,
|
||||
TfLiteEvalTensor* eval_tensors);
|
||||
|
||||
// A simple memory allocator that always allocate from the arena tail or head.
|
||||
SimpleMemoryAllocator* memory_allocator_;
|
||||
|
||||
ErrorReporter* error_reporter_;
|
||||
TfLiteContext* context_;
|
||||
// Indicating if the allocator is ready for allocation.
|
||||
bool active_ = false;
|
||||
bool model_is_allocating_;
|
||||
|
||||
// In reverse order for efficiency.
|
||||
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
|
||||
@ -134,9 +243,7 @@ class MicroAllocator {
|
||||
// How many scratch buffers have been allocated.
|
||||
size_t scratch_buffer_count_ = 0;
|
||||
|
||||
const SubGraph* subgraph_;
|
||||
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
|
||||
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
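From a kernel's point of view, the allocator entry points above are reached through the TfLiteContext hooks wired up by the interpreter. A sketch of the usual split, with OpDataSketch and the 512-byte scratch size as placeholders:

// Sketch only: persistent data in Init, a scratch request in Prepare, and
// the resolved scratch pointer in Eval (valid after FinishModelAllocation).
struct OpDataSketch {
  int scratch_index;
};

void* OpInitSketch(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(OpDataSketch));
}

TfLiteStatus OpPrepareSketch(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<OpDataSketch*>(node->user_data);
  return context->RequestScratchBufferInArena(context, /*bytes=*/512,
                                              &data->scratch_index);
}

TfLiteStatus OpEvalSketch(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<OpDataSketch*>(node->user_data);
  void* scratch = context->GetScratchBuffer(context, data->scratch_index);
  (void)scratch;  // temporary working memory for this invocation
  return kTfLiteOk;
}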
@ -15,7 +15,10 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
#include "tensorflow/lite/micro/micro_string.h"
|
||||
#endif
|
||||
|
||||
|
||||
@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
// Minor modifications made for Espruino Microcontroller build by Gordon Williams <gw@pur3.co.uk>
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -25,8 +27,6 @@ class MicroErrorReporter : public ErrorReporter {
|
||||
public:
|
||||
~MicroErrorReporter() override {}
|
||||
int Report(const char* format, va_list args) override;
|
||||
|
||||
private:
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
|
||||
@ -15,15 +15,16 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
class MicroErrorReporter : public ErrorReporter {
|
||||
public:
|
||||
~MicroErrorReporter() {}
|
||||
~MicroErrorReporter() override {}
|
||||
int Report(const char* format, va_list args) override;
|
||||
|
||||
private:
|
||||
|
||||
@ -14,16 +14,24 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/micro/micro_interpreter.h"
|
||||
|
||||
#include <cstdarg>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/tensor_utils.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
|
||||
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
|
||||
return registration->custom_name;
|
||||
@ -31,15 +39,20 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
|
||||
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
|
||||
}
|
||||
}
|
||||
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace internal {
|
||||
|
||||
TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
|
||||
size_t bytes, void** ptr) {
|
||||
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
|
||||
MicroAllocator* allocator, const Model* model)
|
||||
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
|
||||
|
||||
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
|
||||
size_t bytes) {
|
||||
return reinterpret_cast<ContextHelper*>(ctx->impl_)
|
||||
->allocator_->AllocatePersistentBuffer(bytes, ptr);
|
||||
->allocator_->AllocatePersistentBuffer(bytes);
|
||||
}
|
||||
|
||||
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
|
||||
@ -57,55 +70,72 @@ void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
|
||||
|
||||
void ContextHelper::ReportOpError(struct TfLiteContext* context,
|
||||
const char* format, ...) {
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
|
||||
va_end(args);
|
||||
#endif
|
||||
}
|
||||
|
||||
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_idx) {
|
||||
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
|
||||
return helper->allocator_->AllocateTempTfLiteTensor(
|
||||
helper->model_, helper->eval_tensors_, tensor_idx);
|
||||
}
|
||||
|
||||
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
|
||||
const struct TfLiteContext* context, int tensor_idx) {
|
||||
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
|
||||
return &helper->eval_tensors_[tensor_idx];
|
||||
}
|
||||
|
||||
void ContextHelper::SetNodeIndex(int idx) { current_node_idx_ = idx; }
|
||||
|
||||
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
|
||||
eval_tensors_ = eval_tensors;
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
const OpResolver& op_resolver,
|
||||
const MicroOpResolver& op_resolver,
|
||||
uint8_t* tensor_arena,
|
||||
size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter)
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler)
|
||||
: model_(model),
|
||||
op_resolver_(op_resolver),
|
||||
error_reporter_(error_reporter),
|
||||
allocator_(&context_, model_, tensor_arena, tensor_arena_size,
|
||||
error_reporter_),
|
||||
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
|
||||
error_reporter)),
|
||||
tensors_allocated_(false),
|
||||
context_helper_(error_reporter_, &allocator_) {
|
||||
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
|
||||
model->subgraphs();
|
||||
if (subgraphs->size() != 1) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"Only 1 subgraph is currently supported.\n");
|
||||
initialization_status_ = kTfLiteError;
|
||||
return;
|
||||
}
|
||||
subgraph_ = (*subgraphs)[0];
|
||||
initialization_status_(kTfLiteError),
|
||||
eval_tensors_(nullptr),
|
||||
context_helper_(error_reporter_, &allocator_, model),
|
||||
input_tensor_(nullptr),
|
||||
output_tensor_(nullptr) {
|
||||
Init(profiler);
|
||||
}
|
||||
|
||||
context_.impl_ = static_cast<void*>(&context_helper_);
|
||||
context_.ReportError = context_helper_.ReportOpError;
|
||||
context_.recommended_num_threads = 1;
|
||||
|
||||
// If the system is big endian then convert weights from the flatbuffer from
|
||||
// little to big endian on startup so that it does not need to be done during
|
||||
// inference.
|
||||
// NOTE: This requires that the flatbuffer is held in memory which can be
|
||||
// modified by this process.
|
||||
if (!FLATBUFFERS_LITTLEENDIAN) {
|
||||
for (size_t t = 0; t < tensors_size(); ++t) {
|
||||
TfLiteTensor* thisTensor = &context_.tensors[t];
|
||||
if (thisTensor->allocation_type == kTfLiteMmapRo)
|
||||
CorrectTensorEndianness(thisTensor);
|
||||
}
|
||||
}
|
||||
|
||||
initialization_status_ = kTfLiteOk;
|
||||
MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
const MicroOpResolver& op_resolver,
|
||||
MicroAllocator* allocator,
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler)
|
||||
: model_(model),
|
||||
op_resolver_(op_resolver),
|
||||
error_reporter_(error_reporter),
|
||||
allocator_(*allocator),
|
||||
tensors_allocated_(false),
|
||||
initialization_status_(kTfLiteError),
|
||||
eval_tensors_(nullptr),
|
||||
context_helper_(error_reporter_, &allocator_, model),
|
||||
input_tensor_(nullptr),
|
||||
output_tensor_(nullptr) {
|
||||
Init(profiler);
|
||||
}
|
||||
|
||||
MicroInterpreter::~MicroInterpreter() {
|
||||
@ -123,7 +153,28 @@ MicroInterpreter::~MicroInterpreter() {
|
||||
}
|
||||
}
|
||||
|
||||
void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
|
||||
void MicroInterpreter::Init(tflite::Profiler* profiler) {
|
||||
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
|
||||
model_->subgraphs();
|
||||
if (subgraphs->size() != 1) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Only 1 subgraph is currently supported.\n");
|
||||
initialization_status_ = kTfLiteError;
|
||||
return;
|
||||
}
|
||||
subgraph_ = (*subgraphs)[0];
|
||||
|
||||
context_.impl_ = static_cast<void*>(&context_helper_);
|
||||
context_.ReportError = context_helper_.ReportOpError;
|
||||
context_.GetTensor = context_helper_.GetTensor;
|
||||
context_.GetEvalTensor = context_helper_.GetEvalTensor;
|
||||
context_.recommended_num_threads = 1;
|
||||
context_.profiler = profiler;
|
||||
|
||||
initialization_status_ = kTfLiteOk;
|
||||
}
|
||||
|
||||
void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
|
||||
int32_t tensorSize = 1;
|
||||
for (int d = 0; d < tensorCorr->dims->size; ++d)
|
||||
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
|
||||
@ -147,6 +198,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
|
||||
case TfLiteType::kTfLiteComplex64:
|
||||
CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
|
||||
break;
|
||||
case TfLiteType::kTfLiteComplex128:
|
||||
CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize);
|
||||
break;
|
||||
default:
|
||||
// Do nothing for other data types.
|
||||
break;
|
||||
@ -161,8 +215,42 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
|
||||
}
|
||||
|
||||
TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
TF_LITE_ENSURE_OK(&context_, allocator_.AllocateNodeAndRegistrations(
|
||||
op_resolver_, &node_and_registrations_));
|
||||
if (allocator_.StartModelAllocation(model_, op_resolver_,
|
||||
&node_and_registrations_,
|
||||
&eval_tensors_) != kTfLiteOk) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Failed starting model allocation.\n");
|
||||
initialization_status_ = kTfLiteError;
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
// Update the pointer now that TfLiteEvalTensor allocation has completed on
|
||||
// the context helper.
|
||||
// TODO(b/16157777): This call would not be needed if ContextHelper rolled
|
||||
// into the interpreter.
|
||||
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
|
||||
|
||||
// If the system is big endian then convert weights from the flatbuffer from
|
||||
// little to big endian on startup so that it does not need to be done during
|
||||
// inference.
|
||||
// NOTE: This requires that the flatbuffer is held in memory which can be
|
||||
// modified by this process.
|
||||
if (!FLATBUFFERS_LITTLEENDIAN) {
|
||||
for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
|
||||
if (auto* buffer =
|
||||
(*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
|
||||
// If we've found a buffer, does it have any data?
|
||||
if (auto* array = buffer->data()) {
|
||||
// If it has any data, is the data size larger than zero?
|
||||
if (array->size()) {
|
||||
// Update the endianness of the corresponding eval tensor since that
|
||||
// struct holds the buffer used at inference time.
|
||||
CorrectTensorEndianness(&eval_tensors_[t]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only allow AllocatePersistentBuffer in Init stage.
|
||||
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
|
||||
@ -189,8 +277,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
}
|
||||
context_helper_.SetNodeIndex(-1);
|
||||
|
||||
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is available
|
||||
// in Prepare stage.
|
||||
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is
|
||||
// available in Prepare stage.
|
||||
context_.RequestScratchBufferInArena =
|
||||
context_helper_.RequestScratchBufferInArena;
|
||||
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
|
||||
@ -208,6 +296,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
allocator_.ResetTempAllocations();
|
||||
}
|
||||
context_helper_.SetNodeIndex(-1);
|
||||
|
||||
@ -217,7 +306,10 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
context_.RequestScratchBufferInArena = nullptr;
|
||||
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
|
||||
|
||||
TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
|
||||
TF_LITE_ENSURE_OK(&context_,
|
||||
allocator_.FinishModelAllocation(model_, eval_tensors_));
|
||||
TF_LITE_ENSURE_STATUS(ResetVariableTensors());
|
||||
|
||||
tensors_allocated_ = true;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
@ -240,7 +332,23 @@ TfLiteStatus MicroInterpreter::Invoke() {
|
||||
auto* registration = node_and_registrations_[i].registration;
|
||||
|
||||
if (registration->invoke) {
|
||||
TfLiteStatus invoke_status = registration->invoke(&context_, node);
|
||||
TfLiteStatus invoke_status;
|
||||
#ifndef NDEBUG // Omit profiler overhead from release builds.
|
||||
// The case where profiler == nullptr is handled by
|
||||
// ScopedOperatorProfile.
|
||||
tflite::Profiler* profiler =
|
||||
reinterpret_cast<tflite::Profiler*>(context_.profiler);
|
||||
ScopedOperatorProfile scoped_profiler(
|
||||
profiler, OpNameFromRegistration(registration), i);
|
||||
#endif
|
||||
invoke_status = registration->invoke(&context_, node);
|
||||
|
||||
// All TfLiteTensor structs used in the kernel are allocated from temp
|
||||
// memory in the allocator. This creates a chain of allocations in the
|
||||
// temp section. The call below resets the chain of allocations to
|
||||
// prepare for the next call.
|
||||
allocator_.ResetTempAllocations();
|
||||
|
||||
if (invoke_status == kTfLiteError) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
@ -257,50 +365,82 @@ TfLiteStatus MicroInterpreter::Invoke() {
|
||||
|
||||
TfLiteTensor* MicroInterpreter::input(size_t index) {
|
||||
const size_t length = inputs_size();
|
||||
if ((index < 0) || (index >= length)) {
|
||||
if (index >= length) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Input index %d out of range (length is %d)", index,
|
||||
length);
|
||||
return nullptr;
|
||||
}
|
||||
return &(context_.tensors[inputs().Get(index)]);
|
||||
if (index != 0) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Input tensors not at index 0 are allocated from the "
|
||||
"persistent memory arena. Repeat calls will cause excess "
|
||||
"allocation!");
|
||||
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
|
||||
inputs().Get(index));
|
||||
}
|
||||
if (input_tensor_ == nullptr) {
|
||||
input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
|
||||
model_, eval_tensors_, inputs().Get(index));
|
||||
}
|
||||
return input_tensor_;
|
||||
}
|
||||
|
||||
TfLiteTensor* MicroInterpreter::output(size_t index) {
|
||||
const size_t length = outputs_size();
|
||||
if ((index < 0) || (index >= length)) {
|
||||
if (index >= length) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Output index %d out of range (length is %d)", index,
|
||||
length);
|
||||
return nullptr;
|
||||
}
|
||||
return &(context_.tensors[outputs().Get(index)]);
|
||||
if (index != 0) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Output tensors not at index 0 are allocated from the "
|
||||
"persistent memory arena. Repeat calls will cause excess "
|
||||
"allocation!");
|
||||
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
|
||||
outputs().Get(index));
|
||||
}
|
||||
if (output_tensor_ == nullptr) {
|
||||
// TODO(b/160894903): This API will allocate TfLiteTensor structs from
|
||||
// persistent (tail) memory and cache on this pointer.
|
||||
output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
|
||||
model_, eval_tensors_, outputs().Get(index));
|
||||
}
|
||||
return output_tensor_;
|
||||
}
|
||||
|
||||
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
|
||||
const size_t length = tensors_size();
|
||||
if ((index < 0) || (index >= length)) {
|
||||
if (index >= length) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Tensor index %d out of range (length is %d)", index,
|
||||
length);
|
||||
return nullptr;
|
||||
}
|
||||
return &context_.tensors[index];
|
||||
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
|
||||
index);
|
||||
}
|
||||
|
||||
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
|
||||
const size_t length = tensors_size();
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
TfLiteTensor* cur_tensor = tensor(i);
|
||||
if (cur_tensor->is_variable) {
|
||||
TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
|
||||
if (status != kTfLiteOk) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Failed to reset variable tensor at index: %d", i);
|
||||
return status;
|
||||
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
|
||||
auto* tensor = subgraph_->tensors()->Get(i);
|
||||
if (tensor->is_variable()) {
|
||||
size_t buffer_size;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
|
||||
|
||||
int value = 0;
|
||||
if (tensor->type() == tflite::TensorType_INT8) {
|
||||
value = tensor->quantization()->zero_point()->Get(0);
|
||||
}
|
||||
memset(eval_tensors_[i].data.raw, value, buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
|
||||
@ -15,13 +15,18 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
#include "tensorflow/lite/core/api/profiler.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/portable_type_to_tflitetype.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
#include "tensorflow/lite/type_to_tflitetype.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@ -30,29 +35,36 @@ namespace internal {
|
||||
// A helper class to encapsulate the implementation of APIs in Context.
|
||||
// context->impl_ points to an instance of this class.
|
||||
// Check tensorflow/lite/c/common.h for detailed descriptions.
|
||||
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
|
||||
class ContextHelper {
|
||||
public:
|
||||
explicit ContextHelper(ErrorReporter* error_reporter,
|
||||
MicroAllocator* allocator)
|
||||
: allocator_(allocator), error_reporter_(error_reporter) {}
|
||||
|
||||
static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
|
||||
void** ptr);
|
||||
MicroAllocator* allocator, const Model* model);
|
||||
|
||||
// Functions that will be assigned to function pointers on TfLiteContext:
|
||||
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
|
||||
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
|
||||
size_t bytes,
|
||||
int* buffer_idx);
|
||||
|
||||
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
|
||||
|
||||
static void ReportOpError(struct TfLiteContext* context, const char* format,
|
||||
...);
|
||||
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
|
||||
void SetNodeIndex(int idx) { current_node_idx_ = idx; }
|
||||
// Sets the current node index to assist with scratch buffer allocations:
|
||||
void SetNodeIndex(int idx);
|
||||
|
||||
// Sets the pointer to a list of TfLiteEvalTensor instances.
|
||||
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
|
||||
|
||||
private:
|
||||
MicroAllocator* allocator_;
|
||||
ErrorReporter* error_reporter_;
|
||||
const Model* model_;
|
||||
TfLiteEvalTensor* eval_tensors_;
|
||||
int current_node_idx_ = -1;
|
||||
};
|
||||
|
||||
@ -60,17 +72,26 @@ class ContextHelper {
|
||||
|
||||
class MicroInterpreter {
|
||||
public:
|
||||
// The lifetime of the model, op resolver, tensor arena, and error reporter
|
||||
// must be at least as long as that of the interpreter object, since the
|
||||
// interpreter may need to access them at any time. This means that you should
|
||||
// usually create them with the same scope as each other, for example having
|
||||
// them all allocated on the stack as local variables through a top-level
|
||||
// function.
|
||||
// The interpreter doesn't do any deallocation of any of the pointed-to
|
||||
// objects, ownership remains with the caller.
|
||||
MicroInterpreter(const Model* model, const OpResolver& op_resolver,
|
||||
// The lifetime of the model, op resolver, tensor arena, error reporter and
|
||||
// profiler must be at least as long as that of the interpreter object, since
|
||||
// the interpreter may need to access them at any time. This means that you
|
||||
// should usually create them with the same scope as each other, for example
|
||||
// having them all allocated on the stack as local variables through a
|
||||
// top-level function. The interpreter doesn't do any deallocation of any of
|
||||
// the pointed-to objects, ownership remains with the caller.
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
uint8_t* tensor_arena, size_t tensor_arena_size,
|
||||
ErrorReporter* error_reporter);
|
||||
ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
|
||||
// Create an interpreter instance using an existing MicroAllocator instance.
|
||||
// This constructor should be used when creating an allocator that needs to
|
||||
// have allocation handled in more than one interpreter or for recording
|
||||
// allocations inside the interpreter. The lifetime of the allocator must be
|
||||
// as long as that of the interpreter object.
|
||||
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
|
||||
MicroAllocator* allocator, ErrorReporter* error_reporter,
|
||||
tflite::Profiler* profiler = nullptr);
|
||||
|
||||
~MicroInterpreter();
|
||||
|
||||
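Putting the constructor above together with the resolver and error reporter from this update, a typical setup looks roughly like this; the model data, arena size, and the set of ops to register are placeholders:

// Sketch only: end-to-end interpreter setup with the arena-owning constructor.
tflite::MicroErrorReporter micro_error_reporter;
const tflite::Model* model = tflite::GetModel(g_model_data);  // g_model_data assumed
tflite::MicroMutableOpResolver<4> resolver(&micro_error_reporter);
// resolver.AddBuiltin(...) here for each operator the model uses; the exact
// Register_*() form depends on how far that kernel's migration has progressed.
alignas(16) static uint8_t tensor_arena[4 * 1024];
tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                     sizeof(tensor_arena),
                                     &micro_error_reporter);
if (interpreter.AllocateTensors() == kTfLiteOk) {
  TfLiteTensor* input = interpreter.input(0);  // cached persistent TfLiteTensor
  // ... fill input->data ...
  if (interpreter.Invoke() == kTfLiteOk) {
    TfLiteTensor* output = interpreter.output(0);
    (void)output;
  }
}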
@ -147,8 +168,16 @@ class MicroInterpreter {
|
||||
// arena_used_bytes() + 16.
|
||||
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
|
||||
|
||||
protected:
|
||||
const MicroAllocator& allocator() const { return allocator_; }
|
||||
const TfLiteContext& context() const { return context_; }
|
||||
|
||||
private:
|
||||
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
|
||||
// TODO(b/158263161): Consider switching to Create() function to enable better
|
||||
// error reporting during initialization.
|
||||
void Init(tflite::Profiler* profiler);
|
||||
|
||||
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
|
||||
|
||||
template <class T>
|
||||
void CorrectTensorDataEndianness(T* data, int32_t size);
|
||||
@ -156,16 +185,22 @@ class MicroInterpreter {
|
||||
NodeAndRegistration* node_and_registrations_ = nullptr;
|
||||
|
||||
const Model* model_;
|
||||
const OpResolver& op_resolver_;
|
||||
const MicroOpResolver& op_resolver_;
|
||||
ErrorReporter* error_reporter_;
|
||||
TfLiteContext context_ = {};
|
||||
MicroAllocator allocator_;
|
||||
MicroAllocator& allocator_;
|
||||
bool tensors_allocated_;
|
||||
|
||||
TfLiteStatus initialization_status_;
|
||||
|
||||
const SubGraph* subgraph_;
|
||||
TfLiteEvalTensor* eval_tensors_;
|
||||
internal::ContextHelper context_helper_;
|
||||
|
||||
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
|
||||
// TfLiteEvalTensor buffers.
|
||||
TfLiteTensor* input_tensor_;
|
||||
TfLiteTensor* output_tensor_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
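Not part of the upstream diff: a minimal sketch of how the updated constructor is typically driven. The arena size, the buffer names and the idea of passing the resolver in from the caller are assumptions for illustration, not code from this commit.

// Sketch only: kArenaSize, tensor_arena and RunOnce are hypothetical names.
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

constexpr size_t kArenaSize = 8 * 1024;  // assumed; size it for your model
static uint8_t tensor_arena[kArenaSize];

// The model, resolver, arena and error reporter are all kept alive for the
// whole life of the interpreter, as the lifetime comments above require.
TfLiteStatus RunOnce(const void* model_data,
                     const tflite::MicroOpResolver& resolver) {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(model_data);
  // The trailing Profiler* argument defaults to nullptr, so existing callers
  // only gain an optional parameter.
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  TfLiteStatus status = interpreter.AllocateTensors();
  if (status != kTfLiteOk) return status;
  // Fill interpreter.input(0) here, then run the graph.
  return interpreter.Invoke();
}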
@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -16,110 +16,441 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include <cstdio>
#include <cstring>

#ifndef TFLITE_REGISTRATIONS_MAX
#define TFLITE_REGISTRATIONS_MAX (128)
#endif
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Op versions discussed in this file are enumerated here:
// tensorflow/lite/tools/versioning/op_version.cc

inline int MicroOpResolverAnyVersion() { return 0; }

template <unsigned int tOpCount = TFLITE_REGISTRATIONS_MAX>
class MicroOpResolver : public OpResolver {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
 public:
  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                   int version) const override {
  explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
      : error_reporter_(error_reporter) {}

  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
    if (op == BuiltinOperator_CUSTOM) return nullptr;

    for (unsigned int i = 0; i < registrations_len_; ++i) {
      const TfLiteRegistration& registration = registrations_[i];
      if ((registration.builtin_code == op) &&
          (registration.version == MicroOpResolverAnyVersion() ||
           version == MicroOpResolverAnyVersion() ||
           registration.version == version)) {
      if (registration.builtin_code == op) {
        return &registration;
      }
    }
    return nullptr;
  }

  const TfLiteRegistration* FindOp(const char* op, int version) const override {
  const TfLiteRegistration* FindOp(const char* op) const override {
    for (unsigned int i = 0; i < registrations_len_; ++i) {
      const TfLiteRegistration& registration = registrations_[i];
      if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
          (strcmp(registration.custom_name, op) == 0) &&
          (registration.version == MicroOpResolverAnyVersion() ||
           version == MicroOpResolverAnyVersion() ||
           registration.version == version)) {
          (strcmp(registration.custom_name, op) == 0)) {
        return &registration;
      }
    }
    return nullptr;
  }

  void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
                  int version = 1) {
    if (registrations_len_ >= tOpCount) {
      // TODO(b/147748244) - Add error reporting hooks so we can report this!
      return;
  MicroOpResolver::BuiltinParseFunction GetOpDataParser(
      BuiltinOperator op) const override {
    TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
    for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
      if (builtin_codes_[i] == op) return builtin_parsers_[i];
    }
    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
    registrations_len_ += 1;

    *new_registration = *registration;
    new_registration->builtin_code = op;
    new_registration->version = version;
    return nullptr;
  }

  void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
                  int min_version, int max_version) {
    for (int version = min_version; version <= max_version; ++version) {
      AddBuiltin(op, registration, version);
    }
  }

  void AddCustom(const char* name, TfLiteRegistration* registration,
                 int version = 1) {
  // Registers a Custom Operator with the MicroOpResolver.
  //
  // Only the first call for a given name will be successful. i.e. if this
  // function is called again for a previously added Custom Operator, the
  // MicroOpResolver will be unchanged and this function will return
  // kTfLiteError.
  TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
    if (registrations_len_ >= tOpCount) {
      // TODO(b/147748244) - Add error reporting hooks so we can report this!
      return;
      if (error_reporter_) {
        TF_LITE_REPORT_ERROR(
            error_reporter_,
            "Couldn't register custom op '%s', resolver size is too small (%d)",
            name, tOpCount);
      }
      return kTfLiteError;
    }

    if (FindOp(name) != nullptr) {
      if (error_reporter_ != nullptr) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Calling AddCustom for the same op more than once "
                             "is not supported (Op: %s).",
                             name);
      }
      return kTfLiteError;
    }

    TfLiteRegistration* new_registration = &registrations_[registrations_len_];
    registrations_len_ += 1;

    *new_registration = *registration;
    new_registration->builtin_code = BuiltinOperator_CUSTOM;
    new_registration->custom_name = name;
    new_registration->version = version;
    return kTfLiteOk;
  }

  void AddCustom(const char* name, TfLiteRegistration* registration,
                 int min_version, int max_version) {
    for (int version = min_version; version <= max_version; ++version) {
      AddCustom(name, registration, version);
    }
  // The Add* functions below add the various Builtin operators to the
  // MicroMutableOpResolver object.

  TfLiteStatus AddAbs() {
    return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
                      ParseAbs);
  }

  TfLiteStatus AddAdd() {
    return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
                      ParseAdd);
  }

  TfLiteStatus AddArgMax() {
    return AddBuiltin(BuiltinOperator_ARG_MAX,
                      tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
  }

  TfLiteStatus AddArgMin() {
    return AddBuiltin(BuiltinOperator_ARG_MIN,
                      tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
  }

  TfLiteStatus AddAveragePool2D() {
    return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
                      tflite::ops::micro::Register_AVERAGE_POOL_2D(),
                      ParsePool);
  }

  TfLiteStatus AddCeil() {
    return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
                      ParseCeil);
  }

  TfLiteStatus AddCircularBuffer() {
    return AddCustom("CIRCULAR_BUFFER",
                     tflite::ops::micro::Register_CIRCULAR_BUFFER());
  }

  TfLiteStatus AddConcatenation() {
    return AddBuiltin(BuiltinOperator_CONCATENATION,
                      tflite::ops::micro::Register_CONCATENATION(),
                      ParseConcatenation);
  }

  TfLiteStatus AddConv2D() {
    return AddBuiltin(BuiltinOperator_CONV_2D,
                      tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
  }

  TfLiteStatus AddCos() {
    return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
                      ParseCos);
  }

  TfLiteStatus AddDepthwiseConv2D() {
    return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
                      tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
                      ParseDepthwiseConv2D);
  }

  TfLiteStatus AddDequantize() {
    return AddBuiltin(BuiltinOperator_DEQUANTIZE,
                      tflite::ops::micro::Register_DEQUANTIZE(),
                      ParseDequantize);
  }

  TfLiteStatus AddEqual() {
    return AddBuiltin(BuiltinOperator_EQUAL,
                      tflite::ops::micro::Register_EQUAL(), ParseEqual);
  }

  TfLiteStatus AddFloor() {
    return AddBuiltin(BuiltinOperator_FLOOR,
                      tflite::ops::micro::Register_FLOOR(), ParseFloor);
  }

  TfLiteStatus AddFullyConnected() {
    return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
                      tflite::ops::micro::Register_FULLY_CONNECTED(),
                      ParseFullyConnected);
  }

  TfLiteStatus AddGreater() {
    return AddBuiltin(BuiltinOperator_GREATER,
                      tflite::ops::micro::Register_GREATER(), ParseGreater);
  }

  TfLiteStatus AddGreaterEqual() {
    return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
                      tflite::ops::micro::Register_GREATER_EQUAL(),
                      ParseGreaterEqual);
  }

  TfLiteStatus AddHardSwish() {
    return AddBuiltin(BuiltinOperator_HARD_SWISH,
                      tflite::ops::micro::Register_HARD_SWISH(),
                      ParseHardSwish);
  }

  TfLiteStatus AddL2Normalization() {
    return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
                      tflite::ops::micro::Register_L2_NORMALIZATION(),
                      ParseL2Normalization);
  }

  TfLiteStatus AddLess() {
    return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
                      ParseLess);
  }

  TfLiteStatus AddLessEqual() {
    return AddBuiltin(BuiltinOperator_LESS_EQUAL,
                      tflite::ops::micro::Register_LESS_EQUAL(),
                      ParseLessEqual);
  }

  TfLiteStatus AddLog() {
    return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
                      ParseLog);
  }

  TfLiteStatus AddLogicalAnd() {
    return AddBuiltin(BuiltinOperator_LOGICAL_AND,
                      tflite::ops::micro::Register_LOGICAL_AND(),
                      ParseLogicalAnd);
  }

  TfLiteStatus AddLogicalNot() {
    return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
                      tflite::ops::micro::Register_LOGICAL_NOT(),
                      ParseLogicalNot);
  }

  TfLiteStatus AddLogicalOr() {
    return AddBuiltin(BuiltinOperator_LOGICAL_OR,
                      tflite::ops::micro::Register_LOGICAL_OR(),
                      ParseLogicalOr);
  }

  TfLiteStatus AddLogistic() {
    return AddBuiltin(BuiltinOperator_LOGISTIC,
                      tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
  }

  TfLiteStatus AddMaximum() {
    return AddBuiltin(BuiltinOperator_MAXIMUM,
                      tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
  }

  TfLiteStatus AddMaxPool2D() {
    return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
                      tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
  }

  TfLiteStatus AddMean() {
    return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
                      ParseReducer);
  }

  TfLiteStatus AddMinimum() {
    return AddBuiltin(BuiltinOperator_MINIMUM,
                      tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
  }

  TfLiteStatus AddMul() {
    return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
                      ParseMul);
  }

  TfLiteStatus AddNeg() {
    return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
                      ParseNeg);
  }

  TfLiteStatus AddNotEqual() {
    return AddBuiltin(BuiltinOperator_NOT_EQUAL,
                      tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
  }

  TfLiteStatus AddPack() {
    return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
                      ParsePack);
  }

  TfLiteStatus AddPad() {
    return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
                      ParsePad);
  }

  TfLiteStatus AddPadV2() {
    return AddBuiltin(BuiltinOperator_PADV2,
                      tflite::ops::micro::Register_PADV2(), ParsePadV2);
  }

  TfLiteStatus AddPrelu() {
    return AddBuiltin(BuiltinOperator_PRELU,
                      tflite::ops::micro::Register_PRELU(), ParsePrelu);
  }

  TfLiteStatus AddQuantize() {
    return AddBuiltin(BuiltinOperator_QUANTIZE,
                      tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
  }

  TfLiteStatus AddRelu() {
    return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
                      ParseRelu);
  }

  TfLiteStatus AddRelu6() {
    return AddBuiltin(BuiltinOperator_RELU6,
                      tflite::ops::micro::Register_RELU6(), ParseRelu6);
  }

  TfLiteStatus AddReshape() {
    return AddBuiltin(BuiltinOperator_RESHAPE,
                      tflite::ops::micro::Register_RESHAPE(), ParseReshape);
  }

  TfLiteStatus AddResizeNearestNeighbor() {
    return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
                      tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
                      ParseResizeNearestNeighbor);
  }

  TfLiteStatus AddRound() {
    return AddBuiltin(BuiltinOperator_ROUND,
                      tflite::ops::micro::Register_ROUND(), ParseRound);
  }

  TfLiteStatus AddRsqrt() {
    return AddBuiltin(BuiltinOperator_RSQRT,
                      tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
  }

  TfLiteStatus AddSin() {
    return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
                      ParseSin);
  }

  TfLiteStatus AddSoftmax() {
    return AddBuiltin(BuiltinOperator_SOFTMAX,
                      tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
  }

  TfLiteStatus AddSplit() {
    return AddBuiltin(BuiltinOperator_SPLIT,
                      tflite::ops::micro::Register_SPLIT(), ParseSplit);
  }

  TfLiteStatus AddSqrt() {
    return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
                      ParseSqrt);
  }

  TfLiteStatus AddSquare() {
    return AddBuiltin(BuiltinOperator_SQUARE,
                      tflite::ops::micro::Register_SQUARE(), ParseSquare);
  }

  TfLiteStatus AddStridedSlice() {
    return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
                      tflite::ops::micro::Register_STRIDED_SLICE(),
                      ParseStridedSlice);
  }

  TfLiteStatus AddSub() {
    return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
                      ParseSub);
  }

  TfLiteStatus AddSvdf() {
    return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
                      ParseSvdf);
  }

  TfLiteStatus AddTanh() {
    return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
                      ParseTanh);
  }

  TfLiteStatus AddUnpack() {
    return AddBuiltin(BuiltinOperator_UNPACK,
                      tflite::ops::micro::Register_UNPACK(), ParseUnpack);
  }

  unsigned int GetRegistrationLength() { return registrations_len_; }

 private:
  TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
                          const TfLiteRegistration& registration,
                          MicroOpResolver::BuiltinParseFunction parser) {
    if (op == BuiltinOperator_CUSTOM) {
      if (error_reporter_ != nullptr) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Invalid parameter BuiltinOperator_CUSTOM to the "
                             "AddBuiltin function.");
      }
      return kTfLiteError;
    }

    if (FindOp(op) != nullptr) {
      if (error_reporter_ != nullptr) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Calling AddBuiltin with the same op more than "
                             "once is not supported (Op: #%d).",
                             op);
      }
      return kTfLiteError;
    }

    if (registrations_len_ >= tOpCount) {
      if (error_reporter_) {
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Couldn't register builtin op #%d, resolver size "
                             "is too small (%d).",
                             op, tOpCount);
      }
      return kTfLiteError;
    }

    registrations_[registrations_len_] = registration;
    // Strictly speaking, the builtin_code is not necessary for TFLM but filling
    // it in regardless.
    registrations_[registrations_len_].builtin_code = op;
    registrations_len_++;

    builtin_codes_[num_buitin_ops_] = op;
    builtin_parsers_[num_buitin_ops_] = parser;
    num_buitin_ops_++;

    return kTfLiteOk;
  }

  TfLiteRegistration registrations_[tOpCount];
  unsigned int registrations_len_ = 0;
 public:
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

// TODO(b/147854028): Consider switching all uses of MicroMutableOpResolver to
// MicroOpResolver.
class MicroMutableOpResolver
    : public MicroOpResolver<TFLITE_REGISTRATIONS_MAX> {
 private:
  // Arrays (and counter) to store the builtin codes and their corresponding
  // parse functions as these are registered with the Op Resolver.
  BuiltinOperator builtin_codes_[tOpCount];
  MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
  unsigned int num_buitin_ops_ = 0;

  ErrorReporter* error_reporter_;
 public:
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

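Again not part of the diff: a sketch of how a caller sizes and fills the templated resolver. The op count of 5 and the particular kernels are assumptions for a hypothetical conv/pool/dense/softmax model, not values from this commit.

// Sketch only: the template argument must cover every distinct op added.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

static tflite::MicroErrorReporter error_reporter;
static tflite::MicroMutableOpResolver<5> resolver(&error_reporter);

// Each Add* call returns kTfLiteError if the resolver is already full or if
// the same op is registered twice, so the results are worth checking.
static bool RegisterOps() {
  return resolver.AddConv2D() == kTfLiteOk &&
         resolver.AddMaxPool2D() == kTfLiteOk &&
         resolver.AddFullyConnected() == kTfLiteOk &&
         resolver.AddReshape() == kTfLiteOk &&
         resolver.AddSoftmax() == kTfLiteOk;
}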
@ -20,8 +20,18 @@ limitations under the License.
#endif

#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace {

@ -75,6 +85,8 @@ const char* TensorTypeName(TfLiteType type) {
      return "kTfLiteInt16";
    case kTfLiteComplex64:
      return "kTfLiteComplex64";
    case kTfLiteComplex128:
      return "kTfLiteComplex128";
    case kTfLiteFloat16:
      return "kTfLiteFloat16";
    case kTfLiteFloat64:
@ -95,11 +107,44 @@ const char* AllocTypeName(TfLiteAllocationType type) {
      return "kTfLiteArenaRw";
    case kTfLiteArenaRwPersistent:
      return "kTfLiteArenaRwPersistent";
    case kTfLitePersistentRo:
      return "kTfLitePersistentRo";
    case kTfLiteCustom:
      return "kTfLiteCustom";
  }
  return "(invalid)";
}
}  // namespace

// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  auto* subgraphs = model->subgraphs();
  const SubGraph* subgraph = (*subgraphs)[0];
  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
      subgraph->tensors();
  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
      model->buffers();
  TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
  for (size_t i = 0; i < tensors->size(); ++i) {
    const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
    size_t type_size, tensor_size;
    auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
    auto* array = buffer->data();
    int array_size = 0;
    if (array) {
      array_size = array->size();
    }
    BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
                           error_reporter);
    TF_LITE_REPORT_ERROR(
        error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
        !array_size && !flatbuffer_tensor.is_variable(), tensor_size);
  }
#endif
}

// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
  printf("Interpreter has %zu tensors and %zu nodes\n",
@ -113,10 +158,9 @@ void PrintInterpreterState(MicroInterpreter* interpreter) {
  for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
       tensor_index++) {
    TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
    printf("Tensor %3zu %-20s %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
           tensor->name, TensorTypeName(tensor->type),
           AllocTypeName(tensor->allocation_type), tensor->bytes,
           static_cast<double>(tensor->bytes / (1 << 20)));
    printf("Tensor %3zu %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
           TensorTypeName(tensor->type), AllocTypeName(tensor->allocation_type),
           tensor->bytes, static_cast<double>(tensor->bytes / (1 << 20)));
    PrintTfLiteIntVector(tensor->dims);
  }
  printf("\n");

@ -20,6 +20,9 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_interpreter.h"

namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
}  // namespace tflite

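A hedged usage note, not from the diff: the two helpers declared above are handy when sizing the tensor arena. Reusing the hypothetical names from the earlier sketch:

// Assumes model, error_reporter and a constructed interpreter already exist.
if (interpreter.AllocateTensors() == kTfLiteOk) {
  tflite::PrintModelData(model, &error_reporter);  // per-tensor sizes from the flatbuffer
  tflite::PrintInterpreterState(&interpreter);     // allocated tensors, dims and nodes
}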
@ -23,6 +23,7 @@ limitations under the License.

#include <cstdarg>
#include <cstdint>
#include <cstring>

namespace {

@ -125,7 +126,8 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
  const int32_t exponent_shift = 23;
  const int32_t exponent_bias = 127;
  const uint32_t fraction_mask = 0x007fffff;
  const uint32_t u = *reinterpret_cast<uint32_t*>(&f);
  uint32_t u;
  memcpy(&u, &f, sizeof(int32_t));
  const int32_t exponent =
      ((u & exponent_mask) >> exponent_shift) - exponent_bias;
  const uint32_t fraction = (u & fraction_mask);
@ -163,7 +165,47 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
  *current = '.';
  current += 1;
  *current = 0;

  // Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate
  // zeros off the end of the fraction. Every fractional value takes 7 bytes.
  // For example, 2500 would be written into the buffer as 0002500 since it
  // represents .00025.
  constexpr int kMaxFractionalDigits = 7;

  // Abort early if there is not enough space in the buffer.
  if (current_end - current <= kMaxFractionalDigits) {
    return current;
  }

  // Pre-fill buffer with zeros to ensure zero-truncation works properly.
  for (int i = 1; i < kMaxFractionalDigits; i++) {
    *(current + i) = '0';
  }

  // Track how large the fraction is to add leading zeros.
  char* previous = current;
  current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
  int fraction_digits = current - previous;
  int leading_zeros = kMaxFractionalDigits - fraction_digits;

  // Overwrite the null terminator from StrCatUInt32 to ensure zero-truncation
  // works properly.
  *current = '0';

  // Shift fraction values and prepend zeros.
  for (int i = 0; i < fraction_digits; i++) {
    current--;
    *(current + leading_zeros) = *current;
    *current = '0';
  }
  current += kMaxFractionalDigits;

  // Truncate trailing zeros for cleaner logs. Ensure we leave at least one
  // fractional character for the case when scaled_fraction is 0.
  while (*(current - 1) == '0' && (current - 1) > previous) {
    current--;
  }
  *current = 0;
  current = StrCatStr(current, (current_end - current), "*2^");
  current = StrCatInt32(current, (current_end - current), exponent);
  return current;

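One aside that is not part of the patch: the reinterpret_cast-to-memcpy change above is the standard strict-aliasing-safe way to read a float's bit pattern. A small self-contained illustration of the same field extraction:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Prints the sign, exponent and fraction fields of an IEEE-754 float, copying
// the bits with memcpy exactly as the patched FastFloatToBufferLeft now does.
static void PrintFloatBits(float f) {
  uint32_t u;
  memcpy(&u, &f, sizeof(u));
  const unsigned sign = u >> 31;
  const int exponent = static_cast<int>((u >> 23) & 0xff) - 127;
  const unsigned fraction = u & 0x007fffff;
  printf("%f -> sign=%u exponent=%d fraction=0x%06x\n",
         static_cast<double>(f), sign, exponent, fraction);
}

int main() {
  PrintFloatBits(1.5f);    // sign=0 exponent=0  fraction=0x400000
  PrintFloatBits(-0.25f);  // sign=1 exponent=-2 fraction=0x000000
  return 0;
}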
Some files were not shown because too many files have changed in this diff.