Update to new TensorFlow

Gordon Williams 2020-08-20 09:53:40 +01:00
parent 7ac9ed85f7
commit b216c275aa
110 changed files with 7905 additions and 4007 deletions

View File

@@ -99,15 +99,16 @@ void *jswrap_tfmicrointerpreter_getTFMI(JsVar *parent) {
return tfPtr;
}
JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor *tensor) {
JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, bool isInput) {
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
JsVar *mi = jsvObjectGetChild(parent, "mi", 0);
if (!tensor && !mi) {
tf_tensorfinfo tensor = tf_get(tfmi, isInput);
if (!tensor.data || !mi) {
jsExceptionHere(JSET_ERROR, "Unable to get tensor");
return 0;
}
JsVarDataArrayBufferViewType abType = ARRAYBUFFERVIEW_UNDEFINED;
switch (tensor->type) {
switch (tensor.type) {
case kTfLiteFloat32 :
abType = ARRAYBUFFERVIEW_FLOAT32; break;
case kTfLiteInt32 :
@@ -119,12 +120,12 @@ JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor
case kTfLiteInt8 :
abType = ARRAYBUFFERVIEW_INT8; break;
default:
jsExceptionHere(JSET_TYPEERROR, "Unsupported Tensor format TfLiteType:%d", tensor->type);
jsExceptionHere(JSET_TYPEERROR, "Unsupported Tensor format TfLiteType:%d", tensor.type);
return 0;
}
JsVar *ab = jsvNewArrayBufferFromString(mi,0);
JsVar *b = jswrap_typedarray_constructor(abType, ab, ((size_t)&tensor->data.f[0])-(size_t)tfmi, tensor->bytes / JSV_ARRAYBUFFER_GET_SIZE(abType));
JsVar *b = jswrap_typedarray_constructor(abType, ab, ((size_t)tensor.data)-(size_t)tfmi, tensor.bytes / JSV_ARRAYBUFFER_GET_SIZE(abType));
jsvUnLock2(ab,mi);
return b;
}
@@ -138,9 +139,7 @@ JsVar *jswrap_tfmicrointerpreter_tensorToArrayBuffer(JsVar *parent, TfLiteTensor
}
*/
JsVar *jswrap_tfmicrointerpreter_getInput(JsVar *parent) {
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
if (!tfmi) return 0;
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, tf_get_input(tfmi, 0));
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, true);
}
/*JSON{
"type" : "method",
@@ -152,9 +151,7 @@ JsVar *jswrap_tfmicrointerpreter_getInput(JsVar *parent) {
}
*/
JsVar *jswrap_tfmicrointerpreter_getOutput(JsVar *parent) {
void *tfmi = jswrap_tfmicrointerpreter_getTFMI(parent);
if (!tfmi) return 0;
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, tf_get_output(tfmi, 0));
return jswrap_tfmicrointerpreter_tensorToArrayBuffer(parent, false);
}
/*JSON{
"type" : "method",

View File

@@ -53,7 +53,7 @@ typedef struct {
tflite::ops::micro::AllOpsResolver resolver;
#else
#define TENSORFLOW_OP_COUNT 6
tflite::MicroOpResolver<TENSORFLOW_OP_COUNT> resolver;
tflite::MicroMutableOpResolver<TENSORFLOW_OP_COUNT> resolver;
#endif
// Build an interpreter to run the model with
tflite::MicroInterpreter interpreter;
@@ -88,19 +88,13 @@ bool tf_create(void *dataPtr, size_t arena_size, const char *model_data) {
new (&tf->resolver)tflite::ops::micro::AllOpsResolver();
#else
// Pull in only the operation implementations we need.
new (&tf->resolver)tflite::MicroOpResolver<TENSORFLOW_OP_COUNT>();
tf->resolver.AddBuiltin( tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(), tflite::MicroOpResolverAnyVersion());
tf->resolver.AddBuiltin( tflite::BuiltinOperator_CONV_2D,
tflite::ops::micro::Register_CONV_2D(), tflite::MicroOpResolverAnyVersion());
tf->resolver.AddBuiltin( tflite::BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(), tflite::MicroOpResolverAnyVersion());
tf->resolver.AddBuiltin( tflite::BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), tflite::MicroOpResolverAnyVersion());
tf->resolver.AddBuiltin( tflite::BuiltinOperator_FULLY_CONNECTED,
tflite::ops::micro::Register_FULLY_CONNECTED(), tflite::MicroOpResolverAnyVersion());
tf->resolver.AddBuiltin( tflite::BuiltinOperator_SOFTMAX,
tflite::ops::micro::Register_SOFTMAX(), tflite::MicroOpResolverAnyVersion());
new (&tf->resolver)tflite::MicroMutableOpResolver<TENSORFLOW_OP_COUNT>();
tf->resolver.AddDepthwiseConv2D();
tf->resolver.AddConv2D();
tf->resolver.AddAveragePool2D();
tf->resolver.AddMaxPool2D();
tf->resolver.AddFullyConnected();
tf->resolver.AddSoftmax();
#endif
// Build an interpreter to run the model with
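In this TensorFlow Lite Micro release the templated tflite::MicroMutableOpResolver replaces the old MicroOpResolver, and each AddBuiltin(BuiltinOperator_*, Register_*(), MicroOpResolverAnyVersion()) triple collapses into a single Add* call, as the hunk above shows. A hedged stand-alone sketch of the same six-op setup (the interpreter arguments are assumptions based on the stock micro API, not this file):

tflite::MicroMutableOpResolver<6> resolver;  // template argument = op count
resolver.AddDepthwiseConv2D();
resolver.AddConv2D();
resolver.AddAveragePool2D();
resolver.AddMaxPool2D();
resolver.AddFullyConnected();
resolver.AddSoftmax();
tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                     arena_size, error_reporter);
interpreter.AllocateTensors();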
@@ -133,16 +127,17 @@ bool tf_invoke(void *dataPtr) {
return true;
}
TfLiteTensor *tf_get_input(void *dataPtr, int n) {
tf_tensorfinfo tf_get(void *dataPtr, bool isInput) {
TFData *tf = (TFData*)dataPtr;
// Obtain pointers to the model's input and output tensors
return tf->interpreter.input(0);
}
TfLiteTensor *tf_get_output(void *dataPtr, int n) {
TFData *tf = (TFData*)dataPtr;
// Obtain pointers to the model's input and output tensors
return tf->interpreter.output(0);
tf_tensorfinfo inf;
inf.data = 0;
TfLiteTensor *tensor = isInput ? tf->interpreter.input(0) : tf->interpreter.output(0);
if (tensor) {
inf.type = tensor->type;
inf.data = &tensor->data.f[0];
inf.bytes = tensor->bytes;
}
return inf;
}
} // extern "C"

View File

@@ -1,8 +1,14 @@
#include "tensorflow/lite/c/common.h"
typedef struct {
void *data; // pointer to data
TfLiteType type; // data type
int bytes; // how big is the tensor
} tf_tensorfinfo;
size_t tf_get_size(size_t arena_size, const char *model_data);
bool tf_create(void *dataPtr, size_t arena_size, const char *model_data);
void tf_destroy(void *dataPtr);
bool tf_invoke(void *dataPtr);
TfLiteTensor *tf_get_input(void *dataPtr, int n);
TfLiteTensor *tf_get_output(void *dataPtr, int n);
tf_tensorfinfo tf_get(void *dataPtr, bool isInput);
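The header now forms a small C facade over the C++ interpreter. A hedged end-to-end sketch of driving it (the model bytes and arena size are assumptions, not part of this commit):

#include <stdbool.h>
#include <stdlib.h>
extern const char model_data[];                 /* .tflite flatbuffer bytes, assumed */
bool runModel(void) {
  size_t arena_size = 2048;                     /* assumed arena size */
  void *tfmi = malloc(tf_get_size(arena_size, model_data));
  if (!tfmi || !tf_create(tfmi, arena_size, model_data)) { free(tfmi); return false; }
  tf_tensorfinfo in = tf_get(tfmi, true);       /* input: .data/.type/.bytes */
  /* ...write in.bytes bytes of input to in.data... */
  tf_invoke(tfmi);
  tf_tensorfinfo out = tf_get(tfmi, false);     /* output tensor info */
  /* ...read out.bytes bytes of results from out.data... */
  tf_destroy(tfmi);
  free(tfmi);
  return true;
}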

View File

@@ -21,7 +21,7 @@ limitations under the License.
// Also update tensorflow/tensorflow.bzl and
// tensorflow/tools/pip_package/setup.py
#define TF_MAJOR_VERSION 2
#define TF_MINOR_VERSION 1
#define TF_MINOR_VERSION 4
#define TF_PATCH_VERSION 0
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
@@ -108,7 +108,7 @@ limitations under the License.
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
#define TF_GRAPH_DEF_VERSION 398 // Updated: 2020/5/11
#define TF_GRAPH_DEF_VERSION 498 // Updated: 2020/8/19
// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
//

View File

@@ -67,8 +67,9 @@ typedef struct {
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
kTfLiteActRelu1, // min(max(-1, x), 1)
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActReluN1To1, // min(max(-1, x), 1)
kTfLiteActRelu1 = kTfLiteActReluN1To1, // kTfLiteActRelu1 will be deprecated.
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActTanh,
kTfLiteActSignBit,
kTfLiteActSigmoid,
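The rename keeps old callers compiling: during the deprecation window both spellings denote the same value.

TfLiteFusedActivation a = kTfLiteActRelu1;      // old spelling, to be deprecated
TfLiteFusedActivation b = kTfLiteActReluN1To1;  // new spelling; a == b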
@@ -198,6 +199,8 @@ typedef struct {
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 4.
bool pot_scale_int16;
} TfLiteAddParams;
typedef struct {
@@ -219,6 +222,8 @@ typedef struct {
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 5.
bool pot_scale_int16;
} TfLiteSubParams;
typedef struct {

View File

@@ -79,7 +79,8 @@ TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
void TfLiteTensorDataFree(TfLiteTensor* t) {
if (t->allocation_type == kTfLiteDynamic) {
if (t->allocation_type == kTfLiteDynamic ||
t->allocation_type == kTfLitePersistentRo) {
free(t->data.raw);
}
t->data.raw = NULL;
@@ -172,7 +173,8 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
}
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
if (tensor->allocation_type != kTfLiteDynamic) {
if (tensor->allocation_type != kTfLiteDynamic &&
tensor->allocation_type != kTfLitePersistentRo) {
return;
}
// TODO(b/145340303): Tensor data should be aligned.
@@ -205,6 +207,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "BOOL";
case kTfLiteComplex64:
return "COMPLEX64";
case kTfLiteComplex128:
return "COMPLEX128";
case kTfLiteString:
return "STRING";
case kTfLiteFloat16:

View File

@@ -47,7 +47,8 @@ extern "C" {
typedef enum TfLiteStatus {
kTfLiteOk = 0,
kTfLiteError = 1,
kTfLiteDelegateError = 2
kTfLiteDelegateError = 2,
kTfLiteApplicationError = 3
} TfLiteStatus;
// The list of external context types known to TF Lite. This list exists solely
@@ -58,7 +59,7 @@ typedef enum TfLiteExternalContextType {
kTfLiteEigenContext = 0, // include eigen_support.h to use.
kTfLiteGemmLowpContext = 1, // include gemm_support.h to use.
kTfLiteEdgeTpuContext = 2, // Placeholder for Edge TPU support.
kTfLiteCpuBackendContext = 3, // include cpu_backend_support.h to use.
kTfLiteCpuBackendContext = 3, // include cpu_backend_context.h to use.
kTfLiteMaxExternalContexts = 4
} TfLiteExternalContextType;
@@ -86,8 +87,9 @@ typedef struct TfLiteIntArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1) || \
defined(HEXAGON)
int data[0];
#else
int data[];
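The flexible array member is why a TfLiteIntArray is heap-allocated with extra trailing bytes rather than declared by value. A hedged sketch of the usual pattern (TfLiteIntArrayCreate does the equivalent internally):

// Allocate the header plus room for 4 trailing ints; data[] occupies them.
TfLiteIntArray *a = (TfLiteIntArray *)malloc(sizeof(TfLiteIntArray) + 4 * sizeof(int));
a->size = 4;
a->data[0] = 1;  // dimensions etc. live in the trailing storage
free(a);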
@@ -125,6 +127,7 @@ typedef struct TfLiteFloatArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1
float data[0];
@@ -203,6 +206,7 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
// the current function, while also reporting the location of the error.
// `a` and `b` may be evaluated more than once, so no side effects or
// extremely expensive computations should be done.
// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
#define TF_LITE_ENSURE_EQ(context, a, b) \
do { \
if ((a) != (b)) { \
@@ -230,11 +234,32 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
} \
} while (0)
// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#else
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#endif // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
#endif // SWIG
// Single-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex64 {
float re, im; // real and imaginary parts, respectively.
} TfLiteComplex64;
// Double-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex128 {
double re, im; // real and imaginary parts, respectively.
} TfLiteComplex128;
// Half precision data type compatible with the C99 definition.
typedef struct TfLiteFloat16 {
uint16_t data;
@@ -254,6 +279,7 @@ typedef enum {
kTfLiteInt8 = 9,
kTfLiteFloat16 = 10,
kTfLiteFloat64 = 11,
kTfLiteComplex128 = 12,
} TfLiteType;
// Return the name of a given type, for error reporting purposes.
@@ -310,26 +336,39 @@ typedef union TfLitePtrUnion {
int64_t* i64;
float* f;
TfLiteFloat16* f16;
double* f64;
char* raw;
const char* raw_const;
uint8_t* uint8;
bool* b;
int16_t* i16;
TfLiteComplex64* c64;
TfLiteComplex128* c128;
int8_t* int8;
/* Only use this member. */
void* data;
} TfLitePtrUnion;
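Per the comment, new code should go through the generic data member and cast by type; the typed members remain for existing call sites. A hedged sketch:

// Read a float tensor's first element via the preferred generic pointer.
float FirstFloat(const TfLiteTensor *t) {
  if (t->type != kTfLiteFloat32) return 0.0f;  // caller handles other types
  return ((const float *)t->data.data)[0];
}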
// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped
// data (or data externally allocated). kTfLiteArenaRw is arena allocated
// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
// Memory allocation strategies.
// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
// and available during eval.
// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
// only available during eval.
// * kTfLiteDynamic: Allocated during eval, or for string tensors.
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
// useful for tensors that can be computed during prepare and treated
// as constant inputs for downstream ops (also in prepare).
// * kTfLiteCustom: Custom memory allocation provided by the user. See
// TfLiteCustomAllocation below.
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
kTfLiteArenaRw,
kTfLiteArenaRwPersistent,
kTfLiteDynamic,
kTfLitePersistentRo,
kTfLiteCustom,
} TfLiteAllocationType;
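This expanded comment pairs with the TfLiteTensorDataFree/TfLiteTensorRealloc hunks earlier in this diff: only the heap-backed strategies may be freed or resized by those helpers. A one-line predicate capturing that rule (a sketch, not an API in this commit):

bool IsHeapBacked(const TfLiteTensor *t) {
  return t->allocation_type == kTfLiteDynamic ||
         t->allocation_type == kTfLitePersistentRo;  // matches the new guards
}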
// The delegates should use zero or positive integers to represent handles.
@@ -362,8 +401,18 @@ typedef struct TfLiteSparsity {
int dim_metadata_size;
} TfLiteSparsity;
// Defines a custom memory allocation not owned by the runtime.
// `data` should be aligned to kDefaultTensorAlignment defined in
// lite/util.h. (Currently 64 bytes)
// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
typedef struct TfLiteCustomAllocation {
void* data;
size_t bytes;
} TfLiteCustomAllocation;
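A hedged sketch of filling this struct with user-owned storage; the 64-byte alignment follows the comment above, and aligned_alloc (C11/C++17) is an assumption about the host toolchain:

void *buf = aligned_alloc(64, 1024);           // kDefaultTensorAlignment per the comment
TfLiteCustomAllocation alloc = { buf, 1024 };  // {data, bytes}
// ...hand `alloc` to Interpreter.SetCustomAllocationForTensor, per the NOTE...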
// A tensor in the interpreter system which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
typedef struct TfLiteTensor {
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
@@ -429,31 +478,6 @@ typedef struct TfLiteTensor {
const TfLiteIntArray* dims_signature;
} TfLiteTensor;
#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
void TfLiteTensorDataFree(TfLiteTensor* t);
// Free quantization data.
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
// Free sparsity parameters.
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
// Free memory of tensor `t`.
void TfLiteTensorFree(TfLiteTensor* t);
// Set all of a tensor's fields (and free any previously allocated data).
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor);
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
// types other than kTfLiteDynamic will be ignored.
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
#endif // TF_LITE_STATIC_MEMORY
// A structure representing an instance of a node.
// This structure only exhibits the inputs, outputs and user defined data, not
// other features like the type.
@@ -490,6 +514,130 @@ typedef struct TfLiteNode {
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
} TfLiteNode;
#else // defined(TF_LITE_STATIC_MEMORY)?
// NOTE: This flag is opt-in only at compile time.
//
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
// contains only the minimum fields required to initialize and prepare a micro
// inference graph. The fields in this struct have been ordered from
// largest-to-smallest for optimal struct sizeof.
//
// This struct does not use:
// - allocation
// - buffer_handle
// - data_is_stale
// - delegate
// - dims_signature
// - name
// - sparsity
typedef struct TfLiteTensor {
// TODO(b/155784997): Consider consolidating these quantization fields:
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;
// Quantization information.
TfLiteQuantizationParams params;
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;
// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;
// True if the tensor is a variable.
bool is_variable;
} TfLiteTensor;
// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
// only the minimum fields required to represent a node.
//
// This struct does not use:
// - delegate
// - intermediates
// - temporaries
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;
// Outputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* outputs;
// Opaque data provided by the node implementer through `Registration.init`.
void* user_data;
// Opaque data provided to the node if the node is a builtin. This is usually
// a structure defined in builtin_op_data.h
void* builtin_data;
// Custom initial data. This is the opaque data provided in the flatbuffer.
// WARNING: This is an experimental interface that is subject to change.
const void* custom_initial_data;
int custom_initial_data_size;
} TfLiteNode;
#endif // TF_LITE_STATIC_MEMORY
// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
// of information required for a kernel to run during TfLiteRegistration::Eval.
// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
// builds with this flag by default internally.
typedef struct TfLiteEvalTensor {
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have.
TfLiteIntArray* dims;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
} TfLiteEvalTensor;
#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
void TfLiteTensorDataFree(TfLiteTensor* t);
// Free quantization data.
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
// Free sparsity parameters.
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
// Free memory of tensor `t`.
void TfLiteTensorFree(TfLiteTensor* t);
// Set all of a tensor's fields (and free any previously allocated data).
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor);
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
// types other than kTfLiteDynamic will be ignored.
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
#endif // TF_LITE_STATIC_MEMORY
// WARNING: This is an experimental interface that is subject to change.
//
@@ -581,12 +729,11 @@ typedef struct TfLiteContext {
void* profiler;
// Allocate persistent buffer which has the same life time as the interpreter.
// Returns nullptr on failure.
// The memory is allocated from heap for TFL, and from tail in TFLM.
// If *ptr is not nullptr, the pointer will be reallocated.
// This method is only available in Prepare stage.
// This method is only available in Init or Prepare stage.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx,
size_t bytes, void** ptr);
void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
// Allocate a buffer which will be deallocated right after invoke phase.
// The memory is allocated from heap in TFL, and from volatile arena in TFLM.
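With the new signature, persistent allocations come back as a plain pointer (nullptr on failure) instead of through an out-parameter, and may now be requested in Init as well as Prepare. A hedged micro-kernel sketch (OpData is a hypothetical per-op struct):

void* OpInit(TfLiteContext* context, const char* buffer, size_t length) {
  // Lives as long as the interpreter; drawn from the arena tail in TFLM.
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}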
@@ -641,6 +788,18 @@ typedef struct TfLiteContext {
TfLiteStatus (*PreviewDelegatePartitioning)(
struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
TfLiteDelegateParams** partition_params_array, int* num_partitions);
// Returns a TfLiteTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
int tensor_idx);
// Returns a TfLiteEvalTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
int tensor_idx);
} TfLiteContext;
typedef struct TfLiteRegistration {
@@ -715,7 +874,26 @@ typedef enum TfLiteDelegateFlags {
//
// If the delegate isn't capable of handling dynamic tensors, this flag needs
// to be set to false.
kTfLiteDelegateFlagsAllowDynamicTensors = 1
kTfLiteDelegateFlagsAllowDynamicTensors = 1,
// This flag can be used by delegates (that allow dynamic tensors) to ensure
// applicable tensor shapes are automatically propagated in the case of tensor
// resizing.
// This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
// of a delegate kernel will have correct shapes before its Prepare() method
// is called. The runtime leverages TFLite builtin ops in the original
// execution plan to propagate shapes.
//
// A few points to note:
// 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
// false, this one is redundant since the delegate kernels are re-initialized
// every time tensors are resized.
// 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
// work is required to prepare the original execution plan.
// 3. This flag requires that the original execution plan only have ops with
// valid registrations (and not 'dummy' custom ops like with Flex).
// WARNING: This feature is experimental and subject to change.
kTfLiteDelegateFlagsRequirePropagatedShapes = 2
} TfLiteDelegateFlags;
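Since point 1 above makes the new flag meaningful only alongside dynamic-tensor support, a delegate would set both bits together. A hedged sketch (TfLiteDelegateCreate as the default-initializing helper is an assumption):

TfLiteDelegate delegate = TfLiteDelegateCreate();  // assumed zero-init helper
delegate.flags = kTfLiteDelegateFlagsAllowDynamicTensors |
                 kTfLiteDelegateFlagsRequirePropagatedShapes;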
// WARNING: This is an experimental interface that is subject to change.
@@ -734,8 +912,9 @@ typedef struct TfLiteDelegate {
struct TfLiteDelegate* delegate);
// Copy the data from delegate buffer handle into raw memory of the given
// 'tensor'. This cannot be null. The delegate is allowed to allocate the raw
// bytes as long as it follows the rules for kTfLiteDynamic tensors.
// 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
// long as it follows the rules for kTfLiteDynamic tensors, in which case this
// cannot be null.
TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
struct TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,

View File

@@ -19,9 +19,12 @@ limitations under the License.
// flatbuffer serialization format into in-memory values that are used by the
// runtime API and interpreter.
#include <cstddef>
#include <new>
#include <type_traits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
@@ -66,6 +69,185 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
ErrorReporter* error_reporter);
TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseConcatenation(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFullyConnected(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseGreaterEqual(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseStridedSlice(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_

View File

@@ -15,6 +15,10 @@ limitations under the License.
#include "tensorflow/lite/core/api/op_resolver.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
namespace tflite {
TfLiteStatus GetRegistrationFromOpCode(

View File

@@ -15,6 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#include <vector>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -32,6 +34,16 @@ class OpResolver {
/// Finds the op registration of a custom operator by op name.
virtual const TfLiteRegistration* FindOp(const char* op,
int version) const = 0;
// Returns optional delegates for resolving and handling ops in the flatbuffer
// model. This may be used in addition to the standard TfLiteRegistration
// lookup for graph resolution.
using TfLiteDelegatePtrVector =
std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
return TfLiteDelegatePtrVector();
}
virtual ~OpResolver() {}
};

View File

@@ -17,6 +17,8 @@ limitations under the License.
#include <string.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {

View File

@@ -55,9 +55,12 @@ inline void GetActivationMinMax(FusedActivationFunctionType ac,
}
}
inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
float output_activation_max) {
return std::min(std::max(x, output_activation_min), output_activation_max);
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
T output_activation_max) {
using std::max;
using std::min;
return min(max(x, output_activation_min), output_activation_max);
}
// Legacy function, left for compatibility only.
@@ -135,23 +138,24 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
#endif
}
inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
return RoundingDivideByPOT(
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}
inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier);
}
inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
int shift) {
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
int left_shift = shift > 0 ? shift : 0;
@@ -161,16 +165,16 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
right_shift);
}
inline int32 MultiplyByQuantizedMultiplier(int64_t x,
int32 quantized_multiplier,
int shift) {
inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
int32_t quantized_multiplier,
int shift) {
// Inputs:
// - quantized_multiplier has fixed point at bit 31
// - shift is -31 to +7 (negative for right shift)
//
// Assumptions: The following input ranges are assumed
// - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
// - scaling is chosen so final scaled result fits in int32
// - scaling is chosen so final scaled result fits in int32_t
// - input x is in the range -(1<<47) <= x < (1<<47)
assert(quantized_multiplier >= 0);
assert(shift >= -31 && shift < 8);
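A hedged worked example of the convention above: a multiplier of 1 << 30 encodes 0.5 at bit 31, so SaturatingRoundingDoublingHighMul computes round(2 * x * 2^30 / 2^32) = x / 2, and a shift of 0 applies no further scaling:

int32_t y = MultiplyByQuantizedMultiplier(int32_t{200}, 1 << 30, 0);  // y == 100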
@@ -215,9 +219,9 @@ inline int CountLeadingSignBits(T integer_input) {
using U = typename std::make_unsigned<T>::type;
return integer_input >= 0
? CountLeadingZeros(static_cast<U>(integer_input)) - 1
: integer_input != std::numeric_limits<T>::min()
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
: 0;
: integer_input != std::numeric_limits<T>::min()
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
: 0;
#endif
}
@@ -259,7 +263,7 @@ inline void gen_lut(const std::function<double(double)>& func, double min,
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
}
// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base value, lut[513] only for calculate slope
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
@@ -410,6 +414,23 @@ SaturatingRoundingMultiplyByPOTParam(
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
}
// Convert int32_t multiplier to int16_t with rounding.
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
int16_t* multiplier_int16_t) {
TFLITE_DCHECK_GE(multiplier_int32_t, 0);
static constexpr int32_t kRoundingOffset = 1 << 15;
if (multiplier_int32_t >=
std::numeric_limits<int32_t>::max() - kRoundingOffset) {
*multiplier_int16_t = std::numeric_limits<int16_t>::max();
return;
}
const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
*multiplier_int16_t = result;
TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
}
// Minimum output bits to accommodate log of maximum input range. It actually
// does not matter if one considers, say, [-64,64] or [-64,64).
//
@@ -418,15 +439,13 @@ SaturatingRoundingMultiplyByPOTParam(
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
constexpr int min_log_x_output_bits(int input_bits) {
return input_bits > 90
? 7
: input_bits > 44
? 6
: input_bits > 21
? 5
: input_bits > 10
? 4
: input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
return input_bits > 90 ? 7
: input_bits > 44 ? 6
: input_bits > 21 ? 5
: input_bits > 10 ? 4
: input_bits > 4 ? 3
: input_bits > 1 ? 2
: 1;
}
// Although currently the name of this function says that it cannot handle
@@ -434,17 +453,17 @@ constexpr int min_log_x_output_bits(int input_bits) {
// x_max is the largest representable input. In other words, the output range
// is symmetric.
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1_impl(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
// The reason for accumulating the result with an extra bit of headroom is
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
// recip_denom will otherwise introduce an error.
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;
const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1488522236, std::log(2.0));
@@ -472,10 +491,10 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// required shift "ourselves" instead of using, say, Rescale.
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
// z_a_pow_2 = input_integer_bits - z_a_headroom;
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
FixedPoint0 r_a_tmp =
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
const int32 r_a_raw =
const int32_t r_a_raw =
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
@@ -487,8 +506,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
FixedPoint0 z_b = z_a * sqrt_half;
int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
const int32 r_b_raw =
int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
const int32_t r_b_raw =
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
@@ -516,9 +535,9 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
}
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
static_assert(
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
"Output integer bits must be sufficient to accommodate logs of inputs.");
@@ -527,25 +546,25 @@ log_x_for_x_greater_than_or_equal_to_1(
input_val);
}
inline int32 GetReciprocal(int32 x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
// This is the number of bits to the left of the binary point above 1.0.
// Consider x=1.25. In that case shifted_scale=0.8 and
// no later adjustment will be needed.
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
const int32 shifted_sum_minus_one =
static_cast<int32>((static_cast<uint32>(x) << headroom_plus_one) -
(static_cast<uint32>(1) << 31));
const int32_t shifted_sum_minus_one =
static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
(static_cast<uint32_t>(1) << 31));
gemmlowp::FixedPoint<int32, 0> shifted_scale =
gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
gemmlowp::FixedPoint<int32, 0>::FromRaw(shifted_sum_minus_one));
gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
return shifted_scale.raw();
}
inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
int32* output_inv_sqrt,
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
int32_t* output_inv_sqrt,
int* output_shift) {
TFLITE_DCHECK_GE(input, 0);
if (input <= 1) {
@@ -565,7 +584,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
++*output_shift;
}
const unsigned max_left_shift_bits =
CountLeadingZeros(static_cast<uint32>(input)) - 1;
CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
@@ -577,8 +596,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
using gemmlowp::SaturatingRoundingMultiplyByPOT;
// Using 3 integer bits gives us enough room for the internal arithmetic in
// this Newton-Raphson iteration.
using F3 = FixedPoint<int32, 3>;
using F0 = FixedPoint<int32, 0>;
using F3 = FixedPoint<int32_t, 3>;
using F0 = FixedPoint<int32_t, 0>;
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input =
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);

View File

@@ -76,13 +76,15 @@ limitations under the License.
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif
// TODO(ahentz): Clean up.
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif // !defined(TF_LITE_STATIC_MEMORY)
// TFLITE_DEPRECATED()
//

View File

@@ -19,8 +19,9 @@ limitations under the License.
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO)
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std

View File

@@ -342,13 +342,13 @@ void NudgeQuantizationRange(const float min, const float max,
const float quant_max_float = static_cast<float>(quant_max);
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
uint16 nudged_zero_point;
uint16_t nudged_zero_point;
if (zero_point_from_min < quant_min_float) {
nudged_zero_point = static_cast<uint16>(quant_min);
nudged_zero_point = static_cast<uint16_t>(quant_min);
} else if (zero_point_from_min > quant_max_float) {
nudged_zero_point = static_cast<uint16>(quant_max);
nudged_zero_point = static_cast<uint16_t>(quant_max);
} else {
nudged_zero_point = static_cast<uint16>(TfLiteRound(zero_point_from_min));
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
}
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
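A hedged worked example of the nudging, with min = -1, max = 1 and an 8-bit range quant_min = 0, quant_max = 255:

//   nudged_scale        = (1 - (-1)) / (255 - 0)  ~= 0.007843
//   zero_point_from_min = 0 - (-1 / 0.007843)      = 127.5, rounds to 128
//   nudged_min          = (0   - 128) * 0.007843  ~= -1.0039
//   nudged_max          = (255 - 128) * 0.007843  ~=  0.9961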

View File

@@ -51,34 +51,39 @@ inline void Add(const ArithmeticParams& params,
// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params,
const uint8* input1_data, const uint8* input2_data,
uint8* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
const T* input1_data, const T* input2_data,
T* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<T>(clamped_output);
}
}
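A hedged arithmetic check of the accumulator comment above, for the 8-bit case:

//   |input + offset| < 2^9           (from the offset DCHECKs above)
//   2^9 << 20 = 2^29 < 2^31          (shifted values fit in int32_t)
//   rescaled terms <= the shifted values; their sum < 2^30  (raw_sum fits)
// The smaller shift of 15 plays the same headroom-preserving role for 16-bit.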
@@ -86,40 +91,40 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
uint8 input1_data, const uint8* input2_data,
uint8* output_data) {
uint8_t input1_data, const uint8_t* input2_data,
uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
const int32 input1_val = params.input1_offset + input1_data;
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
for (int i = 0; i < size; ++i) {
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input2_val =
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8* input1_data,
const RuntimeShape& input2_shape, const uint8* input2_data,
const RuntimeShape& output_shape, uint8* output_data) {
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
@@ -132,24 +137,53 @@ inline void Add(const ArithmeticParams& params,
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
int max_value = std::numeric_limits<int16_t>::max();
TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16* input1_data,
const RuntimeShape& input2_shape, const int16* input2_data,
const RuntimeShape& output_shape, int16* output_data) {
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int input1_shift = params.input1_shift;
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int16 output_activation_min = params.quantized_activation_min;
const int16 output_activation_max = params.quantized_activation_max;
const int16_t output_activation_min = params.quantized_activation_min;
const int16_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
TFLITE_DCHECK_LE(input1_shift, 0);
TFLITE_DCHECK_LE(params.input2_shift, 0);
const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data;
const int16* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int16_t* not_shift_input =
input1_shift == 0 ? input1_data : input2_data;
const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int input_right_shift =
input1_shift == 0 ? -params.input2_shift : -input1_shift;
@@ -161,8 +195,8 @@ inline void Add(const ArithmeticParams& params,
F0 scaled_input = F0::FromRaw(
gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
const int16 raw_output = result.raw();
const int16 clamped_output = std::min(
const int16_t raw_output = result.raw();
const int16_t clamped_output = std::min(
output_activation_max, std::max(output_activation_min, raw_output));
output_data[i] = clamped_output;
}
@@ -218,11 +252,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -257,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8* input1_data,
const RuntimeShape& input2_shape,
const uint8* input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -286,34 +321,34 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32 shifted_input1_val =
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32 shifted_input2_val =
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8>(clamped_output);
static_cast<T>(clamped_output);
}
}
}
@@ -322,11 +357,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const RuntimeShape& unswitched_input1_shape,
const uint8* unswitched_input1_data,
const uint8_t* unswitched_input1_data,
const RuntimeShape& unswitched_input2_shape,
const uint8* unswitched_input2_data,
const uint8_t* unswitched_input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
ArithmeticParams switched_params = unswitched_params;
switched_params.input1_offset = unswitched_params.input2_offset;
switched_params.input1_multiplier = unswitched_params.input2_multiplier;
@@ -341,18 +376,18 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const ArithmeticParams& params =
use_unswitched ? unswitched_params : switched_params;
const uint8* input1_data =
const uint8_t* input1_data =
use_unswitched ? unswitched_input1_data : unswitched_input2_data;
const uint8* input2_data =
const uint8_t* input2_data =
use_unswitched ? unswitched_input2_data : unswitched_input1_data;
// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
// beginning of the fourth loop. The innermost loop is an elementwise add of
// sections of the arrays.
uint8* output_data_ptr = output_data;
const uint8* input1_data_ptr = input1_data;
const uint8* input2_data_reset = input2_data;
uint8_t* output_data_ptr = output_data;
const uint8_t* input1_data_ptr = input1_data;
const uint8_t* input2_data_reset = input2_data;
// In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
// between input shapes. y3 for input 1 is always broadcast, and so the
// dimension there is 1, whereas optionally y1 might be broadcast for input 2.
@ -368,7 +403,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
// dimension.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
@ -397,7 +432,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// for y4 == 1 and the loop over y3 is contained within the
// AddScalarBroadcast function.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {

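A quick numeric check of the overflow claim at the top of this file, as a
standalone sketch (the bounds below are back-of-envelope values for the two
paths, not taken from the diff itself):

#include <cassert>
#include <cstdint>

int main() {
  // 8-bit path: after adding the offset, |input_val| <= 255, and the
  // headroom shift is 20, so the largest shifted magnitude is:
  const int64_t max8 = 255LL << 20;    // 267,386,880
  // 16-bit path: |input_val| <= 2^15 with a shift of 15:
  const int64_t max16 = 32768LL << 15; // 1,073,741,824 = 2^30
  // Both fit comfortably in a signed 32-bit accumulator.
  assert(max8 < INT32_MAX && max16 <= INT32_MAX);
  return 0;
}
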
View File

@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"
namespace tflite {
@ -51,18 +50,6 @@ inline bool LessEqualFn(T lhs, T rhs) {
return lhs <= rhs;
}
inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
if (lhs.len != rhs.len) return false;
for (int i = 0; i < lhs.len; ++i) {
if (lhs.str[i] != rhs.str[i]) return false;
}
return true;
}
inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
return !StringRefEqualFn(lhs, rhs);
}
template <typename T>
using ComparisonFn = bool (*)(T, T);
@ -78,22 +65,6 @@ inline void ComparisonImpl(
}
}
inline void ComparisonStringImpl(bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& input1_shape,
const TfLiteTensor* input1,
const RuntimeShape& input2_shape,
const TfLiteTensor* input2,
const RuntimeShape& output_shape,
bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const auto lhs = GetString(input1, i);
const auto rhs = GetString(input2, i);
output_data[i] = F(lhs, rhs);
}
}
template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@ -105,30 +76,30 @@ inline void Comparison(const ComparisonParams& op_params,
input2_data, output_shape, output_data);
}
template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void ComparisonWithScaling(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const int32 input1_val = input1_offset + input1_data[i];
const int32 input2_val = input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = input1_offset + input1_data[i];
const int32_t input2_val = input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
@ -180,31 +151,6 @@ inline void BroadcastComparison4DSlowImpl(
}
}
inline void BroadcastComparison4DSlowStringImpl(
bool (*F)(const StringRef&, const StringRef&),
const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const auto lhs =
GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
const auto rhs =
GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
}
}
}
}
}
template <ComparisonFn<float> F>
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@ -218,7 +164,7 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
output_shape, output_data);
}
template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void BroadcastComparison4DSlowWithScaling(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
@ -230,29 +176,29 @@ inline void BroadcastComparison4DSlowWithScaling(
unextended_output_shape);
int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
input1_offset +
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
input2_offset +
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(dims.output_shape, b, y, x, c)] =

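The scaling in ComparisonWithScaling exists so that tensors quantized on
different grids can be compared as integers. A minimal float-free
illustration with made-up scales (zero points taken as 0 for brevity):

#include <cstdio>

int main() {
  // q1 encodes r1 = 0.5 * q1; q2 encodes r2 = 0.25 * q2.
  const int q1 = 10, q2 = 30;     // r1 = 5.0, r2 = 7.5
  // Bring both onto a common grid of 0.25 units before comparing.
  const int lhs = q1 * 2, rhs = q2 * 1;
  std::printf("%s\n", lhs > rhs ? "r1 > r2" : "r1 <= r2");  // prints r1 <= r2
  return 0;
}
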
View File

@ -74,14 +74,14 @@ inline void Concatenation(const ConcatenationParams& params,
// when optimizing this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8* const* input_data,
const uint8_t* const* input_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
int axis = params.axis;
const int32* input_zeropoint = params.input_zeropoint;
const int32_t* input_zeropoint = params.input_zeropoint;
const float* input_scale = params.input_scale;
int inputs_count = params.inputs_count;
const int32 output_zeropoint = params.output_zeropoint;
const int32_t output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;
const int concat_dimensions = output_shape.DimensionsCount();
@ -110,11 +110,11 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
}
const float inverse_output_scale = 1.f / output_scale;
uint8* output_ptr = output_data;
uint8_t* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const uint8* input_ptr = input_data[i] + k * copy_size;
const uint8_t* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
memcpy(output_ptr, input_ptr, copy_size);

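ConcatenationWithScaling only takes the memcpy fast path when an input
already lives on the output grid; otherwise each value is requantized via
inverse_output_scale. The core step in isolation, with hypothetical scales
and zero points:

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float input_scale = 0.5f, output_scale = 0.25f;
  const int input_zp = 10, output_zp = 0;
  const int q_in = 14;  // real value 0.5 * (14 - 10) = 2.0
  const float inverse_output_scale = 1.f / output_scale;
  int q_out = output_zp + (int)std::round(inverse_output_scale *
                                          (input_scale * (q_in - input_zp)));
  q_out = std::min(255, std::max(0, q_out));  // stay inside uint8_t
  std::printf("q_out = %d\n", q_out);         // 8, i.e. 0.25 * 8 = 2.0
  return 0;
}
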
View File

@ -99,11 +99,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
}
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data, const RuntimeShape& im2col_shape,
uint8* im2col_data, void* cpu_backend_context) {
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data, const RuntimeShape& im2col_shape,
uint8_t* im2col_data, void* cpu_backend_context) {
(void)cpu_backend_context; // only used in optimized code.
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
@ -113,13 +113,13 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -143,7 +143,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@ -154,9 +154,9 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc +=
@ -174,7 +174,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@ -220,7 +220,7 @@ inline void HybridConvPerChannel(
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@ -231,9 +231,9 @@ inline void HybridConvPerChannel(
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc += filter_val * (input_val - input_offset[batch]);

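Every kernel above funnels its accumulator through
MultiplyByQuantizedMultiplier. A plain-integer sketch of what that step
computes (rounding doubling high-mul followed by a rounding right shift, per
the gemmlowp scheme referenced elsewhere in these files); this is an
illustration only and omits the INT32_MIN saturation edge case:

#include <cstdint>

// Multiply by a Q31 fixed-point multiplier with rounding.
static int32_t RoundingDoublingHighMul(int32_t a, int32_t b) {
  const int64_t prod = (int64_t)a * (int64_t)b;
  const int32_t nudge = prod >= 0 ? (1 << 30) : (1 - (1 << 30));
  return (int32_t)((prod + nudge) >> 31);
}

// Arithmetic right shift that rounds to nearest (ties away from zero).
static int32_t RoundingShiftRight(int32_t x, int exponent) {
  const int32_t mask = (1 << exponent) - 1;
  const int32_t remainder = x & mask;
  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
  return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
}

int32_t MultiplyByQuantizedMultiplierSketch(int32_t x, int32_t mult, int shift) {
  const int left = shift > 0 ? shift : 0;
  const int right = shift > 0 ? 0 : -shift;
  return RoundingShiftRight(RoundingDoublingHighMul(x * (1 << left), mult), right);
}
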
View File

@ -62,21 +62,21 @@ namespace reference_ops {
namespace depthwise_conv {
template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
int shift) {
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
int shift) {
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32 x, int32 quantized_multiplier, int shift) {
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32_t x, int32_t quantized_multiplier, int shift) {
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32 x, int32 quantized_multiplier, int shift) {
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32_t x, int32_t quantized_multiplier, int shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
const int left_shift = shift > 0 ? shift : 0;
const int right_shift = shift > 0 ? 0 : -shift;
@ -89,13 +89,12 @@ inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
static inline void Run(const DepthwiseParams& params,
const RuntimeShape& input_shape,
const uint8* input_data,
const RuntimeShape& filter_shape,
const uint8* filter_data,
const RuntimeShape& bias_shape, const int32* bias_data,
const RuntimeShape& output_shape, uint8* output_data) {
static inline void Run(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
@ -103,12 +102,12 @@ struct DepthwiseConvBasicKernel {
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
@ -135,7 +134,7 @@ struct DepthwiseConvBasicKernel {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
@ -146,9 +145,9 @@ struct DepthwiseConvBasicKernel {
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val =
int32_t input_val =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
int32 filter_val = filter_data[Offset(
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
acc += (filter_val + filter_offset) *
(input_val + input_offset);
@ -164,7 +163,7 @@ struct DepthwiseConvBasicKernel {
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@ -176,10 +175,10 @@ struct DepthwiseConvBasicKernel {
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
static inline void RunPerChannel(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int8* output_data) {
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
@ -189,12 +188,12 @@ struct DepthwiseConvBasicKernel {
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 input_offset = params.input_offset;
const int32 output_offset = params.output_offset;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32* output_multiplier = params.output_multiplier_per_channel;
const int32* output_shift = params.output_shift_per_channel;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t* output_multiplier = params.output_multiplier_per_channel;
const int32_t* output_shift = params.output_shift_per_channel;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -222,7 +221,7 @@ struct DepthwiseConvBasicKernel {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
@ -234,17 +233,18 @@ struct DepthwiseConvBasicKernel {
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32 filter_val = filter_data[Offset(
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we
// force the real value 0.0 to be represented by a quantized
// value. This guarantees that the input_offset is a int8,
// even though it is represented using int32. int32 += int8
// * (int8 - int8) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// value. This guarantees that the input_offset is an int8_t,
// even though it is represented using int32_t. int32_t +=
// int8_t
// * (int8_t - int8_t) so the highest value we can get from
// each accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
@ -279,10 +279,10 @@ struct DepthwiseConvBasicKernel {
inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data) {
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
return depthwise_conv::DepthwiseConvBasicKernel<
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
input_data, filter_shape,

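The headroom argument in the per-channel comments above, checked numerically
(illustrative only; 32512 is the worst-case per-tap magnitude quoted in the
comment):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t per_tap = 32512;                // ~2^14.98 per accumulation
  const int64_t max_taps = INT32_MAX / per_tap;
  std::printf("taps before possible overflow: %lld\n",
              (long long)max_taps);             // 66052, i.e. > 2^16 = 65536
  return 0;
}
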
View File

@ -32,12 +32,12 @@ inline void Dequantize(const tflite::DequantizationParams& op_params,
const RuntimeShape& input_shape,
const InputT* input_data,
const RuntimeShape& output_shape, OutputT* output_data) {
int32 zero_point = op_params.zero_point;
int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
const int32 val = input_data[i];
const int32_t val = input_data[i];
const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
output_data[i] = result;
}
@ -52,11 +52,11 @@ inline void PerChannelDequantize(
// Ensure flat size is same.
MatchingFlatSize(input_shape, output_shape);
const int32* zero_point = op_params.zero_point;
const int32_t* zero_point = op_params.zero_point;
const float* scale = op_params.scale;
const int32 quantized_dimension = op_params.quantized_dimension;
const int32 num_dims = input_shape.DimensionsCount();
const int32* dims_data = input_shape.DimsData();
const int32_t quantized_dimension = op_params.quantized_dimension;
const int32_t num_dims = input_shape.DimensionsCount();
const int32_t* dims_data = input_shape.DimsData();
std::vector<int> current_dim(num_dims, 0);
do {
@ -64,7 +64,7 @@ inline void PerChannelDequantize(
ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data(), 0, nullptr);
const int channel = current_dim[quantized_dimension];
const int32 val = input_data[offset];
const int32_t val = input_data[offset];
const float result =
static_cast<float>(scale[channel] * (val - zero_point[channel]));
output_data[offset] = result;

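PerChannelDequantize visits every element and picks the scale/zero point by
the channel index along quantized_dimension. The same computation in
miniature, with invented values and channels on the last axis:

#include <cstdio>

int main() {
  const float scale[2] = {0.5f, 0.1f};
  const int zero_point[2] = {0, -5};
  const int q[2][2] = {{2, 15}, {-4, 5}};  // shape [2, 2], channel = dim 1
  for (int i = 0; i < 2; ++i)
    for (int c = 0; c < 2; ++c)
      std::printf("out[%d][%d] = %g\n", i, c,
                  scale[c] * (q[i][c] - zero_point[c]));
  return 0;
}
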
View File

@ -61,17 +61,17 @@ inline void FullyConnected(
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data) {
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
@ -89,10 +89,10 @@ inline void FullyConnected(
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32 acc = 0;
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32 input_val = input_data[b * accum_depth + d];
int32 filter_val = filter_data[out_c * accum_depth + d];
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
@ -102,24 +102,24 @@ inline void FullyConnected(
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int16* output_data) {
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(output_offset, 0);
@ -138,20 +138,21 @@ inline void FullyConnected(
for (int out_c = 0; out_c < output_depth; ++out_c) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32 accum = bias_data[out_c];
int32_t accum = bias_data[out_c];
// Accumulation loop.
for (int d = 0; d < accum_depth; ++d) {
int16 input_val = input_data[b * accum_depth + d] + input_offset;
int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
int16_t filter_val =
filter_data[out_c * accum_depth + d] + filter_offset;
accum += filter_val * input_val;
}
// Down-scale the final int32 accumulator to the scale used by our
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
accum =
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
// Saturate, cast to int16, and store to output array.
// Saturate, cast to int16_t, and store to output array.
accum = std::max(accum, output_activation_min - output_offset);
accum = std::min(accum, output_activation_max - output_offset);
accum += output_offset;
@ -162,14 +163,14 @@ inline void FullyConnected(
inline void ShuffledFullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& weights_shape,
const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int16* output_data, uint8* shuffled_input_workspace_data) {
const int32 output_multiplier = params.output_multiplier;
const uint8_t* input_data, const RuntimeShape& weights_shape,
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
@ -190,7 +191,7 @@ inline void ShuffledFullyConnected(
TFLITE_DCHECK((output_depth % 4) == 0);
// Shuffling and xoring of input activations into the workspace buffer
uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
if (batches == 1) {
for (int i = 0; i < accum_depth; i++) {
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
@ -198,13 +199,13 @@ inline void ShuffledFullyConnected(
} else if (batches == 4) {
for (int c = 0; c < accum_depth; c += 16) {
for (int b = 0; b < 4; b++) {
const uint8* src_data_ptr = input_data + b * accum_depth + c;
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
for (int j = 0; j < 16; j++) {
uint8 src_val = *src_data_ptr++;
uint8_t src_val = *src_data_ptr++;
// Flip the sign bit, so that the kernel will only need to
// reinterpret these uint8 values as int8, getting for free the
// reinterpret these uint8_t values as int8_t, getting for free the
// subtraction of the zero_point value 128.
uint8 dst_val = src_val ^ 0x80;
uint8_t dst_val = src_val ^ 0x80;
*shuffled_input_workspace_ptr++ = dst_val;
}
}
@ -216,62 +217,62 @@ inline void ShuffledFullyConnected(
// Actual computation
if (batches == 1) {
int16* output_ptr = output_data;
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8 values is equivalent to
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8* shuffled_weights_ptr =
reinterpret_cast<const int8*>(shuffled_weights_data);
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8* shuffled_input_data =
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32 accum[4] = {0};
int32_t accum[4] = {0};
// Accumulation loop.
for (int d = 0; d < accum_depth; d += 16) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 16; j++) {
int8 input_val = shuffled_input_data[d + j];
int8 weights_val = *shuffled_weights_ptr++;
int8_t input_val = shuffled_input_data[d + j];
int8_t weights_val = *shuffled_weights_ptr++;
accum[i] += weights_val * input_val;
}
}
}
for (int i = 0; i < 4; i++) {
// Add bias value
int32 acc = accum[i] + bias_data[c + i];
// Down-scale the final int32 accumulator to the scale used by our
int32_t acc = accum[i] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc =
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Saturate, cast to int16, and store to output array.
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[c + i] = acc;
}
}
} else if (batches == 4) {
int16* output_ptr = output_data;
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8 values is equivalent to
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8* shuffled_weights_ptr =
reinterpret_cast<const int8*>(shuffled_weights_data);
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8* shuffled_input_data =
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
const int8* shuffled_input_ptr = shuffled_input_data;
const int8_t* shuffled_input_ptr = shuffled_input_data;
// Accumulation loop.
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32 accum[4][4];
int32_t accum[4][4];
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
accum[i][b] = 0;
@ -281,8 +282,8 @@ inline void ShuffledFullyConnected(
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
for (int j = 0; j < 16; j++) {
int8 input_val = shuffled_input_ptr[16 * b + j];
int8 weights_val = shuffled_weights_ptr[16 * i + j];
int8_t input_val = shuffled_input_ptr[16 * b + j];
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
accum[i][b] += weights_val * input_val;
}
}
@ -293,14 +294,14 @@ inline void ShuffledFullyConnected(
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
// Add bias value
int32 acc = accum[i][b] + bias_data[c + i];
// Down-scale the final int32 accumulator to the scale used by our
int32_t acc = accum[i][b] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The
// quantized multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
// Saturate, cast to int16, and store to output array.
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[b * output_depth + c + i] = acc;

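The 0x80 trick used by ShuffledFullyConnected, in isolation: flipping the
sign bit and reinterpreting as int8_t subtracts the zero point 128 for free
(this assumes two's complement representation, which these kernels already
rely on):

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v < 256; ++v) {
    const uint8_t u = (uint8_t)v;
    const int8_t s = (int8_t)(u ^ 0x80);  // flip sign bit, reinterpret
    assert((int)s == v - 128);
  }
  return 0;
}
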
View File

@ -23,34 +23,41 @@ limitations under the License.
namespace tflite {
namespace reference_integer_ops {
inline void CheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is the negated input zero point. Activation tensors are
// asymmetrically quantized, so they span the full int8 range.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
}
// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
inline void AddElementwise(int size, const ArithmeticParams& params,
const int8_t* input1_data, const int8_t* input2_data,
int8_t* output_data) {
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
CheckArithmeticParams(params);
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
@ -61,16 +68,11 @@ inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
CheckArithmeticParams(params);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

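What CheckArithmeticParams enforces, spelled out with a made-up zero point:
the offsets are negated zero points, so negating them back must land inside
the int8_t range:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const int32_t zero_point = -5;  // hypothetical int8 zero point
  const int32_t input1_offset = -zero_point;
  assert(-input1_offset >= std::numeric_limits<int8_t>::min());
  assert(-input1_offset <= std::numeric_limits<int8_t>::max());
  return 0;
}
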
View File

@ -22,27 +22,27 @@ namespace reference_integer_ops {
// Fixed-point per-channel-quantization convolution reference kernel.
inline void ConvPerChannel(
const ConvParams& params, const int32* output_multiplier,
const int32* output_shift, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int8* output_data) {
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
const int32 input_offset = params.input_offset; // r = s(q - Z)
const int32_t input_offset = params.input_offset; // r = s(q - Z)
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32 output_offset = params.output_offset;
const int32_t output_offset = params.output_offset;
// Set min and max value of the output.
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Sanity check.
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
@ -67,7 +67,7 @@ inline void ConvPerChannel(
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@ -79,18 +79,18 @@ inline void ConvPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force
// the real value 0.0 to be represented by a quantized value. This
// guarantees that the input_offset is a int8, even though it
// is represented using int32.
// int32 += int8 * (int8 - int8) so the highest value we can
// get from each accumulation is [-127, 127] * ([-128, 127] -
// guarantees that the input_offset is an int8_t, even though
// it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
@ -125,12 +125,12 @@ inline void ConvPerChannel(
// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
inline void ConvPerChannel(
const ConvParams& params, const int32* output_multiplier,
const int32* output_shift, const RuntimeShape& input_shape,
const int16* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
int16* output_data) {
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
@ -140,10 +140,10 @@ inline void ConvPerChannel(
const int pad_height = params.padding_values.height;
// Set min and max value of the output.
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Sanity check.
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
@ -180,13 +180,13 @@ inline void ConvPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
// Accumulate with a 64-bit accumulator.
// int64 += int8 * int16 so the highest value we can
// int64_t += int8_t * int16_t so the highest value we can
// get from each accumulation is [-127, 127] * ([-32768,
// 32767] -
// [-32768, 32767]), which is [-8322945, 8322945].

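The affine quantization rule r = s(q - Z) noted next to input_offset above,
with invented numbers:

#include <cstdio>

int main() {
  const float s = 0.02f;  // scale
  const int Z = -4;       // zero point
  const int q = 46;       // quantized value
  std::printf("r = %g\n", s * (q - Z));  // r = 1
  return 0;
}
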
View File

@ -20,12 +20,12 @@ limitations under the License.
namespace tflite {
namespace reference_integer_ops {
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32* output_multiplier,
const int32* output_shift, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int8* output_data) {
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
@ -35,10 +35,10 @@ inline void DepthwiseConvPerChannel(
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 input_offset = params.input_offset;
const int32 output_offset = params.output_offset;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -66,7 +66,7 @@ inline void DepthwiseConvPerChannel(
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
@ -77,17 +77,17 @@ inline void DepthwiseConvPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val = filter_data[Offset(
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force
// the real value 0.0 to be represented by a quantized value. This
// guarantees that the input_offset is a int8, even though it
// is represented using int32.
// int32 += int8 * (int8 - int8) so the highest value we can
// get from each accumulation is [-127, 127] * ([-128, 127] -
// guarantees that the input_offset is an int8_t, even though
// it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
@ -120,12 +120,12 @@ inline void DepthwiseConvPerChannel(
}
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32* output_multiplier,
const int32* output_shift, const RuntimeShape& input_shape,
const int16* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
int16* output_data) {
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
@ -134,8 +134,8 @@ inline void DepthwiseConvPerChannel(
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -174,9 +174,9 @@ inline void DepthwiseConvPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val = filter_data[Offset(
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 64-bit accumulator.
// We assume maximum of 2^16 accumulations as with the 8-bit
@ -190,7 +190,7 @@ inline void DepthwiseConvPerChannel(
if (bias_data) {
acc += bias_data[output_channel];
}
int32 scaled_acc = MultiplyByQuantizedMultiplier(
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
@ -207,8 +207,8 @@ inline void DepthwiseConvPerChannel(
inline void DepthwiseConvHybridPerChannel(
const DepthwiseParams& params, float* scaling_factors_ptr,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& filter_shape, const int8* filter_data,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& filter_shape, const int8_t* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scale, int32_t* input_offset) {
@ -247,7 +247,7 @@ inline void DepthwiseConvHybridPerChannel(
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
@ -258,9 +258,9 @@ inline void DepthwiseConvHybridPerChannel(
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val = filter_data[Offset(
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
acc += filter_val * (input_val - input_offset[batch]);
}

View File

@ -24,15 +24,15 @@ inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
@ -44,10 +44,10 @@ inline void FullyConnected(
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32 acc = 0;
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32 input_val = input_data[b * accum_depth + d];
int32 filter_val = filter_data[out_c * accum_depth + d];
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
@ -68,11 +68,11 @@ inline void FullyConnected(
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int64_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32 filter_offset = params.weights_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t filter_offset = params.weights_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
@ -86,8 +86,8 @@ inline void FullyConnected(
for (int out_c = 0; out_c < output_depth; ++out_c) {
int64_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32 input_val = input_data[b * accum_depth + d];
int32 filter_val = filter_data[out_c * accum_depth + d];
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * input_val;
}
if (bias_data) {

View File

@ -21,8 +21,8 @@ namespace tflite {
namespace reference_integer_ops {
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
int32_t depth, const int8* input_data,
int8* output_data) {
int32_t depth, const int8_t* input_data,
int8_t* output_data) {
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
// The output scale must be in sync with Prepare().
@ -30,7 +30,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
// to [-1, 127/128].
static constexpr int32_t kOutputScale = 7;
for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
// int32 = (int8 - int8) ^ 2.
// int32_t = (int8_t - int8_t) ^ 2.
// ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
// safe from overflowing in at least 2^16 steps.
int32_t acc = 0;
@ -55,7 +55,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
std::min(static_cast<int32_t>(kMaxInt8),
std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
output_data[depth * outer_index + inner_index] =
static_cast<int8>(output_in_q24);
static_cast<int8_t>(output_in_q24);
}
}
}

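The output grid implied by kOutputScale = 7 above: one int8_t step is 2^-7,
so the representable range is [-128/128, 127/128] = [-1, 127/128]. A
hypothetical mapping of a few unit-norm components:

#include <cstdio>

int main() {
  const float values[3] = {-1.0f, 0.0f, 0.9921875f};  // 127/128 = 0.9921875
  for (int i = 0; i < 3; ++i) {
    const int q = (int)(values[i] * (1 << 7));  // exact for these inputs
    std::printf("r = %g  ->  q = %d\n", values[i], q);  // -128, 0, 127
  }
  return 0;
}
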
View File

@ -27,14 +27,14 @@ inline void MulElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 unclamped_result =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
@ -57,13 +57,13 @@ inline void Mul(const ArithmeticParams& params,
// Mul with 16 bit inputs and int8_t outputs.
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16* input1_data,
const RuntimeShape& input2_shape, const int16* input2_data,
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
int32 output_offset = params.output_offset;
int32 output_activation_min = params.quantized_activation_min;
int32 output_activation_max = params.quantized_activation_max;
int32_t output_offset = params.output_offset;
int32_t output_activation_min = params.quantized_activation_min;
int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int flat_size =
@ -75,12 +75,12 @@ inline void Mul(const ArithmeticParams& params,
F0 unclamped_result =
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
int16 rescaled_result =
int16_t rescaled_result =
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
int16 clamped_result =
std::min<int16>(output_activation_max - output_offset, rescaled_result);
clamped_result =
std::max<int16>(output_activation_min - output_offset, clamped_result);
int16_t clamped_result = std::min<int16_t>(
output_activation_max - output_offset, rescaled_result);
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
clamped_result);
output_data[i] = output_offset + clamped_result;
}
}
@ -104,18 +104,18 @@ inline void BroadcastMul4DSlow(
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32 unclamped_result =
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32 clamped_output = std::min(
const int32_t clamped_output = std::min(
params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[Offset(extended_output_shape, b, y, x, c)] =

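The pattern above (add the zero-point offsets, multiply, rescale, add the output offset, clamp) recurs in every quantized binary op touched by this commit. A sketch of one multiply cell, with a double standing in for the Q31 multiplier-plus-shift pair; that substitution is an assumption made purely for readability.

#include <algorithm>
#include <cstdint>

// Illustrative stand-in for the requantization in MulElementwise:
// effective_scale approximates input1_scale * input2_scale / output_scale.
int8_t QuantizedMulCell(int8_t a, int8_t b, int32_t input1_offset,
                        int32_t input2_offset, int32_t output_offset,
                        double effective_scale) {
  const int32_t a_val = input1_offset + a;
  const int32_t b_val = input2_offset + b;
  const int32_t raw =
      static_cast<int32_t>(a_val * b_val * effective_scale) + output_offset;
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, raw)));
}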

@ -22,8 +22,9 @@ namespace tflite {
namespace reference_integer_ops {
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape, const int8* input_data,
const RuntimeShape& output_shape, int8* output_data) {
const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -52,7 +53,7 @@ inline void AveragePool(const PoolParams& params,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32 acc = 0;
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
@ -71,7 +72,7 @@ inline void AveragePool(const PoolParams& params,
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int8>(acc);
static_cast<int8_t>(acc);
}
}
}
@ -79,8 +80,8 @@ inline void AveragePool(const PoolParams& params,
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& output_shape,
int8* output_data) {
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,
@ -137,8 +138,9 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int16* input_data,
const RuntimeShape& output_shape, int16* output_data) {
const int16_t* input_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -167,7 +169,7 @@ inline void AveragePool(const PoolParams& params,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32 acc = 0;
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
@ -186,7 +188,7 @@ inline void AveragePool(const PoolParams& params,
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int16>(acc);
static_cast<int16_t>(acc);
}
}
}
@ -194,8 +196,8 @@ inline void AveragePool(const PoolParams& params,
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int16* input_data, const RuntimeShape& output_shape,
int16* output_data) {
const int16_t* input_data, const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,

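The pooling kernels above accumulate into int32_t and clamp before narrowing back to the element type; the divide-with-rounding step sits in an elided hunk, so the rounding shown below is an assumption about its shape rather than a quote. An illustrative sketch of one window:

#include <algorithm>
#include <cstdint>

// Accumulate, average with round-to-nearest (assumed), clamp, narrow.
int8_t AveragePoolWindow(const int8_t* window, int count,
                         int32_t activation_min, int32_t activation_max) {
  int32_t acc = 0;
  for (int i = 0; i < count; ++i) acc += window[i];
  acc = acc > 0 ? (acc + count / 2) / count : (acc - count / 2) / count;
  acc = std::max(acc, activation_min);
  acc = std::min(acc, activation_max);
  return static_cast<int8_t>(acc);
}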

@ -52,40 +52,39 @@ inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const uint8* input_data,
const uint8_t* input_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int32 input_zero_point = op_params.input_zero_point;
const int32_t input_zero_point = op_params.input_zero_point;
for (int i = 0; i < outer_size; ++i) {
int32 square_l2_norm = 0;
int32_t square_l2_norm = 0;
for (int c = 0; c < depth; c++) {
int32 diff = input_data[depth * i + c] - input_zero_point;
int32_t diff = input_data[depth * i + c] - input_zero_point;
square_l2_norm += diff * diff;
}
int32 inv_l2norm_multiplier;
int32_t inv_l2norm_multiplier;
int inv_l2norm_shift;
GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
&inv_l2norm_multiplier, &inv_l2norm_shift);
for (int c = 0; c < depth; c++) {
int32 diff = input_data[depth * i + c] - input_zero_point;
int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t diff = input_data[depth * i + c] - input_zero_point;
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
int32 unclamped_output_val = 128 + rescaled_diff;
int32 output_val =
std::min(static_cast<int32>(255),
std::max(static_cast<int32>(0), unclamped_output_val));
output_data[depth * i + c] = static_cast<uint8>(output_val);
int32_t unclamped_output_val = 128 + rescaled_diff;
int32_t output_val =
std::min(static_cast<int32_t>(255),
std::max(static_cast<int32_t>(0), unclamped_output_val));
output_data[depth * i + c] = static_cast<uint8_t>(output_val);
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_


@ -66,8 +66,8 @@ inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
}
inline void Logistic(const LogisticParams& params,
const RuntimeShape& input_shape, const int16* input_data,
const RuntimeShape& output_shape, int16* output_data) {
const RuntimeShape& input_shape, const int16_t* input_data,
const RuntimeShape& output_shape, int16_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
@ -84,12 +84,12 @@ inline void Logistic(const LogisticParams& params,
}
}
// Quantized int8 logistic activation. Cheats by dequantizing and requantizing
// around the floating point logistic method. This implementation is slow on
// platforms without a floating point unit.
// Quantized int8_t logistic activation. Cheats by dequantizing and
// requantizing around the floating point logistic method. This implementation
// is slow on platforms without a floating point unit.
// TODO(b/141211002): Delete this int8 implementation once we can reuse the
// approach used in TFLite for int8 Logistic.
// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
// approach used in TFLite for int8_t Logistic.
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
float input_scale, int input_zero_point,
const RuntimeShape& output_shape, int8_t* output_data,

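The comment above says the int8_t path cheats by dequantizing, running the float sigmoid, and requantizing. Spelled out as a single-element sketch; parameter names are illustrative.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Dequantize -> float logistic -> requantize, as the comment describes.
int8_t LogisticInt8Cell(int8_t q, float input_scale, int input_zero_point,
                        float output_scale, int output_zero_point) {
  const float x = input_scale * (q - input_zero_point);
  const float y = 1.f / (1.f + std::exp(-x));
  const int32_t out =
      static_cast<int32_t>(std::round(y / output_scale)) + output_zero_point;
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, out)));
}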

@ -24,20 +24,20 @@ namespace reference_ops {
// Element-wise mul that can often be used for inner loop of broadcast Mul as
// well as the non-broadcast Mul.
inline void MulElementwise(int size, const ArithmeticParams& params,
const uint8* input1_data, const uint8* input2_data,
uint8* output_data) {
const uint8_t* input1_data,
const uint8_t* input2_data, uint8_t* output_data) {
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 unclamped_result =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
@ -60,9 +60,9 @@ inline void Mul(const ArithmeticParams& params,
}
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8* input1_data,
const RuntimeShape& input2_shape, const uint8* input2_data,
const RuntimeShape& output_shape, uint8* output_data) {
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
@ -73,11 +73,11 @@ inline void Mul(const ArithmeticParams& params,
inline void BroadcastMul4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8* input1_data,
const uint8_t* input1_data,
const RuntimeShape& input2_shape,
const uint8* input2_data,
const uint8_t* input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@ -89,22 +89,22 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params,
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32 unclamped_result =
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32 clamped_output = std::min(
const int32_t clamped_output = std::min(
params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8>(clamped_output);
static_cast<uint8_t>(clamped_output);
}
}
}


@ -32,8 +32,8 @@ constexpr int PadKernelMaxDimensionCount() { return 4; }
// equivalent to a simple input1_data. For Pad, it should point to a zero
// value.
//
// Note that two typenames are required, so that T=P=int32 is considered a
// specialization distinct from P=int32.
// Note that two typenames are required, so that T=P=int32_t is considered a
// specialization distinct from P=int32_t.
template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
@ -116,11 +116,11 @@ inline void Pad(const tflite::PadParams& op_params,
output_data);
}
// The second (pad-value) input can be int32 when, say, the first is uint8.
// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
template <typename T>
inline void Pad(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const int32* pad_value_ptr, const RuntimeShape& output_shape,
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
T* output_data) {
const T converted_pad_value = static_cast<T>(*pad_value_ptr);
PadImpl(op_params, input_shape, input_data, &converted_pad_value,
@ -130,40 +130,18 @@ inline void Pad(const tflite::PadParams& op_params,
// This version avoids conflicting template matching.
template <>
inline void Pad(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const int32* input_data,
const int32* pad_value_ptr, const RuntimeShape& output_shape,
int32* output_data) {
const RuntimeShape& input_shape, const int32_t* input_data,
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
int32_t* output_data) {
PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
// One could make all PadImageStyle calls simply delegate the work to the
// ordinary Pad. However, it is better that the reference code asserts false in
// similar cases.
template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const P* pad_value_ptr,
const RuntimeShape& output_shape, T* output_data) {
TFLITE_ASSERT_FALSE;
}
template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
const RuntimeShape& input_shape,
const uint8* input_data, const P* pad_value_ptr,
const RuntimeShape& output_shape,
uint8* output_data) {
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}
template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
const RuntimeShape& input_shape,
const int8_t* input_data, const P* pad_value_ptr,
const RuntimeShape& output_shape,
int8_t* output_data) {
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
output_data);
}

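The two-typename note above exists because the pad value's type P can differ from the tensor type T, e.g. an int32_t pad value feeding a uint8_t tensor. A 1-D sketch of the convert-then-fill pattern behind PadImpl, assuming non-negative pad counts; names are illustrative.

#include <cstdint>
#include <vector>

// Convert the pad value to the tensor type, fill, then copy the payload in.
template <typename T, typename P>
std::vector<T> Pad1D(const std::vector<T>& input, int left_pad, int right_pad,
                     const P* pad_value_ptr) {
  const T pad_value = static_cast<T>(*pad_value_ptr);
  std::vector<T> output(input.size() + left_pad + right_pad, pad_value);
  for (size_t i = 0; i < input.size(); ++i) output[left_pad + i] = input[i];
  return output;
}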

@ -78,8 +78,9 @@ inline void AveragePool(const PoolParams& params,
inline void AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const uint8* input_data,
const RuntimeShape& output_shape, uint8* output_data) {
const uint8_t* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@ -108,7 +109,7 @@ inline void AveragePool(const PoolParams& params,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32 acc = 0;
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
@ -125,7 +126,7 @@ inline void AveragePool(const PoolParams& params,
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@ -237,8 +238,8 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& output_shape,
uint8* output_data) {
const uint8_t* input_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
@ -269,7 +270,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
uint8 max = 0;
uint8_t max = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
@ -281,10 +282,10 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<uint8>(max, params.quantized_activation_min);
max = std::min<uint8>(max, params.quantized_activation_max);
max = std::max<uint8_t>(max, params.quantized_activation_min);
max = std::min<uint8_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<uint8>(max);
static_cast<uint8_t>(max);
}
}
}


@ -23,7 +23,7 @@ namespace tflite {
namespace reference_ops {
// Broadcast prelu to output_shape for quantized uint8/int8 data.
// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
template <typename T>
inline void BroadcastPrelu4DSlow(
const PreluParams& params, const RuntimeShape& input_shape,
@ -44,15 +44,15 @@ inline void BroadcastPrelu4DSlow(
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
int output_index = Offset(extended_output_shape, b, y, x, c);
int input_index = SubscriptToIndex(desc1, b, y, x, c);
const int32 input_value =
const int32_t input_value =
params.input_offset + input_data[input_index];
int32 output_value;
int32_t output_value;
if (input_value >= 0) {
output_value = MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_1, params.output_shift_1);
} else {
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
const int32 alpha_value =
const int32_t alpha_value =
params.alpha_offset + alpha_data[alpha_index];
output_value = MultiplyByQuantizedMultiplier(
@ -61,9 +61,9 @@ inline void BroadcastPrelu4DSlow(
}
output_value += params.output_offset;
const int32 quantized_min = std::numeric_limits<T>::min();
const int32 quantized_max = std::numeric_limits<T>::max();
const int32 clamped_output =
const int32_t quantized_min = std::numeric_limits<T>::min();
const int32_t quantized_max = std::numeric_limits<T>::max();
const int32_t clamped_output =
std::min(quantized_max, std::max(quantized_min, output_value));
output_data[output_index] = static_cast<T>(clamped_output);
}
@ -72,6 +72,37 @@ inline void BroadcastPrelu4DSlow(
}
}
template <typename T>
inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
const T* input_data, const RuntimeShape& alpha_shape,
const T* alpha_data, const RuntimeShape& output_shape,
T* output_data) {
const int32_t quantized_min = std::numeric_limits<T>::min();
const int32_t quantized_max = std::numeric_limits<T>::max();
const int flat_size =
MatchingElementsSize(input_shape, alpha_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t input_value = params.input_offset + input_data[i];
int32_t output_value;
if (input_value >= 0) {
output_value = MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_1, params.output_shift_1);
} else {
const int32_t alpha_value = params.alpha_offset + alpha_data[i];
output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
params.output_multiplier_2,
params.output_shift_2);
}
output_value += params.output_offset;
const int32_t clamped_output =
std::min(quantized_max, std::max(quantized_min, output_value));
output_data[i] = static_cast<T>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite

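The new Prelu above applies one effective scale to non-negative inputs and a second scale, folded together with alpha, to negative ones. A single-element sketch with doubles standing in for the multiplier/shift pairs; that substitution is an assumption for readability.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Positive branch uses scale_identity, negative branch multiplies by the
// (offset-corrected) alpha and uses scale_alpha, then both clamp to int8_t.
int8_t QuantizedPreluCell(int8_t q_in, int8_t q_alpha, int32_t input_offset,
                          int32_t alpha_offset, int32_t output_offset,
                          double scale_identity, double scale_alpha) {
  const int32_t input_value = input_offset + q_in;
  int32_t output_value;
  if (input_value >= 0) {
    output_value = static_cast<int32_t>(std::round(input_value * scale_identity));
  } else {
    const int32_t alpha_value = alpha_offset + q_alpha;
    output_value =
        static_cast<int32_t>(std::round(input_value * alpha_value * scale_alpha));
  }
  output_value += output_offset;
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, output_value)));
}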

@ -76,6 +76,10 @@ inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
params->broadcast_category !=
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
// This is unreachable because at least one else clause in the above loop
// must be reached.
TFLITE_DCHECK(false);
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
return false;
}


@ -15,7 +15,11 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
@ -29,18 +33,18 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,
const InputT* input_data,
const RuntimeShape& output_shape,
OutputT* output_data) {
const int32 zero_point = op_params.zero_point;
const int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
static constexpr int32 min_val = std::numeric_limits<OutputT>::min();
static constexpr int32 max_val = std::numeric_limits<OutputT>::max();
static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
for (int i = 0; i < flat_size; i++) {
const InputT val = input_data[i];
int32 unclamped =
static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
int32_t unclamped =
static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
zero_point;
int32 clamped = std::min(std::max(unclamped, min_val), max_val);
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
output_data[i] = clamped;
}
}

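AffineQuantize above is the canonical float-to-integer mapping q = clamp(round(x / scale) + zero_point). As a standalone template with illustrative names:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// One value through the affine quantization rule shown above.
template <typename OutputT>
OutputT AffineQuantizeOne(float x, float scale, int32_t zero_point) {
  constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
  constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
  const int32_t unclamped =
      static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<OutputT>(std::min(std::max(unclamped, min_val), max_val));
}
// e.g. AffineQuantizeOne<int8_t>(0.5f, 0.02f, -3) yields 22 (round(25) - 3).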

@ -18,6 +18,8 @@ limitations under the License.
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
@ -249,9 +251,9 @@ inline void Mean(const tflite::MeanParams& op_params,
inline void Mean(const tflite::MeanParams& op_params,
const RuntimeShape& unextended_input_shape,
const uint8_t* input_data, int32 input_zero_point,
const uint8_t* input_data, int32_t input_zero_point,
float input_scale, const RuntimeShape& unextended_output_shape,
uint8_t* output_data, int32 output_zero_point,
uint8_t* output_data, int32_t output_zero_point,
float output_scale) {
ruy::profiler::ScopeLabel label("Mean4D/Uint8");
@ -280,9 +282,9 @@ inline void Mean(const tflite::MeanParams& op_params,
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
int32 bias =
int32_t bias =
output_zero_point -
static_cast<int32>(input_zero_point * input_scale / output_scale);
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
double real_scale =
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
@ -291,7 +293,7 @@ inline void Mean(const tflite::MeanParams& op_params,
QuantizeMultiplier(real_scale, &multiplier, &shift);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
int32 acc = 0;
int32_t acc = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
@ -310,18 +312,21 @@ inline void Mean(const tflite::MeanParams& op_params,
// It does so in two stages, first calculates the sum of elements along the axis
// then divides it by the number of element in axis for quantized values.
template <typename T, typename U>
inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
float input_scale, const int* input_dims,
const int input_num_dims, T* output_data,
int32 output_zero_point, float output_scale,
int32_t output_zero_point, float output_scale,
const int* output_dims,
const int output_num_dims, const int* axis,
const int num_axis_dimensions, bool keep_dims,
int* temp_index, int* resolved_axis, U* temp_sum,
bool compute_sum) {
const bool uint8_case = std::is_same<T, int8_t>::value;
const bool uint8_case = std::is_same<T, uint8_t>::value;
const bool int16_case = std::is_same<T, int16_t>::value;
if (uint8_case) {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
} else if (int16_case) {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
} else {
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
}
@ -381,11 +386,11 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
for (size_t idx = 0; idx < num_outputs; ++idx) {
float float_mean = static_cast<float>(temp_sum[idx]) /
static_cast<float>(num_elements_in_axis);
float result =
std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point,
static_cast<float>(std::numeric_limits<T>::max()));
result =
std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
float result = TfLiteMin(
TfLiteRound(float_mean * scale + bias) + output_zero_point,
static_cast<float>(std::numeric_limits<T>::max()));
result = TfLiteMax(result,
static_cast<float>(std::numeric_limits<T>::min()));
output_data[idx] = static_cast<T>(result);
}
}

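The tail of QuantizedMeanOrSum above rescales the accumulated sum into the output domain. The bias term is computed in an elided hunk (it appears to fold the input zero point into output units), so treat its meaning here as an assumption. The requantization step in isolation:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Round, shift by the output zero point, and clamp to the output type,
// mirroring the TfLiteMin/TfLiteMax clamp in the hunk above.
template <typename T>
T RequantizeMean(float mean_in_input_units, float scale, float bias,
                 int32_t output_zero_point) {
  float result = std::min(
      std::round(mean_in_input_units * scale + bias) + output_zero_point,
      static_cast<float>(std::numeric_limits<T>::max()));
  result = std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
  return static_cast<T>(result);
}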

@ -17,28 +17,30 @@ limitations under the License.
#include <cmath>
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline int32 GetNearestNeighbor(const int input_value, const int32 input_size,
const int32 output_size,
const bool align_corners,
const bool half_pixel_centers) {
inline int32_t GetNearestNeighbor(const int input_value,
const int32_t input_size,
const int32_t output_size,
const bool align_corners,
const bool half_pixel_centers) {
const float scale =
(align_corners && output_size > 1)
? (input_size - 1) / static_cast<float>(output_size - 1)
: input_size / static_cast<float>(output_size);
const float offset = half_pixel_centers ? 0.5f : 0.0f;
int32 output_value = std::min(
int32_t output_value = std::min(
align_corners
? static_cast<int32>(std::round((input_value + offset) * scale))
: static_cast<int32>(std::floor((input_value + offset) * scale)),
? static_cast<int32_t>(TfLiteRound((input_value + offset) * scale))
: static_cast<int32_t>(std::floor((input_value + offset) * scale)),
input_size - 1);
if (half_pixel_centers) {
output_value = std::max(static_cast<int32>(0), output_value);
output_value = std::max(static_cast<int32_t>(0), output_value);
}
return output_value;
}
@ -47,7 +49,7 @@ template <typename T>
inline void ResizeNearestNeighbor(
const tflite::ResizeNearestNeighborParams& op_params,
const RuntimeShape& unextended_input_shape, const T* input_data,
const RuntimeShape& output_size_shape, const int32* output_size_data,
const RuntimeShape& output_size_shape, const int32_t* output_size_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
@ -57,16 +59,16 @@ inline void ResizeNearestNeighbor(
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
int32 input_height = input_shape.Dims(1);
int32 input_width = input_shape.Dims(2);
int32 depth = MatchingDim(input_shape, 3, output_shape, 3);
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
int32_t input_height = input_shape.Dims(1);
int32_t input_width = input_shape.Dims(2);
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
// The Tensorflow version of this op allows resize on the width and height
// axis only.
TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
int32 output_height = output_size_data[0];
int32 output_width = output_size_data[1];
int32_t output_height = output_size_data[0];
int32_t output_width = output_size_data[1];
const int col_offset = input_shape.Dims(3);
const int row_offset = input_shape.Dims(2) * col_offset;
@ -76,14 +78,14 @@ inline void ResizeNearestNeighbor(
T* output_ptr = output_data;
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
int32 in_y = GetNearestNeighbor(y, input_height, output_height,
op_params.align_corners,
op_params.half_pixel_centers);
const T* y_input_ptr = input_ptr + in_y * row_offset;
for (int x = 0; x < output_width; ++x) {
int32 in_x = GetNearestNeighbor(x, input_width, output_width,
int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
op_params.align_corners,
op_params.half_pixel_centers);
const T* y_input_ptr = input_ptr + in_y * row_offset;
for (int x = 0; x < output_width; ++x) {
int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
op_params.align_corners,
op_params.half_pixel_centers);
const T* x_input_ptr = y_input_ptr + in_x * col_offset;
memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
output_ptr += depth;

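GetNearestNeighbor above maps each output coordinate back to an input coordinate under the align_corners / half_pixel_centers conventions. Restated standalone, with a worked value in the final comment:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Same index arithmetic as the function above, in one place.
int32_t NearestNeighborIndex(int x, int32_t in_size, int32_t out_size,
                             bool align_corners, bool half_pixel_centers) {
  const float scale = (align_corners && out_size > 1)
                          ? (in_size - 1) / static_cast<float>(out_size - 1)
                          : in_size / static_cast<float>(out_size);
  const float offset = half_pixel_centers ? 0.5f : 0.0f;
  int32_t v = std::min<int32_t>(
      align_corners ? static_cast<int32_t>(std::round((x + offset) * scale))
                    : static_cast<int32_t>(std::floor((x + offset) * scale)),
      in_size - 1);
  if (half_pixel_centers) v = std::max<int32_t>(0, v);
  return v;  // e.g. in_size=4, out_size=8, both flags false: x=5 -> floor(2.5) = 2
}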

@ -62,13 +62,14 @@ inline void Softmax(const SoftmaxParams& params,
}
}
// Quantized softmax with int8/uint8 input and int8/uint8/int16 output.
// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
// output.
template <typename InputT, typename OutputT>
inline void Softmax(const SoftmaxParams& params,
const RuntimeShape& input_shape, const InputT* input_data,
const RuntimeShape& output_shape, OutputT* output_data) {
const int32 input_beta_multiplier = params.input_multiplier;
const int32 input_beta_left_shift = params.input_left_shift;
const int32_t input_beta_multiplier = params.input_multiplier;
const int32_t input_beta_left_shift = params.input_left_shift;
const int diff_min = params.diff_min;
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large as
@ -78,9 +79,10 @@ inline void Softmax(const SoftmaxParams& params,
static const int kScaledDiffIntegerBits = 5;
static const int kAccumulationIntegerBits = 12;
using FixedPointScaledDiff =
gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
using FixedPointAccum =
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
@ -96,10 +98,10 @@ inline void Softmax(const SoftmaxParams& params,
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
for (int c = 0; c < depth; ++c) {
int32 input_diff =
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff >= diff_min) {
const int32 input_diff_rescaled =
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_beta_multiplier, input_beta_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
@ -114,28 +116,28 @@ inline void Softmax(const SoftmaxParams& params,
sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
for (int c = 0; c < depth; ++c) {
int32 input_diff =
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff >= diff_min) {
const int32 input_diff_rescaled =
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_beta_multiplier, input_beta_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
int32 unsat_output = gemmlowp::RoundingDivideByPOT(
int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
(shifted_scale * exp_in_0).raw(),
num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
const int32 shifted_output =
const int32_t shifted_output =
unsat_output +
static_cast<int32>(std::numeric_limits<OutputT>::min());
static_cast<int32_t>(std::numeric_limits<OutputT>::min());
output_data[i * depth + c] = static_cast<OutputT>(std::max(
std::min(shifted_output,
static_cast<int32>(std::numeric_limits<OutputT>::max())),
static_cast<int32>(std::numeric_limits<OutputT>::min())));
static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
} else {
output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
}
@ -143,7 +145,7 @@ inline void Softmax(const SoftmaxParams& params,
}
}
// Quantized softmax with int16 input and int16 output.
// Quantized softmax with int16_t input and int16_t output.
inline void SoftmaxInt16(const SoftmaxParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,

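The fixed-point softmax above is easier to follow against its float counterpart: subtract the row max for numerical stability (the role the diff_min test plays in the quantized path), exponentiate, normalize. A reference sketch:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Float softmax over one row of `depth` logits.
void SoftmaxFloatRow(const float* input, float* output, size_t depth) {
  const float max_in_row = *std::max_element(input, input + depth);
  float sum_of_exps = 0.f;
  for (size_t c = 0; c < depth; ++c) {
    output[c] = std::exp(input[c] - max_in_row);
    sum_of_exps += output[c];
  }
  for (size_t c = 0; c < depth; ++c) output[c] /= sum_of_exps;
}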

@ -16,8 +16,10 @@ limitations under the License.
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {


@ -15,9 +15,15 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
#include "fixedpoint/fixedpoint.h"
#include <stdint.h>
#include <algorithm>
#include <limits>
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
@ -41,11 +47,11 @@ inline void SubNonBroadcast(const ArithmeticParams& params,
inline void SubNonBroadcast(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
int32_t* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
@ -106,12 +112,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
template <int N = 5>
inline void BroadcastSubSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8* input1_data,
const uint8_t* input1_data,
const RuntimeShape& input2_shape,
const uint8* input2_data,
const uint8_t* input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8");
uint8_t* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
@ -134,28 +140,28 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
auto sub_func = [&](int indexes[N]) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
const int32 raw_output =
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sub, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[SubscriptToIndex(output_desc, indexes)] =
static_cast<uint8>(clamped_output);
static_cast<uint8_t>(clamped_output);
};
NDOpsHelper<N>(output_desc, sub_func);
}
@ -163,12 +169,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
template <int N = 5>
inline void BroadcastSubSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32");
int32_t* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t");
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
@ -208,7 +214,7 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
const int8_t* input2_data,
const RuntimeShape& output_shape,
int8_t* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8");
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
@ -254,6 +260,45 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
NDOpsHelper<N>(output_desc, sub_func);
}
template <int N = 5>
void BroadcastSubSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int64_t* input1_data,
const RuntimeShape& input2_shape,
const int64_t* input2_data,
const RuntimeShape& output_shape, int64_t* output_data) {
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t");
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
auto sub_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, indexes)] -
input2_data[SubscriptToIndex(desc2, indexes)],
params.int64_activation_min, params.int64_activation_max);
};
NDOpsHelper<N>(output_desc, sub_func);
}
template <typename T, int N = 5>
void BroadcastSubSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
@ -294,33 +339,33 @@ void BroadcastSubSlow(const ArithmeticParams& params,
// Element-wise Sub that can often be used for inner loop of broadcast sub as
// well as the non-broadcast sub.
inline void SubElementwise(int size, const ArithmeticParams& params,
const uint8* input1_data, const uint8* input2_data,
uint8* output_data) {
const uint8_t* input1_data,
const uint8_t* input2_data, uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
const int32 raw_output =
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sub, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
@ -336,22 +381,22 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
const int32 raw_output =
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sub, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
@ -359,9 +404,9 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
}
inline void Sub(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8* input1_data,
const RuntimeShape& input2_shape, const uint8* input2_data,
const RuntimeShape& output_shape, uint8* output_data) {
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
@ -428,40 +473,43 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
}
}
inline void SubWithActivation(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
inline void SetActivationMinMax(const ArithmeticParams& params,
int32_t* activation_min,
int32_t* activation_max) {
*activation_min = params.quantized_activation_min;
*activation_max = params.quantized_activation_max;
}
inline void SetActivationMinMax(const ArithmeticParams& params,
float* activation_min, float* activation_max) {
*activation_min = params.float_activation_min;
*activation_max = params.float_activation_max;
}
inline void SetActivationMinMax(const ArithmeticParams& params,
int64_t* activation_min,
int64_t* activation_max) {
*activation_min = params.int64_activation_min;
*activation_max = params.int64_activation_max;
}
template <typename T>
inline void SubWithActivation(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("SubWithActivation");
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
T activation_min, activation_max;
SetActivationMinMax(params, &activation_min, &activation_max);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] - input2_data[i], params.quantized_activation_min,
params.quantized_activation_max);
input1_data[i] - input2_data[i], activation_min, activation_max);
}
}
inline void SubWithActivation(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
float* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] - input2_data[i], params.float_activation_min,
params.float_activation_max);
}
}
} // namespace reference_ops
} // namespace tflite

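The refactor above collapses the per-type SubWithActivation overloads into one template, letting SetActivationMinMax overloads pick the right bounds for int32_t, float, or int64_t. A self-contained miniature of the dispatch idea; the struct and names are illustrative, not the real API.

#include <algorithm>
#include <cstdint>

struct MiniParams {
  int32_t quantized_min, quantized_max;
  float float_min, float_max;
};
// Overloads on the pointer type select which bounds to read, exactly as
// SetActivationMinMax does in the hunk above.
inline void MinMaxFor(const MiniParams& p, int32_t* lo, int32_t* hi) {
  *lo = p.quantized_min; *hi = p.quantized_max;
}
inline void MinMaxFor(const MiniParams& p, float* lo, float* hi) {
  *lo = p.float_min; *hi = p.float_max;
}
template <typename T>
void SubWithClamp(const MiniParams& p, const T* a, const T* b, T* out, int n) {
  T lo, hi;
  MinMaxFor(p, &lo, &hi);  // overload resolution picks the right bounds
  for (int i = 0; i < n; ++i)
    out[i] = std::min(hi, std::max(lo, static_cast<T>(a[i] - b[i])));
}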

@ -24,24 +24,29 @@ limitations under the License.
namespace tflite {
enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
enum class PaddingType : uint8 { kNone, kSame, kValid };
enum class FusedActivationFunctionType : uint8_t {
kNone,
kRelu6,
kRelu1,
kRelu
};
enum class PaddingType : uint8_t { kNone, kSame, kValid };
struct PaddingValues {
int16 width;
int16 height;
int16_t width;
int16_t height;
// offset is used for calculating "remaining" padding, for example, `width`
// is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
// 1 + 1 = 2.
int16 width_offset;
int16_t width_offset;
// Same as width_offset except it's over the height dimension.
int16 height_offset;
int16_t height_offset;
};
// This enumeration allows for non-default formats for the weights array
// of a fully-connected operator, allowing the use of special optimized
// runtime paths.
enum class FullyConnectedWeightsFormat : uint8 {
enum class FullyConnectedWeightsFormat : uint8_t {
// Default format (flat 2D layout, the inner contiguous dimension
// is input_depth, the outer non-contiguous dimension is output_depth)
kDefault,
@ -88,11 +93,11 @@ enum class FullyConnectedWeightsFormat : uint8 {
// maximize arithmetic throughput.
//
// Finally, the 'Int8' part in the name refers to the fact that this
// weights format has each weights value encoded as a signed int8 value,
// even if the data type of the weights buffer is uint8. This is intended
// weights format has each weights value encoded as a signed int8_t value,
// even if the data type of the weights buffer is uint8_t. This is intended
// to save runtime kernels the effort to have to XOR the top bit of these
// bytes before using them in signed arithmetic, see this file for more
// explanations on the 'signed int8 trick' in matrix multiplication kernels:
// explanations on the 'signed int8_t trick' in matrix multiplication kernels:
//
// tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
//
@ -111,7 +116,7 @@ enum class FullyConnectedWeightsFormat : uint8 {
// the real 0 value, and scale designates the difference between the real values
// corresponding to consecutive quantized values differing by 1.
struct QuantizationParams {
int32 zero_point = 0;
int32_t zero_point = 0;
double scale = 0.0;
};
@ -140,20 +145,20 @@ class RuntimeShape {
if (dimensions_count > kMaxSmallSize) {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32[dimensions_count];
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32_t[dimensions_count];
#endif // TF_LITE_STATIC_MEMORY
}
}
RuntimeShape(int shape_size, int32 value) : size_(0) {
RuntimeShape(int shape_size, int32_t value) : size_(0) {
Resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
SetDim(i, value);
}
}
RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) {
RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) {
ReplaceWith(dimensions_count, dims_data);
}
@ -165,33 +170,34 @@ class RuntimeShape {
// rolls out.
RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
if (size_ > kMaxSmallSize) {
dims_pointer_ = new int32[size_];
dims_pointer_ = new int32_t[size_];
}
std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_);
std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_);
}
bool operator==(const RuntimeShape& comp) const {
return this->size_ == comp.size_ &&
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0;
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
0;
}
~RuntimeShape() {
if (size_ > kMaxSmallSize) {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
#else // TF_LITE_STATIC_MEMORY
delete[] dims_pointer_;
#endif // TF_LITE_STATIC_MEMORY
}
}
inline int32 DimensionsCount() const { return size_; }
inline int32 Dims(int i) const {
inline int32_t DimensionsCount() const { return size_; }
inline int32_t Dims(int i) const {
TFLITE_DCHECK_GE(i, 0);
TFLITE_DCHECK_LT(i, size_);
return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
}
inline void SetDim(int i, int32 val) {
inline void SetDim(int i, int32_t val) {
TFLITE_DCHECK_GE(i, 0);
TFLITE_DCHECK_LT(i, size_);
if (size_ > kMaxSmallSize) {
@ -201,20 +207,20 @@ class RuntimeShape {
}
}
inline int32* DimsData() {
inline int32_t* DimsData() {
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
}
inline const int32* DimsData() const {
inline const int32_t* DimsData() const {
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
}
// The caller must ensure that the shape is no bigger than 5-D.
inline const int32* DimsDataUpTo5D() const { return dims_; }
inline const int32_t* DimsDataUpTo5D() const { return dims_; }
inline void Resize(int dimensions_count) {
if (size_ > kMaxSmallSize) {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
#else // TF_LITE_STATIC_MEMORY
delete[] dims_pointer_;
#endif // TF_LITE_STATIC_MEMORY
}
@ -222,16 +228,16 @@ class RuntimeShape {
if (dimensions_count > kMaxSmallSize) {
#ifdef TF_LITE_STATIC_MEMORY
TFLITE_CHECK(false && "No shape resizing supported on this platform");
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32[dimensions_count];
#else // TF_LITE_STATIC_MEMORY
dims_pointer_ = new int32_t[dimensions_count];
#endif // TF_LITE_STATIC_MEMORY
}
}
inline void ReplaceWith(int dimensions_count, const int32* dims_data) {
inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
Resize(dimensions_count);
int32* dst_dims = DimsData();
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32));
int32_t* dst_dims = DimsData();
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
}
template <typename T>
@ -239,7 +245,7 @@ class RuntimeShape {
const int dimensions_count =
std::distance(src_iterable.begin(), src_iterable.end());
Resize(dimensions_count);
int32* data = DimsData();
int32_t* data = DimsData();
for (auto it : src_iterable) {
*data = it;
++data;
@ -288,13 +294,13 @@ class RuntimeShape {
SetDim(i, pad_value);
}
std::memcpy(DimsData() + size_increase, shape.DimsData(),
sizeof(int32) * shape.DimensionsCount());
sizeof(int32_t) * shape.DimensionsCount());
}
int32 size_;
int32_t size_;
union {
int32 dims_[kMaxSmallSize];
int32* dims_pointer_;
int32_t dims_[kMaxSmallSize];
int32_t* dims_pointer_;
};
};
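RuntimeShape above keeps up to kMaxSmallSize dimensions inline in a union and heap-allocates only for higher ranks, which is why every size-changing path guards on size_ > kMaxSmallSize. The storage trick in isolation; a minimal sketch, not the real API, with copying deleted to keep it short.

#include <cstdint>

// Small-buffer optimization: small ranks live inline, large ranks on the heap.
class SmallShape {
 public:
  static constexpr int kMaxSmallSize = 5;
  explicit SmallShape(int n) : size_(n) {
    if (size_ > kMaxSmallSize) dims_pointer_ = new int32_t[size_];
  }
  ~SmallShape() {
    if (size_ > kMaxSmallSize) delete[] dims_pointer_;
  }
  SmallShape(const SmallShape&) = delete;
  SmallShape& operator=(const SmallShape&) = delete;
  int32_t* DimsData() { return size_ > kMaxSmallSize ? dims_pointer_ : dims_; }
 private:
  int32_t size_;
  union {
    int32_t dims_[kMaxSmallSize];  // inline storage for small ranks
    int32_t* dims_pointer_;        // heap storage otherwise
  };
};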
@ -713,7 +719,7 @@ void ComputeStrides(Dims<N>* dims) {
}
}
enum class BroadcastableOpCategory : uint8 {
enum class BroadcastableOpCategory : uint8_t {
kNone,
kNonBroadcast, // Matching input shapes.
kFirstInputBroadcastsFast, // Fivefold nested loops.
@ -729,21 +735,21 @@ static_assert(sizeof(MinMax) == 8, "");
struct ActivationParams {
FusedActivationFunctionType activation_type;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
};
struct ReluParams : public ActivationParams {
int32 input_offset;
int32 output_offset;
int32 output_multiplier;
int32 output_shift;
int32_t input_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
};
// Styles of resizing op usages. For example, kImageStyle can be used with a Pad
// op for pattern-specific optimization.
enum class ResizingCategory : uint8 {
enum class ResizingCategory : uint8_t {
kNone,
kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}.
kGenericResize,
@ -753,24 +759,29 @@ enum class ResizingCategory : uint8 {
struct ArithmeticParams {
// Shape dependent / common to data / op types.
BroadcastableOpCategory broadcast_category;
// uint8 inference params.
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32 output_multiplier;
// uint8_t inference params.
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// Add / Sub, not Mul, uint8 inference params.
// Add / Sub, not Mul, uint8_t inference params.
int left_shift;
int32 input1_multiplier;
int32_t input1_multiplier;
int input1_shift;
int32 input2_multiplier;
int32_t input2_multiplier;
int input2_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// TODO(b/158622529): Union the following activation params.
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
// int64_t activation params.
int64_t int64_activation_min;
int64_t int64_activation_max;
// Processed output dimensions.
// Let input "a" be the one that broadcasts in the faster-changing dimension.
@ -785,22 +796,22 @@ struct ArithmeticParams {
};
struct ConcatenationParams {
int8 axis;
const int32* input_zeropoint;
int8_t axis;
const int32_t* input_zeropoint;
const float* input_scale;
uint16 inputs_count;
int32 output_zeropoint;
uint16_t inputs_count;
int32_t output_zeropoint;
float output_scale;
};
struct ComparisonParams {
// uint8 inference params.
// uint8_t inference params.
int left_shift;
int32 input1_offset;
int32 input1_multiplier;
int32_t input1_offset;
int32_t input1_multiplier;
int input1_shift;
int32 input2_offset;
int32 input2_multiplier;
int32_t input2_offset;
int32_t input2_multiplier;
int input2_shift;
// Shape dependent / common to inference types.
bool is_broadcast;
@ -810,81 +821,81 @@ struct ConvParams {
PaddingType padding_type;
PaddingValues padding_values;
// TODO(starka): This was just "stride", so check that width+height is OK.
int16 stride_width;
int16 stride_height;
int16 dilation_width_factor;
int16 dilation_height_factor;
// uint8 inference params.
int16_t stride_width;
int16_t stride_height;
int16_t dilation_width_factor;
int16_t dilation_height_factor;
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
};
struct DepthToSpaceParams {
int32 block_size;
int32_t block_size;
};
struct DepthwiseParams {
PaddingType padding_type;
PaddingValues padding_values;
int16 stride_width;
int16 stride_height;
int16 dilation_width_factor;
int16 dilation_height_factor;
int16 depth_multiplier;
// uint8 inference params.
int16_t stride_width;
int16_t stride_height;
int16_t dilation_width_factor;
int16_t dilation_height_factor;
int16_t depth_multiplier;
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
const int32* output_multiplier_per_channel;
const int32* output_shift_per_channel;
const int32_t* output_multiplier_per_channel;
const int32_t* output_shift_per_channel;
};
struct DequantizationParams {
double scale;
int32 zero_point;
int32_t zero_point;
};
struct PerChannelDequantizationParams {
const float* scale;
const int32* zero_point;
int32 quantized_dimension;
const int32_t* zero_point;
int32_t quantized_dimension;
};
struct FakeQuantParams {
MinMax minmax;
int32 num_bits;
int32_t num_bits;
};
struct FullyConnectedParams {
// uint8 inference params.
// uint8_t inference params.
// TODO(b/65838351): Use smaller types if appropriate.
int32 input_offset;
int32 weights_offset;
int32 output_offset;
int32 output_multiplier;
int32_t input_offset;
int32_t weights_offset;
int32_t output_offset;
int32_t output_multiplier;
int output_shift;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
@ -895,16 +906,16 @@ struct FullyConnectedParams {
};
struct GatherParams {
int16 axis;
int16_t axis;
};
struct L2NormalizationParams {
// uint8 inference params.
int32 input_zero_point;
// uint8_t inference params.
int32_t input_zero_point;
};
struct LocalResponseNormalizationParams {
int32 range;
int32_t range;
double bias;
double alpha;
double beta;
@ -932,50 +943,50 @@ struct HardSwishParams {
};
struct LogisticParams {
// uint8 inference params.
int32 input_zero_point;
int32 input_range_radius;
int32 input_multiplier;
// uint8_t inference params.
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
struct LstmCellParams {
int32 weights_zero_point;
int32 accum_multiplier;
int32_t weights_zero_point;
int32_t accum_multiplier;
int accum_shift;
int state_integer_bits;
};
struct MeanParams {
int8 axis_count;
int16 axis[4];
int8_t axis_count;
int16_t axis[4];
};
struct PackParams {
int8 axis;
const int32* input_zeropoint;
int8_t axis;
const int32_t* input_zeropoint;
const float* input_scale;
uint16 inputs_count;
int32 output_zeropoint;
uint16_t inputs_count;
int32_t output_zeropoint;
float output_scale;
};
struct PadParams {
int8 left_padding_count;
int32 left_padding[4];
int8 right_padding_count;
int32 right_padding[4];
int8_t left_padding_count;
int32_t left_padding[4];
int8_t right_padding_count;
int32_t right_padding[4];
ResizingCategory resizing_category;
};
struct PreluParams {
int32 input_offset;
int32 alpha_offset;
int32 output_offset;
int32 output_multiplier_1;
int32 output_shift_1;
int32 output_multiplier_2;
int32 output_shift_2;
int32_t input_offset;
int32_t alpha_offset;
int32_t output_offset;
int32_t output_multiplier_1;
int output_shift_1;
int32_t output_multiplier_2;
int output_shift_2;
};
struct PoolParams {
@ -986,17 +997,17 @@ struct PoolParams {
int stride_width;
int filter_height;
int filter_width;
// uint8, etc, activation params.
int32 quantized_activation_min;
int32 quantized_activation_max;
// uint8_t, etc, activation params.
int32_t quantized_activation_min;
int32_t quantized_activation_max;
// float activation params.
float float_activation_min;
float float_activation_max;
};
struct ReshapeParams {
int8 shape_count;
int32 shape[4];
int8_t shape_count;
int32_t shape[4];
};
struct ResizeBilinearParams {
@ -1013,91 +1024,93 @@ struct ResizeNearestNeighborParams {
};
struct SliceParams {
int8 begin_count;
int32 begin[4];
int8 size_count;
int32 size[4];
int8_t begin_count;
int32_t begin[4];
int8_t size_count;
int32_t size[4];
};
struct SoftmaxParams {
// beta is not really used (not a Tensorflow parameter) and not implemented
// for LogSoftmax.
double beta;
// uint8 inference params. Used even when beta defaults to 1.0.
int32 input_multiplier;
int32 input_left_shift;
// uint8_t inference params. Used even when beta defaults to 1.0.
int32_t input_multiplier;
int32_t input_left_shift;
// Reverse scaling is only used by LogSoftmax.
int32 reverse_scaling_divisor;
int32 reverse_scaling_right_shift;
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
int32_t zero_point;
float scale;
float* table;
int16_t* exp_lut;
int16_t* one_over_one_plus_x_lut;
uint8_t* uint8_table1;
uint8_t* uint8_table2;
};
struct SpaceToBatchParams {
// "Zero" padding for uint8 means padding with the output offset.
int32 output_offset;
// "Zero" padding for uint8_t means padding with the output offset.
int32_t output_offset;
};
struct SpaceToDepthParams {
int32 block_size;
int32_t block_size;
};
struct SplitParams {
// Graphs that split into, say, 2000 nodes are encountered. The indices in
// OperatorEdges are of type uint16.
uint16 num_split;
int16 axis;
// OperatorEdges are of type uint16_t.
uint16_t num_split;
int16_t axis;
};
struct SqueezeParams {
int8 squeeze_dims_count;
int32 squeeze_dims[4];
int8_t squeeze_dims_count;
int32_t squeeze_dims[4];
};
struct StridedSliceParams {
int8 start_indices_count;
int32 start_indices[5];
int8 stop_indices_count;
int32 stop_indices[5];
int8 strides_count;
int32 strides[5];
int8_t start_indices_count;
int32_t start_indices[5];
int8_t stop_indices_count;
int32_t stop_indices[5];
int8_t strides_count;
int32_t strides[5];
int16 begin_mask;
int16 ellipsis_mask;
int16 end_mask;
int16 new_axis_mask;
int16 shrink_axis_mask;
int16_t begin_mask;
int16_t ellipsis_mask;
int16_t end_mask;
int16_t new_axis_mask;
int16_t shrink_axis_mask;
};
struct TanhParams {
int32 input_zero_point;
int32 input_range_radius;
int32 input_multiplier;
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
struct TransposeParams {
int8 perm_count;
int32 perm[5];
int8_t perm_count;
int32_t perm[5];
};
struct UnpackParams {
uint16 num_split;
int16 axis;
uint16_t num_split;
int16_t axis;
};
struct LeakyReluParams {
float alpha;
int32 input_offset;
int32 output_offset;
int32 output_multiplier_alpha;
int32 output_shift_alpha;
int32 output_multiplier_identity;
int32 output_shift_identity;
int32_t input_offset;
int32_t output_offset;
int32_t output_multiplier_alpha;
int32_t output_shift_alpha;
int32_t output_multiplier_identity;
int32_t output_shift_identity;
};
template <typename P>
@ -1107,13 +1120,19 @@ inline void SetActivationParams(float min, float max, P* params) {
}
template <typename P>
inline void SetActivationParams(int32 min, int32 max, P* params) {
inline void SetActivationParams(int32_t min, int32_t max, P* params) {
params->quantized_activation_min = min;
params->quantized_activation_max = max;
}
template <typename P>
inline void GetActivationParams(const P& params, int32* min, int32* max) {
inline void SetActivationParams(int64_t min, int64_t max, P* params) {
params->int64_activation_min = min;
params->int64_activation_max = max;
}
template <typename P>
inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) {
*min = params.quantized_activation_min;
*max = params.quantized_activation_max;
}
@ -1124,6 +1143,11 @@ inline void GetActivationParams(const P& params, float* min, float* max) {
*max = params.float_activation_max;
}
template <typename P>
inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
*min = params.int64_activation_min;
*max = params.int64_activation_max;
}
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_

View File

@ -14,15 +14,63 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
namespace tflite {
namespace {
inline TfLiteTensor* GetMutableInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
if (context->tensors != nullptr) {
return &context->tensors[node->inputs->data[index]];
} else {
return context->GetTensor(context, node->inputs->data[index]);
}
}
} // anonymous namespace.
const TfLiteTensor* GetInput(const TfLiteContext* context,
const TfLiteNode* node, int index) {
return GetMutableInput(context, node, index);
}
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
int index) {
TfLiteTensor* tensor = GetMutableInput(context, node, index);
return tensor->is_variable ? tensor : nullptr;
}
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
int index) {
if (context->tensors != nullptr) {
return &context->tensors[node->outputs->data[index]];
} else {
return context->GetTensor(context, node->outputs->data[index]);
}
}
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
const TfLiteNode* node, int index) {
const bool use_tensor = index < node->inputs->size &&
node->inputs->data[index] != kTfLiteOptionalTensor;
if (use_tensor) {
return GetMutableInput(context, node, index);
}
return nullptr;
}
// Per-axis
TfLiteStatus PopulateConvolutionQuantizationParams(
TfLiteContext* context, const TfLiteTensor* input,
@ -126,11 +174,27 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
// pipeline.
if (bias) {
const double bias_scale = static_cast<double>(bias->params.scale);
// Here we're making sure the input_product_scale & bias_scale are the same.
// Normally this should be guaranteed by the training pipeline, we are
// setting the threshold to be 2e-6 to allow some numeric stability
// difference.
TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 2e-6);
// Here we're making sure the input_product_scale & bias_scale are about the
// same. Since we have:
// (output - output_zp) * output_scale =
// input_product_scale * input_product + bias * bias_scale ---- (0)
//
// (0) equals:
// (input_product + bias) * input_product_scale ----- (1)
// +
// bias * (bias_scale - input_product_scale) ------ (2)
//
// For the real kernel computation we're doing (1), so we need to make sure
// (2) has minimal impact on the output; that is,
// bias * (bias_scale - input_product_scale) / output_scale should be a
// small number in output units.
// Since bias is normally within a small range, we should expect
// (bias_scale - input_product_scale) / output_scale to be a small number
// like 0.02.
const double scale_diff = std::abs(input_product_scale - bias_scale);
const double output_scale = static_cast<double>(output->params.scale);
TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
}
return GetQuantizedConvolutionMultipler(context, input, filter, output,
multiplier);
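
// Worked check of the relaxed tolerance above, with assumed scales; the old
// absolute 2e-6 threshold would reject these values even though the bias
// error is only half a percent of one output step.
#include <cassert>
#include <cmath>
int main() {
  const double input_product_scale = 0.00105;  // input_scale * filter_scale (assumed)
  const double bias_scale = 0.00100;           // assumed
  const double output_scale = 0.01;            // assumed
  const double scale_diff = std::fabs(input_product_scale - bias_scale);
  // 5e-5 fails the old absolute check, but 5e-5 / 0.01 = 0.005 <= 0.02.
  assert(scale_diff / output_scale <= 0.02);
  return 0;
}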
@ -167,7 +231,7 @@ void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
} else if (activation == kTfLiteActRelu6) {
*act_min = std::max(qmin, quantize(0.0));
*act_max = std::min(qmax, quantize(6.0));
} else if (activation == kTfLiteActRelu1) {
} else if (activation == kTfLiteActReluN1To1) {
*act_min = std::max(qmin, quantize(-1.0));
*act_max = std::min(qmax, quantize(1.0));
} else {

View File

@ -15,52 +15,55 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
#include <algorithm>
#include <stdint.h>
#include <limits>
#include "flatbuffers/flatbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// A fair number of functions in this header have historically been inline.
// It is ok to change functions to not be inline if the latency with
// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is
// made, move the newly non-inlined function declarations to the top of this
// header file.
const TfLiteTensor* GetInput(const TfLiteContext* context,
const TfLiteNode* node, int index);
// Note: You must check that the result is not null:
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
// TF_LITE_ENSURE(context, my_tensor != nullptr);
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
int index);
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
int index);
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
const TfLiteNode* node, int index);
inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
return t->dims->data[dim];
}
inline const TfLiteTensor* GetInput(TfLiteContext* context,
const TfLiteNode* node, int index) {
return &context
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
}
// Note: You must check that the result is not null:
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
// TF_LITE_ENSURE(context, my_tensor != nullptr);
inline TfLiteTensor* GetVariableInput(TfLiteContext* context,
const TfLiteNode* node, int index) {
TfLiteTensor* tensor =
&context->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
return (tensor->is_variable) ? tensor : nullptr;
}
inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
int index) {
return &context
->tensors[flatbuffers::EndianScalar(node->outputs->data[index])];
}
#ifndef TF_LITE_STATIC_MEMORY
inline TfLiteTensor* GetTemporary(TfLiteContext* context,
const TfLiteNode* node, int index) {
return &context->tensors[flatbuffers::EndianScalar(
node->temporaries->data[index])];
return &context->tensors[node->temporaries->data[index]];
}
inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
const TfLiteNode* node, int index) {
return &context->tensors[node->intermediates->data[index]];
}
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
inline int NumIntermediates(const TfLiteNode* node) {
return node->intermediates->size;
}
#endif // TF_LITE_STATIC_MEMORY
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
inline int64_t NumElements(const TfLiteIntArray* dims) {
int64_t count = 1;
@ -74,19 +77,11 @@ inline int64_t NumElements(const TfLiteTensor* t) {
return NumElements(t->dims);
}
inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
const TfLiteNode* node,
int index) {
const bool use_tensor = index < node->inputs->size &&
node->inputs->data[index] != kTfLiteOptionalTensor;
if (use_tensor) {
return &context
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
}
return nullptr;
}
// Determines whether tensor is constant.
// TODO(b/138199592): Introduce new query which checks for constant OR
// persistent-read-only, which would be useful for most tensor kernels that
// are potentially dynamic based on the input tensor value availability at the
// time of prepare.
inline bool IsConstantTensor(const TfLiteTensor* tensor) {
return tensor->allocation_type == kTfLiteMmapRo;
}
@ -105,6 +100,14 @@ inline void SetTensorToDynamic(TfLiteTensor* tensor) {
}
}
// Sets tensor to persistent and read-only.
inline void SetTensorToPersistentRo(TfLiteTensor* tensor) {
if (tensor->allocation_type != kTfLitePersistentRo) {
tensor->allocation_type = kTfLitePersistentRo;
tensor->data.raw = nullptr;
}
}
// Determines whether it is a hybrid op - one that has float inputs and
// quantized weights.
inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) {
@ -162,7 +165,7 @@ void CalculateActivationRange(TfLiteFusedActivation activation,
} else if (activation == kTfLiteActRelu6) {
*activation_min = 0;
*activation_max = 6;
} else if (activation == kTfLiteActRelu1) {
} else if (activation == kTfLiteActReluN1To1) {
*activation_min = -1;
*activation_max = 1;
} else {

View File

@ -19,7 +19,7 @@ limitations under the License.
// non-portable function.
#ifdef TF_LITE_MCU_DEBUG_LOG
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/debug_log.h"
#define DEBUG_LOG(x) \
do { \
@ -36,7 +36,6 @@ inline void InfiniteLoop() {
#else // TF_LITE_MCU_DEBUG_LOG
#include <cassert>
#include <cstdio>
#include <cstdlib>
@ -45,6 +44,15 @@ inline void InfiniteLoop() {
fprintf(stderr, "%s", (x)); \
} while (0)
// Reports an error for a type unsupported by op 'op_name' and returns kTfLiteError.
#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name) \
do { \
TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
__FILE__, __LINE__, TfLiteTypeGetName(type), \
(op_name)); \
return kTfLiteError; \
} while (0)
#define TFLITE_ABORT abort()
#endif // TF_LITE_MCU_DEBUG_LOG
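
// Sketch of the intended call site (op name and dispatch hypothetical): the
// macro logs file, line, and type name, then returns kTfLiteError from the
// enclosing kernel function.
TfLiteStatus EvalSketch(TfLiteContext* context, TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return kTfLiteOk;  // a real kernel would run its float path here
    default:
      TF_LITE_UNSUPPORTED_TYPE(context, type, "EXAMPLE_OP");
  }
}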

View File

@ -15,10 +15,10 @@ limitations under the License.
#include "tensorflow/lite/micro/examples/hello_world/main_functions.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/examples/hello_world/constants.h"
#include "tensorflow/lite/micro/examples/hello_world/model.h"
#include "tensorflow/lite/micro/examples/hello_world/output_handler.h"
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -34,8 +34,12 @@ TfLiteTensor* output = nullptr;
int inference_count = 0;
// Create an area of memory to use for input, output, and intermediate arrays.
// Finding the minimum value for your model may require some trial and error.
constexpr int kTensorArenaSize = 2 * 1024;
// Minimum arena size, at the time of writing. After allocating tensors
// you can retrieve this value by invoking interpreter.arena_used_bytes().
const int kModelArenaSize = 2468;
// Extra headroom for model + alignment + future interpreter changes.
const int kExtraArenaSize = 560 + 16 + 100;
const int kTensorArenaSize = kModelArenaSize + kExtraArenaSize;
uint8_t tensor_arena[kTensorArenaSize];
} // namespace
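
// A figure like kModelArenaSize can be measured rather than guessed, as the
// comment above suggests: allocate once with a deliberately oversized scratch
// arena, then read back arena_used_bytes(). Development-time sketch (the 8 KB
// scratch size is an assumption):
size_t MeasureArenaUsage() {
  static tflite::MicroErrorReporter micro_error_reporter;
  const tflite::Model* model = tflite::GetModel(g_model);
  static tflite::AllOpsResolver resolver;
  static uint8_t scratch_arena[8 * 1024];  // oversized on purpose
  tflite::MicroInterpreter interpreter(model, resolver, scratch_arena,
                                       sizeof(scratch_arena),
                                       &micro_error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return 0;
  return interpreter.arena_used_bytes();  // minimum arena for this model
}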
@ -60,7 +64,7 @@ void setup() {
// This pulls in all the operation implementations we need.
// NOLINTNEXTLINE(runtime-global-variables)
static tflite::ops::micro::AllOpsResolver resolver;
static tflite::AllOpsResolver resolver;
// Build an interpreter to run the model with.
static tflite::MicroInterpreter static_interpreter(

View File

@ -24,19 +24,8 @@ limitations under the License.
#include "tensorflow/lite/micro/examples/hello_world/model.h"
// We need to keep the data array aligned on some architectures.
#ifdef __has_attribute
#define HAVE_ATTRIBUTE(x) __has_attribute(x)
#else
#define HAVE_ATTRIBUTE(x) 0
#endif
#if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
#define DATA_ALIGN_ATTRIBUTE __attribute__((aligned(4)))
#else
#define DATA_ALIGN_ATTRIBUTE
#endif
const unsigned char g_model[] DATA_ALIGN_ATTRIBUTE = {
// Keep model aligned to 8 bytes to guarantee aligned 64-bit accesses.
alignas(8) const unsigned char g_model[] = {
0x1c, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x12, 0x00,
0x1c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00,
0x00, 0x00, 0x18, 0x00, 0x12, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,

View File

@ -21,6 +21,8 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
namespace tflite {
namespace ops {
@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return std::max(0.0f, a);
case kTfLiteActRelu1:
return std::max(-1.0f, std::min(a, 1.0f));
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return std::max(0.0f, std::min(a, 6.0f));
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:

View File

@ -18,30 +18,82 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename Q>
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
const Q* input_data, const RuntimeShape& output_shape,
Q* output_data) {
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val < lower ? lower : val;
output_data[i] = clamped;
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
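
// To make the requantization above concrete: assume input scale 0.5, output
// scale 1.0, and zero points of 0. Then real_multiplier = 0.5, which
// QuantizeMultiplier encodes as output_multiplier = 1 << 30 with
// output_shift = 0. A simplified per-element sketch (plain 64-bit shift in
// place of the rounding MultiplyByQuantizedMultiplier helper):
int8_t RequantizedReluSketch(int8_t in) {
  const int32_t input_offset = 0;               // assumed input zero point
  const int32_t output_offset = 0;              // assumed output zero point
  const int64_t multiplier = int64_t{1} << 30;  // 0.5 in Q31
  int64_t scaled =
      (static_cast<int64_t>(in - input_offset) * multiplier) >> 31;
  int32_t clamped = static_cast<int32_t>(scaled) + output_offset;
  clamped = std::max<int32_t>(0, clamped);    // quantized_activation_min
  clamped = std::min<int32_t>(127, clamped);  // quantized_activation_max
  return static_cast<int8_t>(clamped);        // e.g. 100 -> 50, -100 -> 0
}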
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
@ -77,33 +129,57 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
ReluFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<int8_t>(input),
GetTensorShape(output),
GetTensorData<int8_t>(output));
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
GetTensorData<uint8_t>(input),
GetTensorShape(output),
GetTensorData<uint8_t>(output));
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@ -114,37 +190,62 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
}
}
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToAsymmetricQuantizedUInt8(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
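
// Quick numeric check of what Relu6Prepare stores, with an assumed input
// scale of 0.1 and zero point of -128:
//   six_int8  = FloatToAsymmetricQuantizedInt8(6.0f, 0.1f, -128)
//             = -128 + round(6.0 / 0.1) = -68
//   zero_int8 = -128
// so Relu6Quantized clamps every int8_t value into [-128, -68].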
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
Relu6Float(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
const int8_t six = FloatToAsymmetricQuantizedInt8(
6.0f, input->params.scale, input->params.zero_point);
const int8_t zero = input->params.zero_point;
Relu6Quantized<int8_t>(
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
6.0f, input->params.scale, input->params.zero_point);
const uint8_t zero = input->params.zero_point;
Relu6Quantized<uint8_t>(
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
@ -157,28 +258,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_RELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_RELU6() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -23,6 +23,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
@ -40,18 +42,22 @@ struct OpData {
// and the special 16-bit -> 16-bit quantized path
int input1_shift;
int input2_shift;
int32 output_activation_min;
int32 output_activation_max;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path
int32 input1_multiplier;
int32 input2_multiplier;
int32 output_multiplier;
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32 input1_offset;
int32 input2_offset;
int32 output_offset;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
// Used only for float evals:
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
@ -89,24 +95,28 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
} else if (output->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpData* data, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output))
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
#define TF_LITE_ADD(opname) \
reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<float>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<float>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<float>(output))
if (data->requires_broadcast) {
TF_LITE_ADD(BroadcastAdd4DSlow);
} else {
@ -117,9 +127,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpData* data,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
@ -135,12 +145,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
#define TF_LITE_ADD(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output));
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
@ -160,21 +173,45 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, &data));
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, &data, input1, input2, output);
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -187,16 +224,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace add
TfLiteRegistration* Register_ADD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ADD() {
return {/*init=*/add::Init,
/*free=*/nullptr,
/*prepare=*/add::Prepare,
/*invoke=*/add::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/micro_utils.h"
namespace tflite {
@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxis);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis =
tflite::micro::GetEvalInput(context, node, kAxis);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
GetTensorData<axis_type>(axis), GetTensorShape(output), \
GetTensorData<output_type>(output), is_arg_max)
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<data_type>(input), \
tflite::micro::GetTensorData<axis_type>(axis), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_type>(output), \
is_arg_max)
if (axis->type == kTfLiteInt32) {
if (output->type == kTfLiteInt32) {
switch (input->type) {
@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
break;
default:
TF_LITE_KERNEL_LOG(context,
"Only float32, uint8 and int8 are "
"Only float32, uint8_t and int8_t are "
"supported currently, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context,
"Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
TfLiteTypeGetName(axis->type));
return kTfLiteError;
}
@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace arg_min_max
TfLiteRegistration* Register_ARG_MAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_ARG_MIN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ARG_MIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/arg_min_max::ArgMinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -32,8 +33,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@ -43,26 +44,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace ceil
TfLiteRegistration* Register_CEIL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CEIL() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/ceil::Prepare,
/*invoke=*/ceil::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
* The circular buffer custom operator is used to implement strided streaming
@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// The circular buffer custom operator currently only supports int8.
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
// The circular buffer custom operator currently only supports int8_t.
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
// TODO(b/132070898): Use statically slotted OpData structures until a
// scratch memory API is ready.
@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
OpData* data = reinterpret_cast<OpData*>(node->user_data);
@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int depth = output->dims->data[3];
if (input->type == kTfLiteInt8) {
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
GetTensorData<int8_t>(output));
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -25,103 +26,109 @@ namespace micro {
namespace comparisons {
namespace {
struct OpData {
ComparisonParams params;
};
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// TODO(ruic): optimize macros below to use template functions.
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
template <typename input_dtype> \
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
const TfLiteTensor* input1, \
const TfLiteTensor* input2, TfLiteTensor* output, \
bool requires_broadcast) { \
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
auto input1_offset = -input1->params.zero_point; \
auto input2_offset = -input2->params.zero_point; \
const int left_shift = 8; \
\
int32 input1_multiplier; \
int input1_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input1->params.scale), &input1_multiplier, \
&input1_shift); \
int32 input2_multiplier; \
int input2_shift; \
QuantizeMultiplierSmallerThanOneExp( \
static_cast<double>(input2->params.scale), &input2_multiplier, \
&input2_shift); \
\
ComparisonParams op_params; \
op_params.left_shift = left_shift; \
op_params.input1_offset = input1_offset; \
op_params.input1_multiplier = input1_multiplier; \
op_params.input1_shift = input1_shift; \
op_params.input2_offset = input2_offset; \
op_params.input2_multiplier = input2_multiplier; \
op_params.input2_shift = input2_shift; \
if (requires_broadcast) { \
reference_ops::Broadcast4DSlow##opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} else { \
reference_ops::opname##WithScaling( \
op_params, GetTensorShape(input1), \
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
GetTensorData<bool>(output)); \
} \
} \
}
TF_LITE_QUANTIZE_COMPARISON(Equal);
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
TF_LITE_QUANTIZE_COMPARISON(Greater);
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
TF_LITE_QUANTIZE_COMPARISON(Less);
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
#undef TF_LITE_QUANTIZE_COMPARISON
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
{ \
ComparisonParams op_params; \
requires_broadcast \
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)) \
: reference_ops::opname##NoScaling( \
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
GetTensorShape(input2), GetTensorData<type>(input2), \
GetTensorShape(output), GetTensorData<bool>(output)); \
}
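
// The fixed-point rescaling these deleted macros performed (left_shift = 8
// plus per-input multipliers) matches, up to rounding, a comparison of the
// dequantized real values. Floating-point sketch of that equivalence, all
// parameters assumed:
bool GreaterWithScalingSketch(uint8_t x1, int32_t zero_point1, double scale1,
                              uint8_t x2, int32_t zero_point2, double scale2) {
  const double real1 = (x1 - zero_point1) * scale1;
  const double real2 = (x2 - zero_point2) * scale2;
  return real1 > real2;
}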
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::EqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::EqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteBool:
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<bool>(input1), input2_shape,
tflite::micro::GetTensorData<bool>(input2), output_shape,
output_data);
break;
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowNotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::NotEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::GreaterEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
bool requires_broadcast = !HaveSameShapes(input1, input2);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
bool* output_data = tflite::micro::GetTensorData<bool>(output);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32:
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<float>(input1), input2_shape,
tflite::micro::GetTensorData<float>(input2), output_shape,
output_data);
break;
case kTfLiteInt32:
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
tflite::micro::GetTensorData<int32_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt64:
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualNoScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
tflite::micro::GetTensorData<int64_t>(input2), output_shape,
output_data);
break;
case kTfLiteUInt8:
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
output_data);
break;
case kTfLiteInt8:
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
requires_broadcast);
requires_broadcast
? reference_ops::Broadcast4DSlowLessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data)
: reference_ops::LessEqualWithScaling(
data->params, input1_shape,
tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
tflite::micro::GetTensorData<int8_t>(input2), output_shape,
output_data);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -291,78 +608,113 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
auto input1_offset = -input1->params.zero_point;
auto input2_offset = -input2->params.zero_point;
const int kLeftShift = 8;
int32_t input1_multiplier;
int input1_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input1->params.scale), &input1_multiplier,
&input1_shift);
int32_t input2_multiplier;
int input2_shift;
QuantizeMultiplierSmallerThanOneExp(
static_cast<double>(input2->params.scale), &input2_multiplier,
&input2_shift);
data->params.left_shift = kLeftShift;
data->params.input1_offset = input1_offset;
data->params.input1_multiplier = input1_multiplier;
data->params.input1_shift = input1_shift;
data->params.input2_offset = input2_offset;
data->params.input2_multiplier = input2_multiplier;
data->params.input2_shift = input2_shift;
}
return kTfLiteOk;
}
} // namespace comparisons
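For the quantized (uint8/int8) comparison cases, the new Prepare() above folds each input's float scale into a Q31 fixed-point multiplier plus a shift once, so Eval() stays integer-only. A minimal sketch of that decomposition, assuming only the public QuantizeMultiplierSmallerThanOneExp() helper from quantization_util.h (the scale value is illustrative, not from the diff):
#include "tensorflow/lite/kernels/internal/quantization_util.h"
// Sketch: encode a real scale in (0, 1) as multiplier * 2^(shift - 31).
void DecomposeScaleSketch() {
  const double scale = 1.0 / 255.0;  // hypothetical uint8 tensor scale
  int32_t multiplier = 0;            // Q31 mantissa
  int shift = 0;                     // <= 0 for scales below 1.0
  tflite::QuantizeMultiplierSmallerThanOneExp(scale, &multiplier, &shift);
  // Here scale ~= multiplier * 2^(shift - 31); both values then live in
  // data->params for the WithScaling comparison paths above.
}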
TfLiteRegistration* Register_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::EqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
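This registration change repeats for every kernel in the commit: instead of returning a pointer to a function-local static, each Register_*() now returns the TfLiteRegistration by value, and the caller (the op resolver) is assumed to keep its own copy. A sketch of the assumed caller side:
// Sketch (assumed usage): the returned struct is copied, so no static
// storage has to outlive this call.
TfLiteRegistration equal_registration = tflite::ops::micro::Register_EQUAL();
// equal_registration.invoke now points at comparisons::EqualEval.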
TfLiteRegistration* Register_NOT_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_NOT_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::NotEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_GREATER() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_GREATER() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_GREATER_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_GREATER_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::GreaterEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LESS() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LESS() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LESS_EQUAL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LESS_EQUAL() {
return {/*init=*/comparisons::Init,
/*free=*/nullptr,
/*prepare=*/comparisons::Prepare,
/*invoke=*/comparisons::LessEqualEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -18,10 +18,11 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -31,6 +32,104 @@ namespace concatenation {
constexpr int kMaxInputNum = 10; // Maximum number of input tensors
constexpr int kOutputTensor = 0;
struct OpData {
ConcatenationParams params;
};
// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
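A quick worked example of the coercion above, with a hypothetical 4-D output tensor:
// NumDimensions(output) == 4 (illustrative):
//   CalculatePositiveAxis( 3, output) -> 3
//   CalculatePositiveAxis(-1, output) -> 4 + (-1) == 3 (innermost axis)
//   CalculatePositiveAxis(-4, output) -> 4 + (-4) == 0 (outermost axis)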
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllInputTensorShapes(const TfLiteContext* context,
const TfLiteNode* node,
RuntimeShape all_shapes[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
RuntimeShape shape = tflite::micro::GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllInputTensorData(const TfLiteContext* context,
const TfLiteNode* node,
T* all_data[kMaxInputNum]) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(node != nullptr);
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
all_data[i] = tflite::micro::GetTensorData<T>(t);
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
GetAllInputTensorShapes(context, node, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllInputTensorData(context, node, inputs_data);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
reference_ops::ConcatenationWithScaling(
data->params, inputs_shape_ptr, inputs_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
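ConcatenationWithScaling must requantize every input into the output's quantization domain as it copies. Per element the math is, in effect (a sketch, not the upstream implementation):
// q_out = round((q_in - input_zeropoint[i]) * input_scale[i] / output_scale)
//         + output_zeropoint, clamped to [0, 255] for uint8.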
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// This function only checks the types. Additional shape validations are
// performed in the reference implementation called during Eval().
@ -63,125 +162,63 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_KERNEL_LOG(
context,
"Op Concatenation does not currently support num dimensions >4 "
"Tensor '%s' has %d dimensions.",
input->name, num_dimensions);
"Tensor has %d dimensions.",
num_dimensions);
return kTfLiteError;
}
}
// Calculate OpData.
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
switch (output_type) { // Already know in/out types are same.
case kTfLiteFloat32:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
break;
}
case kTfLiteUInt8:
case kTfLiteInt8: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
data->params.inputs_count = node->inputs->size;
float* input_scales =
reinterpret_cast<float*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(float)));
int32_t* input_zero_points =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, node->inputs->size * sizeof(int32_t)));
// Allocate persistent scale and zeropoint buffers.
// Store input scale and zero point values in OpParams:
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
input_scales[i] = t->params.scale;
input_zero_points[i] = t->params.zero_point;
}
data->params.input_scale = input_scales;
data->params.input_zeropoint = input_zero_points;
data->params.output_zeropoint = output->params.zero_point;
data->params.output_scale = output->params.scale;
break;
}
default:
TF_LITE_KERNEL_LOG(
context, "Op Concatenation does not currently support Type '%s'.",
TfLiteTypeGetName(output_type));
return kTfLiteError;
}
return kTfLiteOk;
}
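The scale and zero-point arrays built in this Prepare() must remain valid for every later Eval() call, which is why they come from AllocatePersistentBuffer() (arena lifetime) rather than the stack or a scratch buffer. A minimal sketch of the pattern, with n standing in for node->inputs->size:
// Sketch: arena-lifetime array of n floats; it is never freed individually,
// only together with the interpreter's arena.
float* scales = reinterpret_cast<float*>(
    context->AllocatePersistentBuffer(context, n * sizeof(float)));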
// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
if (axis >= 0) {
return axis;
} else {
return NumDimensions(output_tensor) + axis;
}
}
// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.
// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
RuntimeShape all_shapes[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
RuntimeShape shape = GetTensorShape(t);
all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
}
}
// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
const RuntimeShape* pointers[]) {
for (size_t i = 0; i < num; ++i) {
pointers[i] = &shapes[i];
}
}
// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
T* all_data[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
all_data[i] = GetTensorData<T>(t);
}
}
// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
const TfLiteIntArray& tensor_list,
float scales[kMaxInputNum],
int32 zero_points[kMaxInputNum]) {
for (int i = 0; i < tensor_list.size; ++i) {
const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
scales[i] = t->params.scale;
zero_points[i] = t->params.zero_point;
}
}
template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const data_type* inputs_data[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
GetTensorShape(output),
GetTensorData<data_type>(output));
}
void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
// Collect the shapes and data pointer of input tensors
RuntimeShape inputs_shape[kMaxInputNum];
const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
const uint8_t* inputs_data[kMaxInputNum];
float inputs_scale[kMaxInputNum];
int32 inputs_zero_point[kMaxInputNum];
GetAllTensorShapes(*context, *node->inputs, inputs_shape);
GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
GetAllTensorData(*context, *node->inputs, inputs_data);
GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
inputs_zero_point);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteConcatenationParams* params =
reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
ConcatenationParams op_params;
op_params.axis = CalculatePositiveAxis(params->axis, output);
op_params.inputs_count = NumInputs(node);
op_params.input_zeropoint = inputs_zero_point;
op_params.input_scale = inputs_scale;
op_params.output_zeropoint = output->params.zero_point;
op_params.output_scale = output->params.scale;
reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
inputs_data, GetTensorShape(output),
GetTensorData<uint8>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
@ -214,16 +251,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace concatenation
TfLiteRegistration* Register_CONCATENATION() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CONCATENATION() {
return {/*init=*/concatenation::Init,
/*free=*/nullptr,
/*prepare=*/concatenation::Prepare,
/*invoke=*/concatenation::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -42,6 +43,12 @@ constexpr int kConvQuantizedDimension = 0;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
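The "real multiplier" named in that comment is the combined rescale from the quantized input domain to the output domain; for conv it is input_scale * filter_scale / output_scale. A hedged sketch of how such a value is packed into the multiplier-plus-shift pair, using the public QuantizeMultiplier() helper (all scale values illustrative):
#include "tensorflow/lite/kernels/internal/quantization_util.h"
void RealMultiplierSketch() {
  const double input_scale = 0.5, filter_scale = 0.25, output_scale = 1.0;
  const double real_multiplier = input_scale * filter_scale / output_scale;
  int32_t output_multiplier = 0;
  int output_shift = 0;  // positive means shift left
  tflite::QuantizeMultiplier(real_multiplier, &output_multiplier,
                             &output_shift);
}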
@ -109,12 +116,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
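This Init() hunk is the clearest view of the allocator API change running through the commit: the old AllocatePersistentBuffer() returned a status and wrote the pointer through an out-parameter, while the new one returns the pointer directly. A caller that wants to handle arena exhaustion is assumed to check for null itself:
// Sketch (assumed failure mode: nullptr on arena exhaustion):
void* data = context->AllocatePersistentBuffer(context, sizeof(OpData));
if (data == nullptr) {
  // Out of arena space; nothing to free, the arena owns all allocations.
}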
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@ -137,12 +139,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t),
reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t),
reinterpret_cast<void**>(&data->per_channel_output_shift)));
data->per_channel_output_multiplier =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
@ -163,19 +165,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
affine_quantization->zero_point->size);
}
return CalculateOpData(context, node, params, input_width, input_height,
filter_width, filter_height, output_width,
output_height, input->type, data);
TF_LITE_ENSURE_STATUS(CalculateOpData(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
} // namespace conv
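The three cached zero points are the point of this Prepare() change: Eval() now works on TfLiteEvalTensor, which carries only data, dims, and type, with no quantization params, so anything the kernel needs from tensor->params has to be snapshotted into OpData while the full TfLiteTensor is still available. The resulting split, sketched with names from the diff:
// Prepare() - full TfLiteTensor available:
//   data->input_zero_point = input->params.zero_point;
// Eval() - TfLiteEvalTensor only:
//   const int32_t input_offset = -data.input_zero_point;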
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
@ -193,24 +202,29 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.output_shift = -data.output_shift;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<uint8_t>(input), GetTensorShape(filter),
GetTensorData<uint8_t>(filter), GetTensorShape(bias),
GetTensorData<int32_t>(bias), GetTensorShape(output),
GetTensorData<uint8_t>(output), GetTensorShape(im2col),
GetTensorData<uint8_t>(im2col), nullptr);
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
TfLiteTensor* im2col) {
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output,
TfLiteEvalTensor* im2col) {
// TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.input_offset = -data.input_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.dilation_height_factor = params->dilation_height_factor;
@ -222,18 +236,21 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
reference_integer_ops::ConvPerChannel(
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteConvParams* params, const OpData& data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* im2col,
TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
@ -249,21 +266,31 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
reference_ops::Conv(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(filter),
GetTensorData<float>(filter), GetTensorShape(bias),
GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output), GetTensorShape(im2col),
GetTensorData<float>(im2col));
reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(im2col),
tflite::micro::GetTensorData<float>(im2col));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
@ -291,16 +318,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace conv
TfLiteRegistration* Register_CONV_2D() {
static TfLiteRegistration r = {/*init=*/conv::Init,
/*free=*/nullptr,
/*prepare=*/conv::Prepare,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_CONV_2D() {
return {/*init=*/conv::Init,
/*free=*/nullptr,
/*prepare=*/conv::Prepare,
/*invoke=*/conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -42,6 +43,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;
struct OpData {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
@ -95,12 +102,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@ -111,6 +113,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
OpData* data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
@ -120,16 +123,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
int filter_width = SizeOfDimension(filter, 2);
int filter_height = SizeOfDimension(filter, 1);
// Per channel quantization is only needed for int8 inference. For other
// Per channel quantization is only needed for int8_t inference. For other
// quantized types, only a single scale and zero point is needed.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
// Dynamically allocate per-channel quantization parameters.
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t),
reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t),
reinterpret_cast<void**>(&data->per_channel_output_shift)));
data->per_channel_output_multiplier =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
@ -150,14 +153,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
affine_quantization->zero_point->size);
}
return CalculateOpData(context, node, params, width, height, filter_width,
filter_height, data_type, data);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
filter_width, filter_height, data_type,
data));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
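For int8 depthwise conv the rescale is per output channel, each channel having its own filter scale, hence the per_channel_output_multiplier and per_channel_output_shift arrays allocated above. A sketch of how such arrays are typically populated (names and signature are illustrative, not the upstream helper):
void PerChannelMultipliersSketch(double input_scale, double output_scale,
                                 const float* filter_scales, int num_channels,
                                 int32_t* multipliers, int32_t* shifts) {
  for (int c = 0; c < num_channels; ++c) {
    const double real_mult =
        input_scale * static_cast<double>(filter_scales[c]) / output_scale;
    int shift = 0;
    tflite::QuantizeMultiplier(real_mult, &multipliers[c], &shift);
    shifts[c] = shift;
  }
}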
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
@ -165,8 +175,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
@ -176,74 +186,87 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params,
const OpData* data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
DepthwiseParams op_params;
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.input_offset = -input->params.zero_point;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = 0;
op_params.output_offset = output->params.zero_point;
op_params.output_offset = data.output_zero_point;
// TODO(b/130439627): Use calculated value for clamping.
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
reference_integer_ops::DepthwiseConvPerChannel(
op_params, data->per_channel_output_multiplier,
data->per_channel_output_shift, GetTensorShape(input),
GetTensorData<int8>(input), GetTensorShape(filter),
GetTensorData<int8>(filter), GetTensorShape(bias),
GetTensorData<int32>(bias), GetTensorShape(output),
GetTensorData<int8>(output));
op_params, data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDepthwiseConvParams* params, const OpData* data,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
TfLiteDepthwiseConvParams* params, const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::DepthwiseParams op_params;
// Padding type is ignored, but still set.
op_params.padding_type = PaddingType::kSame;
op_params.padding_values.width = data->padding.width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params->stride_width;
op_params.stride_height = params->stride_height;
op_params.dilation_width_factor = params->dilation_width_factor;
op_params.dilation_height_factor = params->dilation_height_factor;
op_params.depth_multiplier = params->depth_multiplier;
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
op_params.input_offset = input_offset;
op_params.weights_offset = filter_offset;
op_params.output_offset = output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_multiplier = data.output_multiplier;
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
op_params.output_shift = -data->output_shift;
op_params.output_shift = -data.output_shift;
tflite::reference_ops::DepthwiseConv(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(filter), GetTensorData<uint8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<uint8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
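The negated zero points (input_offset = -zero_point and so on) follow the reference kernels' convention of adding offsets inside the accumulation loop; in essence:
// acc += (input[i] + input_offset) * (filter[j] + weights_offset)
//     == (input[i] - input_zero_point) * (filter[j] - filter_zero_point)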
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@ -254,24 +277,29 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
const TfLiteTensor* bias =
(NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFilterTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
// TODO(aselle): Consider whether float conv and quantized conv should be
// separate ops to avoid dispatch overhead here.
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input, filter, bias, output);
EvalFloat(context, node, params, data, input, filter, bias, output);
break;
case kTfLiteInt8:
EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
output);
break;
case kTfLiteUInt8:
EvalQuantized(context, node, params, &data, input, filter, bias, output);
EvalQuantized(context, node, params, data, input, filter, bias, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -283,16 +311,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace depthwise_conv
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
static TfLiteRegistration r = {/*init=*/depthwise_conv::Init,
/*free=*/nullptr,
/*prepare=*/depthwise_conv::Prepare,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
return {/*init=*/depthwise_conv::Init,
/*free=*/nullptr,
/*prepare=*/depthwise_conv::Prepare,
/*invoke=*/depthwise_conv::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -29,20 +30,17 @@ namespace micro {
namespace dequantize {
struct OpData {
tflite::DequantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t output_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@ -69,6 +67,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
return kTfLiteOk;
}
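Caching DequantizationParams here has the same TfLiteEvalTensor motivation as the conv kernels above. The dequantization itself is the plain affine map, sketched with illustrative int8 params:
// real_value = scale * (quantized_value - zero_point)
// e.g. scale = 0.5f, zero_point = -128:
//   q = -128 -> 0.0f,   q = 0 -> 64.0f,   q = 127 -> 127.5f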
@ -76,28 +78,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (output->type == kTfLiteFloat32) {
tflite::DequantizationParams op_params;
op_params.zero_point = input->params.zero_point;
op_params.scale = static_cast<double>(input->params.scale);
switch (input->type) {
case kTfLiteUInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt16:
reference_ops::Dequantize(
op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -106,21 +111,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
}
} else if (output->type == kTfLiteInt32) {
int flat_size =
MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output));
switch (input->type) {
case kTfLiteInt16: {
reference_ops::Requantize(
GetTensorData<int16_t>(input), flat_size, data->output_multiplier,
data->output_shift, input->params.zero_point,
output->params.zero_point, GetTensorData<int32_t>(output));
tflite::micro::GetTensorData<int16_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
case kTfLiteInt8: {
reference_ops::Requantize(
GetTensorData<int8_t>(input), flat_size, data->output_multiplier,
data->output_shift, input->params.zero_point,
output->params.zero_point, GetTensorData<int32_t>(output));
tflite::micro::GetTensorData<int8_t>(input), flat_size,
data->output_multiplier, data->output_shift,
data->quantization_params.zero_point, data->output_zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
}
default:
@@ -141,16 +148,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace dequantize
TfLiteRegistration* Register_DEQUANTIZE() {
static TfLiteRegistration r = {/*init=*/dequantize::Init,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_DEQUANTIZE() {
return {/*init=*/dequantize::Init,
/*free=*/nullptr,
/*prepare=*/dequantize::Prepare,
/*invoke=*/dequantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
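
Init above reflects the simplified AllocatePersistentBuffer contract in this update: the allocator now returns the buffer pointer directly (nullptr on failure) instead of filling an out-parameter and returning a TfLiteStatus. A sketch of the two calling patterns side by side:

    // Old contract (status return plus out-parameter):
    //   void* data = nullptr;
    //   if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
    //       kTfLiteError) return nullptr;
    // New contract (pointer result, nullptr on failure); the interpreter hands
    // the same pointer back through node->user_data in Prepare and Eval:
    return context->AllocatePersistentBuffer(context, sizeof(OpData));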


@@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -40,7 +42,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (!IsSupportedType(input->type)) {
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
TfLiteTypeGetName(input->type), input->type);
@@ -52,13 +54,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
template <typename T>
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
T func(T), TfLiteType expected_type) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
const int64_t num_elements = NumElements(input);
const T* in_data = GetTensorData<T>(input);
T* out_data = GetTensorData<T>(output);
for (int64_t i = 0; i < num_elements; ++i) {
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
const size_t num_elements = ElementCount(*input->dims);
const T* in_data = tflite::micro::GetTensorData<T>(input);
T* out_data = tflite::micro::GetTensorData<T>(output);
for (size_t i = 0; i < num_elements; ++i) {
out_data[i] = func(in_data[i]);
}
return kTfLiteOk;
@@ -106,137 +108,103 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
return EvalLogical(context, node, [](bool v) { return !v; });
}
TfLiteStatus TANHEval(TfLiteContext* context, TfLiteNode* node) {
return EvalNumeric(context, node, std::tanh);
}
} // namespace
} // namespace elementwise
TfLiteRegistration* Register_ABS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ABS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::AbsEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SIN() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SIN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SinEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_COS() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_COS() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::CosEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOG() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::LogEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_RSQRT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RSQRT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::RsqrtEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_SQUARE() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SQUARE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::SquareEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOGICAL_NOT() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
}
TfLiteRegistration* Register_TANH() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
/*invoke=*/elementwise::TANHEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOGICAL_NOT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
/*invoke=*/elementwise::LogicalNotEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
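
EvalImpl collapses every unary op in this file into one templated loop over a plain function. Tracing Register_ABS as a sketch; the intermediate EvalNumeric wrapper is elided from the hunks above, so its exact body is an assumption:

    // AbsEval forwards through EvalNumeric to
    //   EvalImpl<float>(context, node, std::abs, kTfLiteFloat32);
    // so after inlining, the whole op body is just:
    for (size_t i = 0; i < num_elements; ++i) {
      out_data[i] = std::abs(in_data[i]);
    }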


@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Floor(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace floor
TfLiteRegistration* Register_FLOOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_FLOOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/floor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro


@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -40,6 +41,10 @@ struct OpData {
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
// Cached zero point values of tensors.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
};
constexpr int kInputTensor = 0;
@@ -64,6 +69,10 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, activation, output, &data->output_activation_min,
&data->output_activation_max));
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return status;
}
@@ -72,12 +81,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -93,7 +97,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
@@ -102,13 +106,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const OpData& data,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
tflite::FullyConnectedParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.weights_offset = -filter->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
// TODO(b/138810107): Figure out whether output shift should be inverted
op_params.output_shift = -data.output_shift;
@@ -116,20 +122,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
op_params.quantized_activation_max = data.output_activation_max;
reference_integer_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(filter), GetTensorData<int8_t>(filter),
GetTensorShape(bias), GetTensorData<int32_t>(bias),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpData& data, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias,
TfLiteTensor* output) {
const int32_t input_offset = -input->params.zero_point;
const int32_t filter_offset = -filter->params.zero_point;
const int32_t output_offset = output->params.zero_point;
const OpData& data, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
const int32_t input_offset = -data.input_zero_point;
const int32_t filter_offset = -data.filter_zero_point;
const int32_t output_offset = data.output_zero_point;
tflite::FullyConnectedParams op_params;
op_params.input_offset = input_offset;
@@ -141,12 +152,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
GetTensorShape(output), GetTensorData<output_data_type>(output))
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
reference_ops::FullyConnected( \
op_params, tflite::micro::GetTensorShape(input), \
tflite::micro::GetTensorData<uint8_t>(input), \
tflite::micro::GetTensorShape(filter), \
tflite::micro::GetTensorData<uint8_t>(filter), \
tflite::micro::GetTensorShape(bias), \
tflite::micro::GetTensorData<int32_t>(bias), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<output_data_type>(output))
switch (output->type) {
case kTfLiteUInt8:
TF_LITE_FULLY_CONNECTED(uint8_t);
@@ -165,8 +180,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteFusedActivation activation,
const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output) {
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(activation, &output_activation_min,
&output_activation_max);
@@ -174,10 +190,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
tflite::reference_ops::FullyConnected(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(filter), GetTensorData<float>(filter),
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
GetTensorData<float>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
@@ -186,10 +206,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
@@ -216,16 +240,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace fully_connected
TfLiteRegistration* Register_FULLY_CONNECTED() {
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_FULLY_CONNECTED() {
return {/*init=*/fully_connected::Init,
/*free=*/nullptr,
/*prepare=*/fully_connected::Prepare,
/*invoke=*/fully_connected::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
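
The three new OpData fields above exist because Eval now receives TfLiteEvalTensor, which carries only type, dims, and data, and no quantization params. Anything Eval needs from the full TfLiteTensor must be captured while Prepare still has it. The pattern from this file, reduced to a sketch:

    struct OpData {
      int32_t input_zero_point;   // cached in Prepare, read in Eval
      int32_t filter_zero_point;
      int32_t output_zero_point;
    };

    // Prepare: full TfLiteTensor is still available here.
    data->input_zero_point  = input->params.zero_point;
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;

    // Eval: only TfLiteEvalTensor is available, so read the cache.
    op_params.input_offset   = -data.input_zero_point;
    op_params.weights_offset = -data.filter_zero_point;
    op_params.output_offset  = data.output_zero_point;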


@@ -14,16 +14,19 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace l2norm {
namespace {
// This file has two implementations of L2Norm.
enum KernelType {
kReference,
@@ -33,9 +36,15 @@ enum KernelType {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
} // namespace
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
L2NormalizationParams* data =
static_cast<L2NormalizationParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -48,29 +57,36 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
if (output->type == kTfLiteUInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
}
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}
data->input_zero_point = input->params.zero_point;
} else if (output->type == kTfLiteFloat32) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
#endif
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(L2NormalizationParams));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const L2NormalizationParams& data =
*(static_cast<const L2NormalizationParams*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/143912164): instead of hardcoding the epsilon here, we should read it
// from tensorflow, i.e., by adding a param.
@@ -87,39 +103,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// So we don't even need to handle the epsilon for the quantized kernel case.
const float epsilon = 1e-6f;
if (output->type == kTfLiteFloat32) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = 0; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<float>(input), GetTensorShape(output), \
GetTensorData<float>(output), epsilon)
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
epsilon);
} else if (output->type == kTfLiteUInt8) {
#define TF_LITE_L2NORM(type) \
tflite::L2NormalizationParams op_params; \
op_params.input_zero_point = input->params.zero_point; \
type::L2Normalization(op_params, GetTensorShape(input), \
GetTensorData<uint8>(input), GetTensorShape(output), \
GetTensorData<uint8>(output))
TF_LITE_L2NORM(reference_ops);
#undef TF_LITE_L2NORM
reference_ops::L2Normalization(
data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
const auto input_shape = GetTensorShape(input);
const auto output_shape = GetTensorShape(output);
const auto input_shape = tflite::micro::GetTensorShape(input);
const auto output_shape = tflite::micro::GetTensorShape(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
depth, GetTensorData<int8>(input),
GetTensorData<int8>(output));
reference_integer_ops::L2Normalization(
data.input_zero_point, outer_size, depth,
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
output->type);
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -128,22 +137,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace l2norm
TfLiteRegistration* Register_L2NORM_REF() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_L2NORM_REF() {
return {/*init=*/l2norm::Init,
/*free=*/nullptr,
/*prepare=*/l2norm::Prepare,
/*invoke=*/l2norm::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_L2_NORMALIZATION() {
return Register_L2NORM_REF();
}
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
} // namespace micro
} // namespace ops


@@ -15,8 +15,8 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (HaveSameShapes(input1, input2)) {
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
GetTensorShape(input1), GetTensorData<bool>(input1),
GetTensorShape(input2), GetTensorData<bool>(input2),
GetTensorShape(output), GetTensorData<bool>(output), func);
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
@@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace logical
TfLiteRegistration* Register_LOGICAL_OR() {
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_LOGICAL_AND() {
TfLiteRegistration Register_LOGICAL_AND() {
// Init, Free, Prepare, and Eval satisfy the interface required by
// TfLiteRegistration.
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
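
LogicalImpl takes a plain `bool (*func)(bool, bool)`, so the OR and AND entry points differ only in the predicate they pass. The predicates themselves are elided by the hunks above; a sketch of their assumed shape:

    // Assumed shape of the elided helpers:
    bool LogicalOr(bool x, bool y) { return x || y; }
    bool LogicalAnd(bool x, bool y) { return x && y; }

    TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
      return LogicalImpl(context, node, LogicalOr);
    }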


@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -44,7 +45,7 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
@@ -54,6 +55,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
@@ -64,18 +67,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
OpData data;
CalculateArithmeticOpData(context, node, &data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
default:
@@ -88,10 +107,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
switch (output->type) {
case kTfLiteInt8: {
reference_integer_ops::Logistic(
input->params.zero_point, data.input_range_radius,
data.input_multiplier, data.input_left_shift,
NumElements(input->dims), GetTensorData<int8_t>(input),
GetTensorData<int8_t>(output));
data->input_zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
NumElements(input->dims),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
@@ -113,16 +133,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_LOGISTIC() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops


@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;
struct OpContext {
OpContext(TfLiteContext* context, TfLiteNode* node) {
input1 = GetInput(context, node, kInputTensor1);
input2 = GetInput(context, node, kInputTensor2);
output = GetOutput(context, node, kOutputTensor);
input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
}
const TfLiteTensor* input1;
const TfLiteTensor* input2;
TfLiteTensor* output;
const TfLiteEvalTensor* input1;
const TfLiteEvalTensor* input2;
TfLiteEvalTensor* output;
};
struct MaximumOp {
@@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
const OpContext& op_context) {
reference_ops::MaximumMinimumBroadcastSlow(
GetTensorShape(op_context.input1),
GetTensorData<data_type>(op_context.input1),
GetTensorShape(op_context.input2),
GetTensorData<data_type>(op_context.input2),
GetTensorShape(op_context.output),
GetTensorData<data_type>(op_context.output),
tflite::micro::GetTensorShape(op_context.input1),
tflite::micro::GetTensorData<data_type>(op_context.input1),
tflite::micro::GetTensorShape(op_context.input2),
tflite::micro::GetTensorData<data_type>(op_context.input2),
tflite::micro::GetTensorShape(op_context.output),
tflite::micro::GetTensorData<data_type>(op_context.output),
op_type::template op<data_type>);
}
@@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace maximum_minimum
TfLiteRegistration* Register_MAXIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MAXIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MaximumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration* Register_MINIMUM() {
static TfLiteRegistration r = {
/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MINIMUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/
maximum_minimum::Eval<maximum_minimum::kReference,
maximum_minimum::MinimumOp>,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
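
Register_MAXIMUM and Register_MINIMUM share a single Eval template; the op structs only contribute a static, templated comparison that TFLiteOperation forwards to the reference kernel. Their bodies are elided by the hunks above, so this is an assumed sketch:

    struct MaximumOp {
      template <typename data_type>
      static data_type op(data_type el1, data_type el2) {
        return el1 > el2 ? el1 : el2;  // MinimumOp flips the comparison
      }
    };
    // Eval<kReference, MaximumOp> then hands
    // MaximumOp::template op<data_type> to MaximumMinimumBroadcastSlow.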


@@ -29,59 +29,61 @@ namespace micro {
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.
TfLiteRegistration* Register_ABS();
TfLiteRegistration* Register_ADD();
TfLiteRegistration* Register_ARG_MAX();
TfLiteRegistration* Register_ARG_MIN();
TfLiteRegistration* Register_AVERAGE_POOL_2D();
TfLiteRegistration* Register_CEIL();
TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration* Register_CONV_2D();
TfLiteRegistration* Register_CONCATENATION();
TfLiteRegistration* Register_COS();
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
TfLiteRegistration* Register_DEQUANTIZE();
TfLiteRegistration* Register_EQUAL();
TfLiteRegistration* Register_FLOOR();
TfLiteRegistration* Register_FULLY_CONNECTED();
TfLiteRegistration* Register_GREATER();
TfLiteRegistration* Register_GREATER_EQUAL();
TfLiteRegistration* Register_LESS();
TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_LOG();
TfLiteRegistration* Register_LOGICAL_AND();
TfLiteRegistration* Register_LOGICAL_NOT();
TfLiteRegistration* Register_LOGICAL_OR();
TfLiteRegistration* Register_LOGISTIC();
TfLiteRegistration* Register_MAXIMUM();
TfLiteRegistration* Register_MAX_POOL_2D();
TfLiteRegistration* Register_MEAN();
TfLiteRegistration* Register_MINIMUM();
TfLiteRegistration* Register_MUL();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_NOT_EQUAL();
TfLiteRegistration* Register_PACK();
TfLiteRegistration* Register_PAD();
TfLiteRegistration* Register_PADV2();
TfLiteRegistration* Register_PRELU();
TfLiteRegistration* Register_QUANTIZE();
TfLiteRegistration* Register_RELU();
TfLiteRegistration* Register_RELU6();
TfLiteRegistration* Register_RESHAPE();
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration* Register_ROUND();
TfLiteRegistration* Register_RSQRT();
TfLiteRegistration* Register_SIN();
TfLiteRegistration* Register_SOFTMAX();
TfLiteRegistration* Register_SPLIT();
TfLiteRegistration* Register_SQRT();
TfLiteRegistration* Register_SQUARE();
TfLiteRegistration* Register_STRIDED_SLICE();
TfLiteRegistration* Register_SUB();
TfLiteRegistration* Register_SVDF();
TfLiteRegistration* Register_UNPACK();
TfLiteRegistration* Register_L2_NORMALIZATION();
TfLiteRegistration* Register_TANH();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CONCATENATION();
TfLiteRegistration Register_COS();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DEQUANTIZE();
TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_FULLY_CONNECTED();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
TfLiteRegistration Register_NEG();
TfLiteRegistration Register_NOT_EQUAL();
TfLiteRegistration Register_PACK();
TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();
TfLiteRegistration Register_RSQRT();
TfLiteRegistration Register_SIN();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPLIT();
TfLiteRegistration Register_SQRT();
TfLiteRegistration Register_SQUARE();
TfLiteRegistration Register_STRIDED_SLICE();
TfLiteRegistration Register_SUB();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_UNPACK();
TfLiteRegistration Register_L2_NORMALIZATION();
TfLiteRegistration Register_TANH();
} // namespace micro
} // namespace ops


@@ -21,22 +21,31 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
namespace ops {
namespace micro {
namespace mul {
namespace {
constexpr int kInput1Tensor = 0;
constexpr int kInput2Tensor = 1;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
int32_t output_zero_point;
int32_t output_multiplier;
int output_shift;
float output_activation_min_f32;
float output_activation_max_f32;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
@@ -48,105 +57,155 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
double real_multiplier = static_cast<double>(input1->params.scale) *
static_cast<double>(input2->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
} else {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -input1->params.zero_point;
op_params.input2_offset = -input2->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
} // namespace
#define TF_LITE_MUL(type, opname, dtype) \
type::opname(op_params, GetTensorShape(input1), \
GetTensorData<dtype>(input1), GetTensorShape(input2), \
GetTensorData<dtype>(input2), GetTensorShape(output), \
GetTensorData<dtype>(output));
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
} else {
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
} else {
TF_LITE_MUL(reference_ops, Mul, uint8_t);
}
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
#undef TF_LITE_MUL
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpData* data,
const TfLiteTensor* input1, const TfLiteTensor* input2,
TfLiteTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
TfLiteMulParams* params, const OpData* data,
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.float_activation_min = data->output_activation_min_f32;
op_params.float_activation_max = data->output_activation_max_f32;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname) \
reference_ops::opname(op_params, GetTensorShape(input1), \
GetTensorData<float>(input1), GetTensorShape(input2), \
GetTensorData<float>(input2), GetTensorShape(output), \
GetTensorData<float>(output));
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
TF_LITE_MUL(BroadcastMul4DSlow);
reference_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_MUL(Mul);
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
#undef TF_LITE_MUL
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
CalculateOpData(context, node, params, &data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, params, &data, input1, input2, output);
EvalQuantized(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalFloat(context, node, params, &data, input1, input2, output);
EvalFloat(context, node, params, data, input1, input2, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -158,16 +217,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace mul
TfLiteRegistration* Register_MUL() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MUL() {
return {/*init=*/mul::Init,
/*free=*/nullptr,
/*prepare=*/mul::Prepare,
/*invoke=*/mul::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
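
CalculateOpData above folds the three tensor scales into one fixed-point multiplier, real_multiplier = input1_scale * input2_scale / output_scale, which QuantizeMultiplier splits into a Q31 mantissa and a binary shift. A worked example with assumed scales, for illustration only:

    // Assumed scales: 0.5 * 0.25 / 0.1 = 1.25.
    // QuantizeMultiplier normalizes to a mantissa in [0.5, 1):
    //   1.25 = 0.625 * 2^1
    //   output_multiplier = round(0.625 * 2^31) = 1342177280
    //   output_shift      = 1
    int32_t output_multiplier;
    int output_shift;
    QuantizeMultiplier(1.25, &output_multiplier, &output_shift);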


@@ -17,7 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
// TODO(wangtz): handle for kTfLiteInt8
case kTfLiteFloat32:
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output),
GetTensorData<float>(output));
reference_ops::Negate(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace neg
TfLiteRegistration* Register_NEG() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_NEG() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/neg::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro


@@ -16,7 +16,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;
template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
TfLiteTensor* output, int values_count, int axis) {
TfLiteEvalTensor* output, int values_count, int axis) {
const TfLiteEvalTensor* input0 =
tflite::micro::GetEvalInput(context, node, 0);
const int dimensions = output->dims->size;
const TfLiteTensor* input0 = GetInput(context, node, 0);
const TfLiteIntArray* input_dims = input0->dims;
const TfLiteIntArray* output_dims = output->dims;
@@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
}
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
T* output_data = GetTensorData<T>(output);
T* output_data = tflite::micro::GetTensorData<T>(output);
for (int i = 0; i < values_count; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
const T* input_data = GetTensorData<T>(t);
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
const T* input_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
const T* input_ptr = input_data + copy_size * k;
int loc = k * values_count * copy_size + i * copy_size;
@@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLitePackParams* data =
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32: {
@@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace pack
TfLiteRegistration* Register_PACK() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
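
A standalone sketch of the copy loop PackImpl runs above, with illustrative sizes: chunk k of input i lands at output offset (k * values_count + i) * copy_size, which is exactly how packing two 2x2 inputs along axis 1 interleaves them into a 2x2x2 output:

#include <cstdio>

int main() {
  const int values_count = 2, outer_size = 2, copy_size = 2;
  const float in0[] = {1, 2, 3, 4};  // input 0, shape 2x2
  const float in1[] = {5, 6, 7, 8};  // input 1, shape 2x2
  const float* inputs[] = {in0, in1};
  float out[8];                      // output, shape 2x2x2

  for (int i = 0; i < values_count; ++i) {
    for (int k = 0; k < outer_size; ++k) {
      const float* input_ptr = inputs[i] + copy_size * k;
      int loc = k * values_count * copy_size + i * copy_size;
      for (int j = 0; j < copy_size; ++j) out[loc + j] = input_ptr[j];
    }
  }
  for (float v : out) std::printf("%g ", v);  // 1 2 5 6 3 4 7 8
  std::printf("\n");
}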

View File

@@ -16,189 +16,205 @@ limitations under the License.
#include <string.h>
#include "tensorflow/lite/kernels/internal/types.h"
#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#else
#define __msan_check_mem_is_initialized(ptr, size)
#endif
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pad {
namespace {
struct PadContext {
PadContext(TfLiteContext* context, TfLiteNode* node) {
input = GetInput(context, node, 0);
paddings = GetInput(context, node, 1);
constant_values = nullptr;
if (NumInputs(node) == 3) {
constant_values = GetOptionalInputTensor(context, node, 2);
} else {
constant_values = nullptr;
}
output = GetOutput(context, node, 0);
dims = NumDimensions(input);
resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
const int32* paddings_data = GetTensorData<int32>(paddings);
// Paddings will be an n x 2 array, and we need to detect 4D arrays with the
// pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
if (IsConstantTensor(paddings) && paddings_total == 8 &&
(paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
resizing_category = ResizingCategory::kImageStyle;
}
}
const TfLiteTensor* constant_values;
const TfLiteTensor* input;
const TfLiteTensor* paddings;
TfLiteTensor* output;
int dims;
ResizingCategory resizing_category;
struct OpData {
PadParams params;
int32_t output_zero_point;
};
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
PadContext op_context(context, node);
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
if (op_context.constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, op_context.input->type,
op_context.constant_values->type);
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
const TfLiteTensor* constant_values =
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(context, NumDimensions(input) <=
reference_ops::PadKernelMaxDimensionCount());
if (constant_values != nullptr) {
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
}
// There must be a pair of paddings for each output dimension.
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
op_context.output->dims->size * 2);
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
output->dims->size * 2);
// On Micro, outputs must be properly sized by the converter.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
for (int i = 0; i < op_context.output->dims->size; i++) {
int output_dim = op_context.output->dims->data[i];
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
paddings_data[i * 2 + 1];
// NOTE: This data is only available because the paddings buffer is stored in
// the flatbuffer:
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
for (int i = 0; i < output->dims->size; i++) {
int output_dim = output->dims->data[i];
int expected_dim =
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
}
// Current implementations rely on the inputs being <= 4D.
TF_LITE_ENSURE(
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
// Calculate OpData:
data->params.resizing_category = ResizingCategory::kGenericResize;
const int paddings_total = GetTensorShape(paddings).FlatSize();
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
data->params.resizing_category = ResizingCategory::kImageStyle;
}
const int num_input_dimensions = NumDimensions(input);
data->params.left_padding_count = num_input_dimensions;
data->params.right_padding_count = num_input_dimensions;
for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
data->params.left_padding[idx] = paddings_data[idx * 2];
data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
} else {
TF_LITE_ENSURE(context, output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, output->params.zero_point <=
std::numeric_limits<int8_t>::max());
}
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
static_cast<double>(constant_values->params.scale));
}
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
PadContext op_context(context, node);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
if (op_context.constant_values != nullptr) {
// Ensure that constant_values is a scalar.
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
}
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, /*index=*/0);
const TfLiteEvalTensor* constant_values =
NumInputs(node) == 3
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
// Create before and after padding arrays that are accepted by the kernel.
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
tflite::PadParams op_params;
memset(&op_params, 0, sizeof(PadParams));
op_params.left_padding_count = op_context.dims;
op_params.right_padding_count = op_context.dims;
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
op_params.left_padding[idx] = paddings_data[idx * 2];
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
}
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
const scalar pad_value_copy = pad_value; \
\
type::op_name(op_params, GetTensorShape(op_context.input), \
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
GetTensorShape(op_context.output), \
GetTensorData<scalar>(op_context.output))
switch (op_context.input->type) {
switch (input->type) {
case kTfLiteFloat32: {
float pad_value = op_context.constant_values == nullptr
? 0.f
: *GetTensorData<float>(op_context.constant_values);
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
float pad_value =
constant_values == nullptr
? 0.f
: *tflite::micro::GetTensorData<float>(constant_values);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
} break;
case kTfLiteUInt8: {
uint8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<uint8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<uint8_t>::max());
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
if (constant_values == nullptr) {
pad_value = static_cast<uint8_t>(data->output_zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE_EQ(
context, static_cast<double>(op_context.output->params.scale),
static_cast<double>(op_context.constant_values->params.scale));
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
} break;
case kTfLiteInt8: {
int8_t pad_value;
if (op_context.constant_values == nullptr) {
// Quantized Pad requires that 0 is represented in the quantized
// range.
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
std::numeric_limits<int8_t>::min());
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
std::numeric_limits<int8_t>::max());
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
if (constant_values == nullptr) {
pad_value = static_cast<int8_t>(data->output_zero_point);
} else {
// Quantized Pad requires that 'constant_values' is represented in the
// same quantized range as the input and output tensors.
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
op_context.constant_values->params.zero_point);
TF_LITE_ENSURE(context, op_context.output->params.scale ==
op_context.constant_values->params.scale);
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
}
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
reference_ops::PadImageStyle(
data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} break;
case kTfLiteInt32: {
int32_t pad_value =
op_context.constant_values == nullptr
constant_values == nullptr
? 0
: *GetTensorData<int32_t>(op_context.constant_values);
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
: *tflite::micro::GetTensorData<int32_t>(constant_values);
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
&pad_value, tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
TfLiteTypeGetName(op_context.input->type));
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
#undef TF_LITE_PAD
@@ -207,29 +223,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace pad
TfLiteRegistration* Register_PAD() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PAD() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
// Also register Pad as PadV2.
TfLiteRegistration* Register_PADV2() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PADV2() {
return {/*init=*/pad::Init,
/*free=*/nullptr,
/*prepare=*/pad::Prepare,
/*invoke=*/pad::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
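
A standalone sketch of what the new Prepare above derives from the flattened n x 2 paddings tensor (values here are illustrative): per-dimension left/right padding, the expected output dims it verifies, and the "image style" fast path where the batch and channel dims of a 4D tensor are left untouched:

#include <cstdio>

int main() {
  const int dims = 4;
  const int input_dims[] = {1, 8, 8, 3};            // e.g. an NHWC input
  const int paddings[] = {0, 0, 1, 1, 2, 2, 0, 0};  // flattened {left,right} pairs

  int left[4], right[4], expected[4];
  for (int i = 0; i < dims; ++i) {
    left[i] = paddings[i * 2];
    right[i] = paddings[i * 2 + 1];
    expected[i] = input_dims[i] + left[i] + right[i];
  }
  // { {0,0}, {a,b}, {c,d}, {0,0} } => ResizingCategory::kImageStyle
  bool image_style = dims == 4 && paddings[0] == 0 && paddings[1] == 0 &&
                     paddings[6] == 0 && paddings[7] == 0;

  std::printf("expected output dims: %d %d %d %d, image_style=%d\n",
              expected[0], expected[1], expected[2], expected[3], image_style);
  // For quantized types with no constant_values input, the pad value is the
  // cached output zero point, so padded cells decode to a real 0.
}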

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
@@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
PoolParams op_params;
op_params.stride_height = params->stride_height;
@@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
float activation_min, activation_max;
CalculateActivationRange(params->activation, &activation_min,
&activation_max);
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
reference_ops::MaxPool(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, OpData* data,
const TfLiteTensor* input, TfLiteTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
int32_t activation_min, activation_max;
(void)CalculateActivationRangeQuantized(context, params->activation, output,
&activation_min, &activation_max);
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
@@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = activation_min;
op_params.quantized_activation_max = activation_max;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, &data, input, output);
AverageEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, &data, input, output);
AverageEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
OpData data;
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, &data, input, output);
MaxEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, &data, input, output);
MaxEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -207,30 +209,57 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteRegistration* Register_MAX_POOL_2D() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
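
A standalone sketch (hypothetical names, not the TFLM API) of the lifecycle change in the pooling kernel above: the activation clamp range, which used to be recomputed on every Eval, is now computed once in Prepare and cached in an OpData struct that Init allocates from the persistent arena:

#include <algorithm>
#include <cstddef>
#include <cstdio>

struct OpData {
  float activation_min_f32;
  float activation_max_f32;
};

// Stand-in for the interpreter's AllocatePersistentBuffer.
alignas(OpData) static unsigned char arena[sizeof(OpData)];
void* AllocatePersistentBuffer(size_t /*bytes*/) { return arena; }

OpData* Init() {  // Init: allocate the per-node state, nothing else
  return static_cast<OpData*>(AllocatePersistentBuffer(sizeof(OpData)));
}

void Prepare(OpData* data) {  // Prepare: compute the range once
  data->activation_min_f32 = 0.0f;  // e.g. ReLU
  data->activation_max_f32 = 6.0f;  // e.g. ReLU6
}

float Eval(const OpData* data, float pooled) {  // Eval: only clamps
  return std::min(std::max(pooled, data->activation_min_f32),
                  data->activation_max_f32);
}

int main() {
  OpData* data = Init();
  Prepare(data);
  std::printf("%g %g %g\n", Eval(data, -1.f), Eval(data, 2.5f), Eval(data, 9.f));
}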

View File

@@ -15,20 +15,45 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
&params->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
&params->output_shift_2);
params->input_offset = -input->params.zero_point;
params->alpha_offset = -alpha->params.zero_point;
params->output_offset = output->params.zero_point;
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace
inline void BroadcastPrelu4DSlowFloat(
const RuntimeShape& unextended_input1_shape, const float* input1_data,
const RuntimeShape& unextended_input2_shape, const float* input2_data,
@@ -60,51 +85,64 @@ inline void BroadcastPrelu4DSlowFloat(
}
}
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* alpha = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
int32_t output_multiplier_1 = 0;
int output_shift_1 = 0;
int32_t output_multiplier_2 = 0;
int output_shift_2 = 0;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) *
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &output_multiplier_1,
&output_shift_1);
QuantizeMultiplier(real_multiplier_2, &output_multiplier_2,
&output_shift_2);
}
return CalculatePreluParams(input, alpha, output, params);
}
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const PreluParams& params =
*(static_cast<const PreluParams*>(node->user_data));
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
switch (input->type) {
case kTfLiteFloat32: {
BroadcastPrelu4DSlowFloat(
GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(alpha), GetTensorData<float>(alpha),
GetTensorShape(output), GetTensorData<float>(output));
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<float>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
PreluParams op_params;
op_params.input_offset = -input->params.zero_point;
op_params.alpha_offset = -alpha->params.zero_point;
op_params.output_offset = output->params.zero_point;
op_params.output_multiplier_1 = output_multiplier_1;
op_params.output_shift_1 = output_shift_1;
op_params.output_multiplier_2 = output_multiplier_2;
op_params.output_shift_2 = output_shift_2;
reference_ops::BroadcastPrelu4DSlow(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
GetTensorShape(output), GetTensorData<uint8_t>(output));
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<uint8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
reference_ops::BroadcastPrelu4DSlow(
params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(alpha),
tflite::micro::GetTensorData<int8_t>(alpha),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
context, "Only float32 and uint8 are supported currently, got %d.",
context, "Only float32 and uint8_t are supported currently, got %d.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
@@ -112,16 +150,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_PRELU() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_PRELU() {
return {/*init=*/activations::PreluInit,
/*free=*/nullptr,
/*prepare=*/activations::PreluPrepare,
/*invoke=*/activations::PreluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
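
A standalone sketch of the two scale factors CalculatePreluParams derives above (scales are illustrative; Decompose is a simplified stand-in for QuantizeMultiplier): the x >= 0 path needs s_in / s_out and the x < 0 path needs s_in * s_alpha / s_out, each split into a Q31 fixed-point multiplier plus a power-of-two shift:

#include <cmath>
#include <cstdint>
#include <cstdio>

void Decompose(double real_multiplier, int32_t* quantized_multiplier,
               int* shift) {
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // rounding carried over; renormalize
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main() {
  const double s_in = 0.05, s_alpha = 0.02, s_out = 0.1;
  const double m1 = s_in / s_out;            // x >= 0: output = x
  const double m2 = s_in * s_alpha / s_out;  // x <  0: output = alpha * x

  int32_t q1, q2;
  int shift1, shift2;
  Decompose(m1, &q1, &shift1);
  Decompose(m2, &q2, &shift2);
  std::printf("m1=%g -> (%ld, %d), m2=%g -> (%ld, %d)\n", m1,
              static_cast<long>(q1), shift1, m2, static_cast<long>(q2), shift2);
}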

View File

@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
@@ -27,20 +28,18 @@ namespace micro {
namespace quantize {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
void* data = nullptr;
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
kTfLiteError) {
return nullptr;
}
return data;
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -63,18 +62,27 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
TF_LITE_ENSURE(context,
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
input->type == kTfLiteInt16 ||
input->type == kTfLiteInt8);
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16);
if (input->type == kTfLiteInt16 && output->type == kTfLiteInt8) {
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
double effective_scale =
static_cast<double>(input->params.scale / output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
return kTfLiteOk;
}
@@ -82,25 +90,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
tflite::QuantizationParams op_params;
op_params.zero_point = output->params.zero_point;
op_params.scale = static_cast<double>(output->params.scale);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
@@ -111,10 +126,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
GetTensorData<int16_t>(input), size, data->output_multiplier,
data->output_shift, input->params.zero_point,
output->params.zero_point, GetTensorData<int8_t>(output));
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -136,17 +177,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
// This Op (QUANTIZE) quantizes the input and produces quantized output.
// AffineQuantize takes scale and zero point and quantizes the float value to
// quantized output, in int8 or uint8 format.
TfLiteRegistration* Register_QUANTIZE() {
static TfLiteRegistration r = {/*init=*/quantize::Init,
/*free=*/nullptr,
/*prepare=*/quantize::Prepare,
/*invoke=*/quantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
// quantized output, in int8_t or uint8_t format.
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/quantize::Init,
/*free=*/nullptr,
/*prepare=*/quantize::Prepare,
/*invoke=*/quantize::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
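
A standalone sketch of the requantize paths added above (int16 to int8/int16, and the new int8 to int8 case), using plain doubles where the kernel uses the precomputed output_multiplier/output_shift pair; the quantization parameters below are illustrative:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode (q_in - zp_in) * s_in, then re-encode with the output scale/zero point.
int8_t Requantize(int8_t q_in, double s_in, int zp_in, double s_out,
                  int zp_out) {
  const double effective_scale = s_in / s_out;  // what QuantizeMultiplier encodes
  int q = static_cast<int>(std::lround((q_in - zp_in) * effective_scale)) + zp_out;
  q = std::min(127, std::max(-128, q));         // saturate to the int8 range
  return static_cast<int8_t>(q);
}

int main() {
  // The real value 0.5 under two different int8 quantizations:
  // in: s=0.01, zp=-10 gives q_in=40; out: s=0.02, zp=5 should give 30.
  std::printf("%d\n", Requantize(40, 0.01, -10, 0.02, 5));
}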

View File

@@ -21,6 +21,8 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -50,7 +52,7 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
return kTfLiteOk;
}
@@ -58,7 +60,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
tflite::MeanParams* op_params) {
int i = 0;
for (; i < axis_count; ++i) {
op_params->axis[i] = static_cast<int16>(axis_data[i]);
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
}
for (; i < 4; ++i) {
op_params->axis[i] = 1;
@@ -67,24 +69,25 @@
}
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
const TfLiteTensor* axis = GetInput(context, node, 1);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
int num_axis = static_cast<int>(NumElements(axis));
int num_axis = static_cast<int>(ElementCount(*axis->dims));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];
switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,
&op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs =
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
(input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
@@ -95,22 +98,24 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims) {
reference_ops::Mean(op_params, GetTensorShape(input),
GetTensorData<float>(input), GetTensorShape(output),
GetTensorData<float>(output));
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
TF_LITE_ENSURE(
context,
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
input->dims->size, GetTensorData<float>(output),
output->dims->data, output->dims->size,
GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
GetTensorData<float>(output)));
reference_ops::Mean(
tflite::micro::GetTensorData<float>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<float>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis,
tflite::micro::GetTensorData<float>(output)));
}
} break;
default:
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"is supported.");
@@ -119,16 +124,15 @@
}
} // namespace reduce
TfLiteRegistration* Register_MEAN() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_MEAN() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
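
A standalone sketch of the one case EvalMean fast-paths above: a float32 mean over axes 1 and 2 of a 4D NHWC tensor, i.e. averaging each channel over its whole H x W plane (sizes and values are illustrative):

#include <cstdio>

int main() {
  const int N = 1, H = 2, W = 2, C = 2;
  // NHWC layout: index = ((n*H + h)*W + w)*C + c
  const float input[] = {1, 10, 2, 20, 3, 30, 4, 40};
  float output[N * C] = {};  // shape {N, 1, 1, C} with keep_dims

  for (int n = 0; n < N; ++n) {
    for (int c = 0; c < C; ++c) {
      float sum = 0;
      for (int h = 0; h < H; ++h)
        for (int w = 0; w < W; ++w)
          sum += input[((n * H + h) * W + w) * C + c];
      output[n * C + c] = sum / (H * W);
    }
  }
  std::printf("%g %g\n", output[0], output[1]);  // 2.5 25
}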

View File

@@ -18,6 +18,9 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
@@ -61,7 +64,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
num_output_elements *= output_shape->data[stretch_dim];
}
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
return kTfLiteOk;
}
@@ -74,13 +77,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/162522304): storing input bytes in OpData increases the size of
// some models significantly, possibly due to alignment issues.
size_t input_bytes;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
input_bytes *= ElementCount(*input->dims);
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
for (size_t i = 0; i < input->bytes; ++i) {
for (size_t i = 0; i < input_bytes; ++i) {
output->data.raw[i] = input->data.raw[i];
}
}
@@ -89,16 +100,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace reshape
TfLiteRegistration* Register_RESHAPE() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESHAPE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/reshape::Prepare,
/*invoke=*/reshape::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
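
A standalone sketch of the Eval logic above: a reshape never moves elements, so the kernel computes the byte count as sizeof(type) * element_count and copies raw bytes, skipping even that when input and output alias:

#include <cstddef>
#include <cstdio>
#include <cstring>

int main() {
  const float input[6] = {1, 2, 3, 4, 5, 6};  // shape {2, 3}
  float output[6];                            // shape {3, 2}: same 6 elements

  const size_t element_count = 6;             // cf. ElementCount(*input->dims)
  const size_t input_bytes = sizeof(float) * element_count;  // cf. TfLiteTypeSizeOf

  // Do nothing for an in-place reshape; otherwise copy byte for byte.
  if (reinterpret_cast<const char*>(input) != reinterpret_cast<char*>(output)) {
    std::memcpy(output, input, input_bytes);
  }
  std::printf("%g %g\n", output[0], output[5]);  // 1 6
}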

View File

@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -49,8 +50,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output->type = input->type;
if (!IsConstantTensor(size)) {
TF_LITE_KERNEL_LOG(context,
"Dynamic tensors are unsupported in tfmicro.");
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
return kTfLiteError;
}
#endif
@@ -61,9 +61,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::ResizeNearestNeighborParams op_params;
op_params.align_corners = params->align_corners;
@@ -71,22 +74,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int32>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int32>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int32_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else if (output->type == kTfLiteUInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<uint8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else if (output->type == kTfLiteInt8) {
reference_ops::ResizeNearestNeighbor(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(size), GetTensorData<int32>(size),
GetTensorShape(output), GetTensorData<int8_t>(output));
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context,
"Output type is %d, requires float, uint8 or int8.",
"Output type is %d, requires float, uint8_t or int8_t.",
output->type);
return kTfLiteError;
}
@@ -95,16 +107,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace resize_nearest_neighbor
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/resize_nearest_neighbor::Prepare,
/*invoke=*/resize_nearest_neighbor::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
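
A standalone sketch of why the float32 branch above can read the tensor through GetTensorData<int32_t>: nearest-neighbor resize only selects and copies elements, so any 4-byte-wide view of the data gives identical results (the index mapping below is a simplified illustration, not TFLM's exact formula):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const float src[] = {1.5f, 2.5f};  // a 1x2 row, upscaled to 1x4
  float dst[4];

  // Copy through an int32_t view: bit patterns move, no float math happens.
  int32_t src_bits[2], dst_bits[4];
  std::memcpy(src_bits, src, sizeof(src));
  const int in_w = 2, out_w = 4;
  for (int x = 0; x < out_w; ++x) {
    int in_x = x * in_w / out_w;  // simplified nearest mapping
    dst_bits[x] = src_bits[in_x];
  }
  std::memcpy(dst, dst_bits, sizeof(dst));
  std::printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);  // 1.5 1.5 2.5 2.5
}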

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -32,8 +33,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, output->type, input->type);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
@@ -43,26 +44,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
reference_ops::Round(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace round
TfLiteRegistration* Register_ROUND() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_ROUND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/round::Prepare,
/*invoke=*/round::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@@ -42,7 +43,8 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16 softmax output does not require symmetric scaling
// NOTE: Current int16_t softmax output does not require symmetric
// scaling
// - so no need to verify scale here.
} else {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
@@ -72,60 +74,75 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
} // namespace
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
}
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
return kTfLiteOk;
}
TfLiteTensor* output = GetOutput(context, node, 0);
// Takes a tensor and performs softmax along the last dimension.
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
}
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
}
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
return CalculateSoftmaxParams(context, input, output, params, data);
}
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
SoftmaxParams op_data;
TF_LITE_ENSURE_STATUS(
CalculateSoftmaxParams(context, input, output, params, &op_data));
TFLITE_DCHECK(node->user_data != nullptr);
SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxFloat(input, output, op_data);
SoftmaxFloat(input, output, *data);
return kTfLiteOk;
}
case kTfLiteInt8:
case kTfLiteUInt8: {
SoftmaxQuantized(input, output, op_data);
SoftmaxQuantized(input, output, *data);
return kTfLiteOk;
}
default:
@@ -136,16 +153,15 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
}
} // namespace activations
TfLiteRegistration* Register_SOFTMAX() {
static TfLiteRegistration r = {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
return &r;
TfLiteRegistration Register_SOFTMAX() {
return {/*init=*/activations::SoftmaxInit,
/*free=*/nullptr,
/*prepare=*/activations::SoftmaxPrepare,
/*invoke=*/activations::SoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
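
A standalone sketch of the float path SoftmaxFloat dispatches to above: the standard numerically stable softmax along the last dimension (subtract the row maximum before exponentiating), shown for a single row of illustrative logits:

#include <cmath>
#include <cstdio>

int main() {
  const int depth = 3;  // size of the last dimension
  const float logits[depth] = {1.0f, 2.0f, 3.0f};
  float probs[depth];

  float max_v = logits[0];
  for (int i = 1; i < depth; ++i) max_v = std::fmax(max_v, logits[i]);

  float sum = 0;
  for (int i = 0; i < depth; ++i) sum += probs[i] = std::exp(logits[i] - max_v);
  for (int i = 0; i < depth; ++i) probs[i] /= sum;

  std::printf("%.4f %.4f %.4f\n", probs[0], probs[1], probs[2]);
  // ~0.0900 0.2447 0.6652
}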

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -25,10 +26,11 @@ namespace split {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
                       const TfLiteEvalTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;
const int split_dimensions = input_dims->size;
@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
base_inner_size *= input_dims->data[i];
}
  const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
      TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
      T* output_data = tflite::micro::GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
@ -65,23 +67,28 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* axis = GetInput(context, node, 0);
  // Dynamic output tensors are needed if axis tensor is not constant.
  // But Micro doesn't support dynamic memory allocation, so we only support
  // constant axis tensor for now.
  TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                     "Non constant axis tensor not supported");
  return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
    axis_value += input->dims->size;
}
TF_LITE_ENSURE(context, axis_value >= 0);
  TF_LITE_ENSURE(context, axis_value < input->dims->size);
switch (input->type) {
case kTfLiteFloat32: {
@ -111,16 +118,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace split
TfLiteRegistration Register_SPLIT() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/split::Prepare,
/*invoke=*/split::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
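Prepare() can insist on a constant axis because Eval() reads the axis value
straight from tensor data and normalizes negative values against the input
rank. A small standalone check of that normalization (illustrative, not TFLM
code):

#include <cassert>

// Mirrors the axis handling in split::Eval: a negative axis counts back
// from the last dimension.
int NormalizeAxis(int axis, int num_dims) {
  if (axis < 0) axis += num_dims;
  assert(axis >= 0 && axis < num_dims);
  return axis;
}

int main() {
  assert(NormalizeAxis(-1, 3) == 2);  // last dimension of a 3-D tensor
  assert(NormalizeAxis(1, 3) == 1);   // positive axes pass through
}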

View File

@ -15,23 +15,20 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
#include <cmath>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace strided_slice {
constexpr int kInputTensor = 0;
constexpr int kBeginTensor = 1;
constexpr int kEndTensor = 2;
@ -120,64 +117,74 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context,
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
StridedSliceParams* op_params =
static_cast<StridedSliceParams*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 4);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
StridedSliceContext op_context(context, node);
TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim,
"input dim should not exceed 4");
auto params = BuildStridedSliceParams(&op_context);
memcpy(op_params, &params, sizeof(StridedSliceParams));
return CheckOutputSize(context, &op_context);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const StridedSliceParams& op_params =
*(static_cast<const StridedSliceParams*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (output->type) {
case kTfLiteFloat32:
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteUInt8:
reference_ops::StridedSlice(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt8:
reference_ops::StridedSlice(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace strided_slice
TfLiteRegistration Register_STRIDED_SLICE() {
return {/*init=*/strided_slice::Init,
/*free=*/nullptr,
/*prepare=*/strided_slice::Prepare,
/*invoke=*/strided_slice::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro

View File

@ -21,8 +21,10 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -40,18 +42,18 @@ struct OpData {
// and the special 16-bit -> 16bit quantized path
int input1_shift;
int input2_shift;
  int32_t output_activation_min;
  int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8bit quantized path
  int32_t input1_multiplier;
  int32_t input2_multiplier;
  int32_t output_multiplier;
int output_shift;
int left_shift;
  int32_t input1_offset;
  int32_t input2_offset;
  int32_t output_offset;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
@ -93,31 +95,59 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_STATUS(
CalculateOpData(context, params, input1, input2, output, data));
return kTfLiteOk;
}
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
             const OpData* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
  if (data->requires_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
tflite::reference_ops::SubWithActivation(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteSubParams* params, const OpData* data,
                              const TfLiteEvalTensor* input1,
                              const TfLiteEvalTensor* input2,
                              TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
@ -133,25 +163,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
      } else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
      if (need_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
      } else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
}
return kTfLiteOk;
@ -160,13 +211,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
@ -184,16 +237,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace sub
TfLiteRegistration Register_SUB() {
return {/*init=*/sub::Init,
/*free=*/nullptr,
/*prepare=*/sub::Prepare,
/*invoke=*/sub::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
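The quantized path picks plain Sub or BroadcastSubSlow depending on what
ProcessBroadcastShapes reports. A standalone stand-in for that decision
(illustrative only; the real helper also fills op_params with broadcast
strides):

#include <cassert>
#include <vector>

// Broadcasting is needed whenever the right-aligned shapes differ in any
// dimension; missing leading dimensions are treated as 1.
bool NeedBroadcast(std::vector<int> a, std::vector<int> b) {
  while (a.size() < b.size()) a.insert(a.begin(), 1);
  while (b.size() < a.size()) b.insert(b.begin(), 1);
  for (size_t i = 0; i < a.size(); ++i)
    if (a[i] != b[i]) return true;
  return false;
}

int main() {
  assert(!NeedBroadcast({2, 3}, {2, 3}));  // elementwise Sub path
  assert(NeedBroadcast({2, 3}, {1, 3}));   // BroadcastSubSlow path
}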

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
@ -32,14 +33,18 @@ namespace svdf {
namespace {
struct OpData {
  int32_t effective_scale_1_a;
  int32_t effective_scale_2_a;
// b versions of each scale are kept at int since the numbers are just the
// shift value - typically between [-32, 32].
int effective_scale_1_b;
int effective_scale_2_b;
int scratch_tensor_index;
int scratch_output_tensor_index;
// Cached tensor zero point values for quantized operations.
int input_zero_point;
int output_zero_point;
};
/**
@ -114,11 +119,11 @@ static inline void ApplyTimeWeightsBiasAndActivation(
}
inline void EvalFloatSVDF(
    TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* weights_feature,
    const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
    const TfLiteSVDFParams* params, int scratch_tensor_index,
    TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
const int rank = params->rank;
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
@ -126,12 +131,14 @@ inline void EvalFloatSVDF(
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
  const float* weights_feature_ptr =
      tflite::micro::GetTensorData<float>(weights_feature);
  const float* weights_time_ptr =
      tflite::micro::GetTensorData<float>(weights_time);
  const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
  const float* input_ptr = tflite::micro::GetTensorData<float>(input);
  float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
@ -139,7 +146,7 @@ inline void EvalFloatSVDF(
float* scratch_ptr = static_cast<float*>(
context->GetScratchBuffer(context, scratch_tensor_index));
  float* output_ptr = tflite::micro::GetTensorData<float>(output);
// Left shift the activation_state.
{
@ -185,14 +192,13 @@ inline void EvalFloatSVDF(
}
void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
                     const TfLiteEvalTensor* input_tensor,
                     const TfLiteEvalTensor* weights_feature_tensor,
                     const TfLiteEvalTensor* weights_time_tensor,
                     const TfLiteEvalTensor* bias_tensor,
                     const TfLiteSVDFParams* params,
                     TfLiteEvalTensor* activation_state_tensor,
                     TfLiteEvalTensor* output_tensor, const OpData& data) {
const int n_rank = params->rank;
const int n_batch = input_tensor->dims->data[0];
const int n_input = input_tensor->dims->data[1];
@ -209,7 +215,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
// Shift states.
  int16_t* const state_ptr =
      tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
// Left shift the activation_state.
{
@ -225,10 +232,11 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
// Feature matmul.
{
    int16_t* state =
        tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
    const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
    const int8_t* weight_feature =
        tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
const int32_t output_max = std::numeric_limits<int16_t>::max();
const int32_t output_min = std::numeric_limits<int16_t>::min();
int16_t* result_in_batch = state + (n_memory - 1);
@ -238,7 +246,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t dot_prod = 0;
const int8_t* vector_in_batch = input + b * n_input;
for (int c = 0; c < n_input; c++) {
          dot_prod +=
              *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
}
dot_prod = MultiplyByQuantizedMultiplier(
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
@ -261,9 +270,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Perform batched vector dot product:
      const int16_t* vector1_ptr =
          tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
const int16_t* vector2_ptr =
          tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
b * n_memory * n_filter;
for (int i = 0; i < n_filter; i++) {
@ -281,7 +291,8 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
// Add bias.
if (bias_tensor) {
// Vector batch assign:
      const int32_t* bias_data =
          tflite::micro::GetTensorData<int32_t>(bias_tensor);
for (int i = 0; i < n_batch; ++i) {
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
const int32_t* bias_ptr = bias_data;
@ -316,9 +327,10 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
int32_t x1 = scratch_output_tensor[i];
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
data.effective_scale_2_b);
      int32_t x3 = x2 + data.output_zero_point;
int32_t x4 = std::min(std::max(output_min, x3), output_max);
      tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
          static_cast<int8_t>(x4);
}
}
}
@ -338,12 +350,7 @@ constexpr int kOutputTensor = 0;
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@ -382,7 +389,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
// Validate Tensor Output:
  // [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
@ -408,9 +415,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
memory_size * num_filters);
// Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
@ -419,35 +431,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
}
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
    const double effective_scale_1 = static_cast<double>(
        input->params.scale * weights_feature->params.scale /
        activation_state->params.scale);
    const double effective_scale_2 =
        static_cast<double>(activation_state->params.scale *
                            weights_time->params.scale / output->params.scale);
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
TF_LITE_ENSURE(
context,
std::abs(static_cast<double>(bias->params.scale) -
static_cast<double>(activation_state->params.scale *
weights_time->params.scale)) < 1e-5);
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
&(data->effective_scale_1_b));
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
&(data->effective_scale_2_b));
data->input_zero_point = input->params.zero_point;
data->output_zero_point = output->params.zero_point;
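    // Worked example (illustrative values, not from the source): with input
    // scale 0.01, feature-weight scale 0.02 and state scale 0.001,
    // effective_scale_1 = 0.01 * 0.02 / 0.001 = 0.2, which QuantizeMultiplier
    // splits into a fixed-point multiplier and shift for
    // MultiplyByQuantizedMultiplier in EvalIntegerSVDF.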
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@ -467,10 +474,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
}
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
@ -484,20 +488,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData& data = *(static_cast<const OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* weights_feature =
tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
const TfLiteEvalTensor* weights_time =
tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 5)
? tflite::micro::GetEvalInput(context, node, kBiasTensor)
: nullptr;
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
context, node, kInputActivationStateTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (weights_feature->type) {
case kTfLiteFloat32: {
EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
@ -508,11 +516,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
case kTfLiteInt8: {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
                      params, activation_state, output, data);
      return kTfLiteOk;
}
@ -527,16 +532,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace svdf
TfLiteRegistration Register_SVDF() {
return {/*init=*/svdf::Init,
/*free=*/nullptr,
/*prepare=*/svdf::Prepare,
/*invoke=*/svdf::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
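Caching input_zero_point and output_zero_point in OpData matters because the
int8 kernel folds a zero point into every multiply-accumulate and into the
final requantization: real_value = scale * (quantized - zero_point). A
standalone sketch of that arithmetic (illustrative values, not TFLM code):

#include <cassert>
#include <cstdint>

float Dequantize(int8_t q, float scale, int zero_point) {
  return scale * static_cast<float>(q - zero_point);
}

int main() {
  // With scale 0.5 and zero point -128, int8 -128 maps back to 0.0f.
  assert(Dequantize(-128, 0.5f, -128) == 0.0f);
  assert(Dequantize(-126, 0.5f, -128) == 1.0f);
}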

View File

@ -17,6 +17,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
@ -28,14 +29,16 @@ constexpr int kInputTensor = 0;
template <typename T>
TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
                        const TfLiteEvalTensor* input, int output_count,
                        int axis) {
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteIntArray* output_dims = output0->dims;
const int dimensions = input_dims->size;
if (axis < 0) {
    axis += input->dims->size;
}
TFLITE_DCHECK_LT(axis, dimensions);
@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
}
TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
  const T* input_data = tflite::micro::GetTensorData<T>(input);
for (int i = 0; i < output_count; ++i) {
    TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
    T* output_data = tflite::micro::GetTensorData<T>(t);
for (int k = 0; k < outer_size; ++k) {
T* output_ptr = output_data + copy_size * k;
int loc = k * output_count * copy_size + i * copy_size;
@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteUnpackParams* data =
reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@ -101,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
} // namespace
} // namespace unpack
TfLiteRegistration Register_UNPACK() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/unpack::Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
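The index arithmetic in UnpackImpl can be checked in isolation: for output i,
chunk k starts at k * output_count * copy_size + i * copy_size in the input.
A standalone sketch unpacking a 2x3 tensor along axis 0 (illustrative, not
TFLM code):

#include <cassert>

int main() {
  // For axis 0 of a {2, 3} input: output_count = 2, copy_size = 3,
  // outer_size = 1, matching the values UnpackImpl derives from the dims.
  const int input[6] = {1, 2, 3, 4, 5, 6};
  int out0[3], out1[3];
  int* outputs[2] = {out0, out1};
  const int output_count = 2, copy_size = 3, outer_size = 1;
  for (int i = 0; i < output_count; ++i)
    for (int k = 0; k < outer_size; ++k)
      for (int j = 0; j < copy_size; ++j)
        outputs[i][copy_size * k + j] =
            input[k * output_count * copy_size + i * copy_size + j];
  assert(out0[0] == 1 && out0[2] == 3);  // first slice: {1, 2, 3}
  assert(out1[0] == 4 && out1[2] == 6);  // second slice: {4, 5, 6}
}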

View File

@ -15,9 +15,15 @@ limitations under the License.
#include "tensorflow/lite/micro/memory_helpers.h"
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
@ -40,8 +46,7 @@ size_t AlignSizeUp(size_t size, size_t alignment) {
return aligned_size;
}
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
switch (type) {
case kTfLiteFloat32:
*size = sizeof(float);
@ -67,9 +72,10 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size,
case kTfLiteComplex64:
*size = sizeof(float) * 2;
break;
case kTfLiteComplex128:
*size = sizeof(double) * 2;
break;
default:
return kTfLiteError;
}
return kTfLiteOk;
@ -79,17 +85,71 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter) {
int element_count = 1;
// If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
// so has 1 element.
if (flatbuffer_tensor.shape() != nullptr) {
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
}
}
TfLiteType tf_lite_type;
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&tf_lite_type, error_reporter));
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
*bytes = element_count * (*type_size);
return kTfLiteOk;
}
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes) {
TFLITE_DCHECK(out_bytes != nullptr);
int element_count = 1;
// If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
if (eval_tensor->dims != nullptr) {
for (int n = 0; n < eval_tensor->dims->size; ++n) {
element_count *= eval_tensor->dims->data[n];
}
}
size_t type_size;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
*out_bytes = element_count * type_size;
return kTfLiteOk;
}
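// Worked example of the arithmetic above (illustrative): a {2, 3} float32
// tensor has 2 * 3 = 6 elements, so TfLiteEvalTensorByteLength reports
// 6 * sizeof(float) = 24 bytes; a scalar (dims == nullptr) reports just the
// element size.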
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteTensor* input = nullptr;
TF_LITE_ENSURE(context, input1->dims != nullptr);
TF_LITE_ENSURE(context, input2->dims != nullptr);
TF_LITE_ENSURE(context, output->dims->size == 0);
input = input1->dims->size > input2->dims->size ? input1 : input2;
TF_LITE_ENSURE(context, output->type == input->type);
size_t size = 0;
TfLiteTypeSizeOf(input->type, &size);
const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
for (int i = 0; i < dimensions_count; i++) {
size *= input->dims->data[i];
}
output->bytes = size;
output->dims =
reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
context, TfLiteIntArrayGetSizeInBytes(size)));
output->dims->size = input->dims->size;
for (int i = 0; i < dimensions_count; i++) {
output->dims->data[i] = input->dims->data[i];
}
return kTfLiteOk;
}
} // namespace tflite

View File

@ -15,6 +15,9 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -31,14 +34,26 @@ uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
size_t AlignSizeUp(size_t size, size_t alignment);
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);
// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the output dimensions should
// match those of the largest input.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output);
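// For example (illustrative, not from the source): with a float32 input1 of
// shape {2, 2} and an input2 of shape {2}, the output takes input1's dims
// and output->bytes = 2 * 2 * sizeof(float) = 16.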
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

View File

@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
  buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
  buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
current->offline_offset = kOnlinePlannedBuffer;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used, int offline_offset) {
BufferRequirements* current = &requirements_[buffer_count_];
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
kTfLiteOk) {
return kTfLiteError;
}
current->offline_offset = offline_offset;
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
    candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
} else {
if (start->next_entry_index == -1) {
return nullptr;
@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
    // gaps with small buffers at the beginning. Add offline planned offsets
    // first in the list, since they have a predetermined offset.
int idx_from_tail = buffer_count_;
int idx_from_head = 0;
for (int i = 0; i < buffer_count_; ++i) {
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
idx_from_tail--;
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
buffer_ids_sorted_[idx_from_tail] = i;
buffer_offsets_[i] = -1;
} else {
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
buffer_ids_sorted_[idx_from_head] = i;
buffer_offsets_[i] = requirements_[i].offline_offset;
idx_from_head++;
}
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers. Do not sort the offline planned offsets.
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
&buffer_ids_sorted_[idx_from_head],
buffer_count_ - idx_from_head);
// Initialize the first entry to the first buffer in
// buffer_ids_sorted_.
// - If there are no offline planned offsets, the largest buffer will be
// first, and the buffers will be handled in size order.
// - If offline offsets are present, these will be handled first in order
  //   for the greedy algorithm to utilize gaps in the offline plan.
first_entry_index_ = 0;
next_free_entry_ = 1;
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
first_entry->next_entry_index = -1; // to mark the entry as end of list
int buffer_id = buffer_ids_sorted_[0];
first_entry->requirements_index = buffer_id;
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
buffer_offsets_[buffer_id] = 0;
}
first_entry->offset = buffer_offsets_[buffer_id];
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
// The id is the order the buffer was originally added by the client.
    buffer_id = buffer_ids_sorted_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
@ -168,37 +203,43 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// so that it's easy to find the next buffer in memory, and so the gap.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
    int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
ListEntry* prior_entry = nullptr;
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
}
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
} else {
      // Offline planned offsets are to be considered constant
candidate_offset = wanted_requirements->offline_offset;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
@ -212,26 +253,36 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
if (first_entry->offset > candidate_offset) {
// The new entry offset is smaller than the first entry offset =>
// replace the first entry
first_entry = new_entry;
first_entry->next_entry_index = first_entry_index_;
first_entry_index_ = new_entry_index;
} else {
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the
// buffer-offset-ordered list
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
}
// not at the end of the list -> take a look at next entry
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
}
}
}
@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
if (buffer_count_ == 0) {
return 0;
}
  ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =

View File

@ -21,6 +21,8 @@ limitations under the License.
namespace tflite {
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset);
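  // Hypothetical usage mixing the two overloads (sizes and time steps are
  // illustrative):
  //   planner.AddBuffer(reporter, /*size=*/256, /*first=*/0, /*last=*/2,
  //                     /*offline_offset=*/0);  // pinned by the offline plan
  //   planner.AddBuffer(reporter, /*size=*/128, /*first=*/1, /*last=*/3);
  //   // placed online, in whatever gap the offline plan leaves free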
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
        sizeof(int) +                 // buffer_sizes_sorted_
        sizeof(int) +                 // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int offline_offset;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
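  // For example (hypothetical data): with buffers {A offline, B online,
  // C offline, D online} added in that order, buffer_ids_sorted_ becomes
  // {A, C, D, B}: offline ids fill from the head in encounter order, online
  // ids fill from the tail, and the online region is then reverse-sorted by
  // size before greedy placement.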
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
  int next_free_entry_;   // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -15,9 +15,14 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -26,16 +31,21 @@ namespace tflite {
// Namespace used for unittests.
namespace internal {
// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/160894903): Once all kernels have been updated to the new
// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal
// flatbuffer quantization or dimension allocations to take place in either the
// temp or tail section of the arena.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result);
// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. `data` field is populated in
// `FinishTensorAllocation` after static memory planning.
// `FinishModelAllocation` after static memory planning.
// TODO(b/150257460) As a future optimization, this struct could be replaced by
// a union, since once `data` is populated, `bytes` and `node_idx` is not
// needed.
@ -59,7 +69,17 @@ typedef struct {
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to standup a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// Memory layout to help understand how it works
// This information could change in the future version.
// ************** .memory_allocator->GetBuffer()
@ -72,42 +92,73 @@ typedef struct {
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
public:
// Creates a MicroAllocator instance from a given tensor arena. This arena
// will be managed by the created instance.
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
// bytes aligned, otherwise some head room will be wasted.
// TODO(b/157615197): Cleanup constructor + factory usage.
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
// instance to manage allocations internally.
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Begin allocating internal resources required for model inference.
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);
// Run through the model to allocate nodes and registrations. We need to keep
// them for the entire life time of the model to allow persistent tensors.
// This method needs to be called before FinishTensorAllocation method.
TfLiteStatus AllocateNodeAndRegistrations(
const OpResolver& op_resolver,
NodeAndRegistration** node_and_registrations);
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation().
TfLiteStatus FinishModelAllocation(const Model* model,
TfLiteEvalTensor* eval_tensors);
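  // Illustrative call order (a sketch, not verbatim library code; model,
  // op_resolver, arena and reporter are assumed to exist):
  //
  //   MicroAllocator* a = MicroAllocator::Create(arena, arena_size, reporter);
  //   NodeAndRegistration* nodes = nullptr;
  //   TfLiteEvalTensor* eval_tensors = nullptr;
  //   a->StartModelAllocation(model, op_resolver, &nodes, &eval_tensors);
  //   ... kernels Prepare and request scratch buffers ...
  //   a->FinishModelAllocation(model, eval_tensors);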
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
// application. The eval_tensors pointer should be the value passed into this
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);
// Resets all temporary allocations. This method should be called after a
// chain of temp allocations (e.g. chain of TfLiteTensor objects via
// AllocateTfLiteTensor()).
virtual void ResetTempAllocations();
// Allocates persistent buffer which has the same life time as the allocator.
// The memory is immediately available and is allocated from the tail of the
// arena.
  void* AllocatePersistentBuffer(size_t bytes);
// Register a scratch buffer of size `bytes` for Node with `node_id`.
// This method only allocates a BufferHandle holding information for memory
  // planning. The buffer ptr is ready after `FinishModelAllocation` and can
// be retrieved by `GetScratchBuffer` method using the returned buffer_idx.
// Note that there should be no tail allocation between two consecutive
// `RequestScratchBufferInArena` calls.
@ -116,16 +167,74 @@ class MicroAllocator {
// Returns the pointer to the planned scratch buffer.
void* GetScratchBuffer(int buffer_idx) const;
private:
TfLiteStatus Init();
// Returns the arena usage in bytes, only available after
// `FinishModelAllocation`. Otherwise, it will return 0.
size_t used_bytes() const;
const Model* model_;
// A simple memory allocator that always allocate from the arena tail.
protected:
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
virtual ~MicroAllocator();
// Allocates an array in the arena to hold the node and registration
// pointers required to represent the inference graph of the model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const Model* model, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);
// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);
ErrorReporter* error_reporter() const;
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena. The eval_tensors pointer is the list of
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
// will be allocated into the head section in this function call.
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
ErrorReporter* error_reporter_;
TfLiteContext* context_;
// Indicating if the allocator is ready for allocation.
bool active_ = false;
bool model_is_allocating_;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
@ -134,9 +243,7 @@ class MicroAllocator {
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
const SubGraph* subgraph_;
const flatbuffers::Vector<flatbuffers::Offset<Operator>>* operators_;
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
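Taken together, the comments above describe a two-phase protocol: StartModelAllocation() builds the persistent structures, kernel preparation happens in between, and FinishModelAllocation() commits the memory plan. A sketch of how a caller (normally MicroInterpreter) might drive it — the function name is illustrative and error handling is trimmed:
void SetUpModel(const tflite::Model* model,
                const tflite::MicroOpResolver& op_resolver,
                uint8_t* tensor_arena, size_t arena_size,
                tflite::ErrorReporter* error_reporter) {
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(tensor_arena, arena_size, error_reporter);
  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  TfLiteEvalTensor* eval_tensors = nullptr;
  // Phase 1: walk the flatbuffer; allocate node/registration data and the
  // persistent TfLiteEvalTensor list (the source of truth for all buffers).
  if (allocator->StartModelAllocation(model, op_resolver,
                                      &node_and_registrations,
                                      &eval_tensors) != kTfLiteOk) return;
  // ... kernel Init()/Prepare() runs here and may request scratch buffers ...
  // Phase 2: plan and commit the non-persistent buffers to the arena 'head'.
  if (allocator->FinishModelAllocation(model, eval_tensors) != kTfLiteOk) return;
}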

View File

@ -15,7 +15,10 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <cstdarg>
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include "tensorflow/lite/micro/debug_log.h"
#include "tensorflow/lite/micro/micro_string.h"
#endif

View File

@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Minor modifications made for Espruino Microcontroller build by Gordon Williams <gw@pur3.co.uk>
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include <cstdarg>
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/debug_log.h"
namespace tflite {
@ -25,8 +27,6 @@ class MicroErrorReporter : public ErrorReporter {
public:
~MicroErrorReporter() override {}
int Report(const char* format, va_list args) override;
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
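In typical use the reporter is instantiated once and passed around through the ErrorReporter base class — a minimal sketch (the message itself is illustrative):
tflite::MicroErrorReporter micro_error_reporter;
tflite::ErrorReporter* error_reporter = &micro_error_reporter;
TF_LITE_REPORT_ERROR(error_reporter, "Arena too small: %d bytes", 1024);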

View File

@ -15,15 +15,16 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
#include <cstdarg>
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/debug_log.h"
namespace tflite {
class MicroErrorReporter : public ErrorReporter {
public:
~MicroErrorReporter() {}
~MicroErrorReporter() override {}
int Report(const char* format, va_list args) override;
private:

View File

@ -14,16 +14,24 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
if (registration->builtin_code == BuiltinOperator_CUSTOM) {
return registration->custom_name;
@ -31,15 +39,20 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
}
}
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS)
} // namespace
namespace internal {
TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes, void** ptr) {
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator, const Model* model)
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
size_t bytes) {
return reinterpret_cast<ContextHelper*>(ctx->impl_)
->allocator_->AllocatePersistentBuffer(bytes, ptr);
->allocator_->AllocatePersistentBuffer(bytes);
}
TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
@ -57,55 +70,72 @@ void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
void ContextHelper::ReportOpError(struct TfLiteContext* context,
const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
va_list args;
va_start(args, format);
TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
va_end(args);
#endif
}
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}
TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}
void ContextHelper::SetNodeIndex(int idx) { current_node_idx_ = idx; }
void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
} // namespace internal
MicroInterpreter::MicroInterpreter(const Model* model,
const OpResolver& op_resolver,
const MicroOpResolver& op_resolver,
uint8_t* tensor_arena,
size_t tensor_arena_size,
ErrorReporter* error_reporter)
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(&context_, model_, tensor_arena, tensor_arena_size,
error_reporter_),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
tensors_allocated_(false),
context_helper_(error_reporter_, &allocator_) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.recommended_num_threads = 1;
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
}
}
initialization_status_ = kTfLiteOk;
MicroInterpreter::MicroInterpreter(const Model* model,
const MicroOpResolver& op_resolver,
MicroAllocator* allocator,
ErrorReporter* error_reporter,
tflite::Profiler* profiler)
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*allocator),
tensors_allocated_(false),
initialization_status_(kTfLiteError),
eval_tensors_(nullptr),
context_helper_(error_reporter_, &allocator_, model),
input_tensor_(nullptr),
output_tensor_(nullptr) {
Init(profiler);
}
MicroInterpreter::~MicroInterpreter() {
@ -123,7 +153,28 @@ MicroInterpreter::~MicroInterpreter() {
}
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
void MicroInterpreter::Init(tflite::Profiler* profiler) {
const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
initialization_status_ = kTfLiteError;
return;
}
subgraph_ = (*subgraphs)[0];
context_.impl_ = static_cast<void*>(&context_helper_);
context_.ReportError = context_helper_.ReportOpError;
context_.GetTensor = context_helper_.GetTensor;
context_.GetEvalTensor = context_helper_.GetEvalTensor;
context_.recommended_num_threads = 1;
context_.profiler = profiler;
initialization_status_ = kTfLiteOk;
}
void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
int32_t tensorSize = 1;
for (int d = 0; d < tensorCorr->dims->size; ++d)
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
@ -147,6 +198,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
case TfLiteType::kTfLiteComplex64:
CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize);
break;
case TfLiteType::kTfLiteComplex128:
CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize);
break;
default:
// Do nothing for other data types.
break;
@ -161,8 +215,42 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
}
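The per-element byte swap inside CorrectTensorDataEndianness() falls outside this hunk; a standalone sketch of what such a swap could look like (the function name and body below are assumptions, not the committed implementation):
#include <cstdint>
#include <utility>  // for std::swap

template <class T>
void SwapElementBytes(T* data, int32_t size) {
  for (int32_t i = 0; i < size; ++i) {
    // Reverse the byte order of each element in place.
    char* bytes = reinterpret_cast<char*>(&data[i]);
    for (size_t lo = 0, hi = sizeof(T) - 1; lo < hi; ++lo, --hi) {
      std::swap(bytes[lo], bytes[hi]);
    }
  }
}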
TfLiteStatus MicroInterpreter::AllocateTensors() {
TF_LITE_ENSURE_OK(&context_, allocator_.AllocateNodeAndRegistrations(
op_resolver_, &node_and_registrations_));
if (allocator_.StartModelAllocation(model_, op_resolver_,
&node_and_registrations_,
&eval_tensors_) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper rolled
// into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
if (auto* buffer =
(*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
// If it has any data, is the data size larger than zero?
if (array->size()) {
// Update the endianness of the corresponding eval tensor since that
// struct holds the buffer used at inference time.
CorrectTensorEndianness(&eval_tensors_[t]);
}
}
}
}
}
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
@ -189,8 +277,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
}
context_helper_.SetNodeIndex(-1);
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is available
// in Prepare stage.
// Both AllocatePersistentBuffer and RequestScratchBufferInArena are
// available in the Prepare stage.
context_.RequestScratchBufferInArena =
context_helper_.RequestScratchBufferInArena;
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
@ -208,6 +296,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
return kTfLiteError;
}
}
allocator_.ResetTempAllocations();
}
context_helper_.SetNodeIndex(-1);
@ -217,7 +306,10 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, eval_tensors_));
TF_LITE_ENSURE_STATUS(ResetVariableTensors());
tensors_allocated_ = true;
return kTfLiteOk;
}
@ -240,7 +332,23 @@ TfLiteStatus MicroInterpreter::Invoke() {
auto* registration = node_and_registrations_[i].registration;
if (registration->invoke) {
TfLiteStatus invoke_status = registration->invoke(&context_, node);
TfLiteStatus invoke_status;
#ifndef NDEBUG // Omit profiler overhead from release builds.
// The case where profiler == nullptr is handled by
// ScopedOperatorProfile.
tflite::Profiler* profiler =
reinterpret_cast<tflite::Profiler*>(context_.profiler);
ScopedOperatorProfile scoped_profiler(
profiler, OpNameFromRegistration(registration), i);
#endif
invoke_status = registration->invoke(&context_, node);
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
if (invoke_status == kTfLiteError) {
TF_LITE_REPORT_ERROR(
error_reporter_,
@ -257,50 +365,82 @@ TfLiteStatus MicroInterpreter::Invoke() {
TfLiteTensor* MicroInterpreter::input(size_t index) {
const size_t length = inputs_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Input index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[inputs().Get(index)]);
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Input tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
inputs().Get(index));
}
if (input_tensor_ == nullptr) {
input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(index));
}
return input_tensor_;
}
TfLiteTensor* MicroInterpreter::output(size_t index) {
const size_t length = outputs_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Output index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &(context_.tensors[outputs().Get(index)]);
if (index != 0) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Output tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
outputs().Get(index));
}
if (output_tensor_ == nullptr) {
// TODO(b/160894903): This API will allocate TfLiteTensor structs from
// persistent (tail) memory and cache on this pointer.
output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, outputs().Get(index));
}
return output_tensor_;
}
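Given the warnings above, the intended pattern is to fetch the index-0 handles once and reuse them across invocations — a sketch assuming a model with a float32 scalar input and output:
TfLiteTensor* input = interpreter.input(0);    // cached by the interpreter
TfLiteTensor* output = interpreter.output(0);  // cached by the interpreter
for (int i = 0; i < 10; ++i) {
  input->data.f[0] = 0.1f * i;                 // fill the input buffer
  if (interpreter.Invoke() != kTfLiteOk) break;
  float result = output->data.f[0];            // read back the result
  (void)result;
}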
TfLiteTensor* MicroInterpreter::tensor(size_t index) {
const size_t length = tensors_size();
if ((index < 0) || (index >= length)) {
if (index >= length) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Tensor index %d out of range (length is %d)", index,
length);
return nullptr;
}
return &context_.tensors[index];
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
index);
}
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
const size_t length = tensors_size();
for (size_t i = 0; i < length; ++i) {
TfLiteTensor* cur_tensor = tensor(i);
if (cur_tensor->is_variable) {
TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to reset variable tensor at index: %d", i);
return status;
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
auto* tensor = subgraph_->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
int value = 0;
if (tensor->type() == tflite::TensorType_INT8) {
value = tensor->quantization()->zero_point()->Get(0);
}
memset(eval_tensors_[i].data.raw, value, buffer_size);
}
}
return kTfLiteOk;
}

View File

@ -15,13 +15,18 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_INTERPRETER_H_
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/type_to_tflitetype.h"
namespace tflite {
@ -30,29 +35,36 @@ namespace internal {
// A helper class to encapsulate the implementation of APIs in Context.
// context->impl_ points to an instance of this class.
// Check tensorflow/lite/c/common.h for detailed descriptions.
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
class ContextHelper {
public:
explicit ContextHelper(ErrorReporter* error_reporter,
MicroAllocator* allocator)
: allocator_(allocator), error_reporter_(error_reporter) {}
static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
void** ptr);
MicroAllocator* allocator, const Model* model);
// Functions that will be assigned to function pointers on TfLiteContext:
static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
size_t bytes,
int* buffer_idx);
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_idx);
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
int tensor_idx);
void SetNodeIndex(int idx) { current_node_idx_ = idx; }
// Sets the current node index to assist with scratch buffer allocations:
void SetNodeIndex(int idx);
// Sets the pointer to a list of TfLiteEvalTensor instances.
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
private:
MicroAllocator* allocator_;
ErrorReporter* error_reporter_;
const Model* model_;
TfLiteEvalTensor* eval_tensors_;
int current_node_idx_ = -1;
};
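From a kernel's perspective these statics surface as the function pointers on TfLiteContext, so persistent memory is requested roughly as below (the byte count is illustrative):
// Inside a kernel's Init(), with context->impl_ already pointing at the
// interpreter's ContextHelper:
void* buffer = context->AllocatePersistentBuffer(context, 64);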
@ -60,17 +72,26 @@ class ContextHelper {
class MicroInterpreter {
public:
// The lifetime of the model, op resolver, tensor arena, and error reporter
// must be at least as long as that of the interpreter object, since the
// interpreter may need to access them at any time. This means that you should
// usually create them with the same scope as each other, for example having
// them all allocated on the stack as local variables through a top-level
// function.
// The interpreter doesn't do any deallocation of any of the pointed-to
// objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const OpResolver& op_resolver,
// The lifetime of the model, op resolver, tensor arena, error reporter and
// profiler must be at least as long as that of the interpreter object, since
// the interpreter may need to access them at any time. This means that you
// should usually create them with the same scope as each other, for example
// having them all allocated on the stack as local variables through a
// top-level function. The interpreter doesn't do any deallocation of any of
// the pointed-to objects, ownership remains with the caller.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
uint8_t* tensor_arena, size_t tensor_arena_size,
ErrorReporter* error_reporter);
ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
// Create an interpreter instance using an existing MicroAllocator instance.
// This constructor should be used when creating an allocator that needs to
// have allocation handled in more than one interpreter or for recording
// allocations inside the interpreter. The lifetime of the allocator must be
// as long as that of the interpreter object.
MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
MicroAllocator* allocator, ErrorReporter* error_reporter,
tflite::Profiler* profiler = nullptr);
~MicroInterpreter();
@ -147,8 +168,16 @@ class MicroInterpreter {
// arena_used_bytes() + 16.
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
protected:
const MicroAllocator& allocator() const { return allocator_; }
const TfLiteContext& context() const { return context_; }
private:
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
// TODO(b/158263161): Consider switching to Create() function to enable better
// error reporting during initialization.
void Init(tflite::Profiler* profiler);
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
template <class T>
void CorrectTensorDataEndianness(T* data, int32_t size);
@ -156,16 +185,22 @@ class MicroInterpreter {
NodeAndRegistration* node_and_registrations_ = nullptr;
const Model* model_;
const OpResolver& op_resolver_;
const MicroOpResolver& op_resolver_;
ErrorReporter* error_reporter_;
TfLiteContext context_ = {};
MicroAllocator allocator_;
MicroAllocator& allocator_;
bool tensors_allocated_;
TfLiteStatus initialization_status_;
const SubGraph* subgraph_;
TfLiteEvalTensor* eval_tensors_;
internal::ContextHelper context_helper_;
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
// TfLiteEvalTensor buffers.
TfLiteTensor* input_tensor_;
TfLiteTensor* output_tensor_;
};
} // namespace tflite
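Putting the new pieces together — MicroOpResolver instead of OpResolver, the optional profiler argument, and the cached input/output accessors — an end-to-end setup now looks roughly like this sketch (the model symbol, arena size, op choice and float32 I/O are assumptions for illustration):
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model_data[];  // hypothetical exported flatbuffer

constexpr size_t kTensorArenaSize = 10 * 1024;  // model-dependent
static uint8_t tensor_arena[kTensorArenaSize];

TfLiteStatus RunOnce(float x, float* y) {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(g_model_data);

  static tflite::MicroMutableOpResolver<1> resolver(&error_reporter);
  resolver.AddFullyConnected();  // register only what the model needs

  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kTensorArenaSize, &error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return kTfLiteError;

  interpreter.input(0)->data.f[0] = x;    // assumes a float32 scalar input
  if (interpreter.Invoke() != kTfLiteOk) return kTfLiteError;
  *y = interpreter.output(0)->data.f[0];  // assumes a float32 scalar output
  return kTfLiteOk;
}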

View File

@ -1,4 +1,4 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -16,110 +16,441 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_MICRO_MUTABLE_OP_RESOLVER_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include <cstdio>
#include <cstring>
#ifndef TFLITE_REGISTRATIONS_MAX
#define TFLITE_REGISTRATIONS_MAX (128)
#endif
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Op versions discussed in this file are enumerated here:
// tensorflow/lite/tools/versioning/op_version.cc
inline int MicroOpResolverAnyVersion() { return 0; }
template <unsigned int tOpCount = TFLITE_REGISTRATIONS_MAX>
class MicroOpResolver : public OpResolver {
template <unsigned int tOpCount>
class MicroMutableOpResolver : public MicroOpResolver {
public:
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const override {
explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
: error_reporter_(error_reporter) {}
const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
if (op == BuiltinOperator_CUSTOM) return nullptr;
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == op) &&
(registration.version == MicroOpResolverAnyVersion() ||
version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
if (registration.builtin_code == op) {
return &registration;
}
}
return nullptr;
}
const TfLiteRegistration* FindOp(const char* op, int version) const override {
const TfLiteRegistration* FindOp(const char* op) const override {
for (unsigned int i = 0; i < registrations_len_; ++i) {
const TfLiteRegistration& registration = registrations_[i];
if ((registration.builtin_code == BuiltinOperator_CUSTOM) &&
(strcmp(registration.custom_name, op) == 0) &&
(registration.version == MicroOpResolverAnyVersion() ||
version == MicroOpResolverAnyVersion() ||
registration.version == version)) {
(strcmp(registration.custom_name, op) == 0)) {
return &registration;
}
}
return nullptr;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int version = 1) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
MicroOpResolver::BuiltinParseFunction GetOpDataParser(
BuiltinOperator op) const override {
TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
if (builtin_codes_[i] == op) return builtin_parsers_[i];
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = op;
new_registration->version = version;
return nullptr;
}
void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddBuiltin(op, registration, version);
}
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int version = 1) {
// Registers a Custom Operator with the MicroOpResolver.
//
// Only the first call for a given name will be successful; i.e., if this
// function is called again for a previously added Custom Operator, the
// MicroOpResolver will be unchanged and this function will return
// kTfLiteError.
TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
if (registrations_len_ >= tOpCount) {
// TODO(b/147748244) - Add error reporting hooks so we can report this!
return;
if (error_reporter_) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Couldn't register custom op '%s', resolver size is too small (%d)",
name, tOpCount);
}
return kTfLiteError;
}
if (FindOp(name) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddCustom for the same op more than once "
"is not supported (Op: %s).",
name);
}
return kTfLiteError;
}
TfLiteRegistration* new_registration = &registrations_[registrations_len_];
registrations_len_ += 1;
*new_registration = *registration;
new_registration->builtin_code = BuiltinOperator_CUSTOM;
new_registration->custom_name = name;
new_registration->version = version;
return kTfLiteOk;
}
void AddCustom(const char* name, TfLiteRegistration* registration,
int min_version, int max_version) {
for (int version = min_version; version <= max_version; ++version) {
AddCustom(name, registration, version);
}
// The Add* functions below add the various Builtin operators to the
// MicroMutableOpResolver object.
TfLiteStatus AddAbs() {
return AddBuiltin(BuiltinOperator_ABS, tflite::ops::micro::Register_ABS(),
ParseAbs);
}
TfLiteStatus AddAdd() {
return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
ParseAdd);
}
TfLiteStatus AddArgMax() {
return AddBuiltin(BuiltinOperator_ARG_MAX,
tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
}
TfLiteStatus AddArgMin() {
return AddBuiltin(BuiltinOperator_ARG_MIN,
tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
}
TfLiteStatus AddAveragePool2D() {
return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
tflite::ops::micro::Register_AVERAGE_POOL_2D(),
ParsePool);
}
TfLiteStatus AddCeil() {
return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
ParseCeil);
}
TfLiteStatus AddCircularBuffer() {
return AddCustom("CIRCULAR_BUFFER",
tflite::ops::micro::Register_CIRCULAR_BUFFER());
}
TfLiteStatus AddConcatenation() {
return AddBuiltin(BuiltinOperator_CONCATENATION,
tflite::ops::micro::Register_CONCATENATION(),
ParseConcatenation);
}
TfLiteStatus AddConv2D() {
return AddBuiltin(BuiltinOperator_CONV_2D,
tflite::ops::micro::Register_CONV_2D(), ParseConv2D);
}
TfLiteStatus AddCos() {
return AddBuiltin(BuiltinOperator_COS, tflite::ops::micro::Register_COS(),
ParseCos);
}
TfLiteStatus AddDepthwiseConv2D() {
return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
ParseDepthwiseConv2D);
}
TfLiteStatus AddDequantize() {
return AddBuiltin(BuiltinOperator_DEQUANTIZE,
tflite::ops::micro::Register_DEQUANTIZE(),
ParseDequantize);
}
TfLiteStatus AddEqual() {
return AddBuiltin(BuiltinOperator_EQUAL,
tflite::ops::micro::Register_EQUAL(), ParseEqual);
}
TfLiteStatus AddFloor() {
return AddBuiltin(BuiltinOperator_FLOOR,
tflite::ops::micro::Register_FLOOR(), ParseFloor);
}
TfLiteStatus AddFullyConnected() {
return AddBuiltin(BuiltinOperator_FULLY_CONNECTED,
tflite::ops::micro::Register_FULLY_CONNECTED(),
ParseFullyConnected);
}
TfLiteStatus AddGreater() {
return AddBuiltin(BuiltinOperator_GREATER,
tflite::ops::micro::Register_GREATER(), ParseGreater);
}
TfLiteStatus AddGreaterEqual() {
return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
tflite::ops::micro::Register_GREATER_EQUAL(),
ParseGreaterEqual);
}
TfLiteStatus AddHardSwish() {
return AddBuiltin(BuiltinOperator_HARD_SWISH,
tflite::ops::micro::Register_HARD_SWISH(),
ParseHardSwish);
}
TfLiteStatus AddL2Normalization() {
return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
tflite::ops::micro::Register_L2_NORMALIZATION(),
ParseL2Normalization);
}
TfLiteStatus AddLess() {
return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
ParseLess);
}
TfLiteStatus AddLessEqual() {
return AddBuiltin(BuiltinOperator_LESS_EQUAL,
tflite::ops::micro::Register_LESS_EQUAL(),
ParseLessEqual);
}
TfLiteStatus AddLog() {
return AddBuiltin(BuiltinOperator_LOG, tflite::ops::micro::Register_LOG(),
ParseLog);
}
TfLiteStatus AddLogicalAnd() {
return AddBuiltin(BuiltinOperator_LOGICAL_AND,
tflite::ops::micro::Register_LOGICAL_AND(),
ParseLogicalAnd);
}
TfLiteStatus AddLogicalNot() {
return AddBuiltin(BuiltinOperator_LOGICAL_NOT,
tflite::ops::micro::Register_LOGICAL_NOT(),
ParseLogicalNot);
}
TfLiteStatus AddLogicalOr() {
return AddBuiltin(BuiltinOperator_LOGICAL_OR,
tflite::ops::micro::Register_LOGICAL_OR(),
ParseLogicalOr);
}
TfLiteStatus AddLogistic() {
return AddBuiltin(BuiltinOperator_LOGISTIC,
tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
}
TfLiteStatus AddMaximum() {
return AddBuiltin(BuiltinOperator_MAXIMUM,
tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
}
TfLiteStatus AddMaxPool2D() {
return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
}
TfLiteStatus AddMean() {
return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
ParseReducer);
}
TfLiteStatus AddMinimum() {
return AddBuiltin(BuiltinOperator_MINIMUM,
tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
}
TfLiteStatus AddMul() {
return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
ParseMul);
}
TfLiteStatus AddNeg() {
return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
ParseNeg);
}
TfLiteStatus AddNotEqual() {
return AddBuiltin(BuiltinOperator_NOT_EQUAL,
tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
}
TfLiteStatus AddPack() {
return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
ParsePack);
}
TfLiteStatus AddPad() {
return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
ParsePad);
}
TfLiteStatus AddPadV2() {
return AddBuiltin(BuiltinOperator_PADV2,
tflite::ops::micro::Register_PADV2(), ParsePadV2);
}
TfLiteStatus AddPrelu() {
return AddBuiltin(BuiltinOperator_PRELU,
tflite::ops::micro::Register_PRELU(), ParsePrelu);
}
TfLiteStatus AddQuantize() {
return AddBuiltin(BuiltinOperator_QUANTIZE,
tflite::ops::micro::Register_QUANTIZE(), ParseQuantize);
}
TfLiteStatus AddRelu() {
return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
ParseRelu);
}
TfLiteStatus AddRelu6() {
return AddBuiltin(BuiltinOperator_RELU6,
tflite::ops::micro::Register_RELU6(), ParseRelu6);
}
TfLiteStatus AddReshape() {
return AddBuiltin(BuiltinOperator_RESHAPE,
tflite::ops::micro::Register_RESHAPE(), ParseReshape);
}
TfLiteStatus AddResizeNearestNeighbor() {
return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
ParseResizeNearestNeighbor);
}
TfLiteStatus AddRound() {
return AddBuiltin(BuiltinOperator_ROUND,
tflite::ops::micro::Register_ROUND(), ParseRound);
}
TfLiteStatus AddRsqrt() {
return AddBuiltin(BuiltinOperator_RSQRT,
tflite::ops::micro::Register_RSQRT(), ParseRsqrt);
}
TfLiteStatus AddSin() {
return AddBuiltin(BuiltinOperator_SIN, tflite::ops::micro::Register_SIN(),
ParseSin);
}
TfLiteStatus AddSoftmax() {
return AddBuiltin(BuiltinOperator_SOFTMAX,
tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax);
}
TfLiteStatus AddSplit() {
return AddBuiltin(BuiltinOperator_SPLIT,
tflite::ops::micro::Register_SPLIT(), ParseSplit);
}
TfLiteStatus AddSqrt() {
return AddBuiltin(BuiltinOperator_SQRT, tflite::ops::micro::Register_SQRT(),
ParseSqrt);
}
TfLiteStatus AddSquare() {
return AddBuiltin(BuiltinOperator_SQUARE,
tflite::ops::micro::Register_SQUARE(), ParseSquare);
}
TfLiteStatus AddStridedSlice() {
return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
tflite::ops::micro::Register_STRIDED_SLICE(),
ParseStridedSlice);
}
TfLiteStatus AddSub() {
return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
ParseSub);
}
TfLiteStatus AddSvdf() {
return AddBuiltin(BuiltinOperator_SVDF, tflite::ops::micro::Register_SVDF(),
ParseSvdf);
}
TfLiteStatus AddTanh() {
return AddBuiltin(BuiltinOperator_TANH, tflite::ops::micro::Register_TANH(),
ParseTanh);
}
TfLiteStatus AddUnpack() {
return AddBuiltin(BuiltinOperator_UNPACK,
tflite::ops::micro::Register_UNPACK(), ParseUnpack);
}
unsigned int GetRegistrationLength() { return registrations_len_; }
private:
TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
const TfLiteRegistration& registration,
MicroOpResolver::BuiltinParseFunction parser) {
if (op == BuiltinOperator_CUSTOM) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Invalid parameter BuiltinOperator_CUSTOM to the "
"AddBuiltin function.");
}
return kTfLiteError;
}
if (FindOp(op) != nullptr) {
if (error_reporter_ != nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Calling AddBuiltin with the same op more than "
"once is not supported (Op: #%d).",
op);
}
return kTfLiteError;
}
if (registrations_len_ >= tOpCount) {
if (error_reporter_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Couldn't register builtin op #%d, resolver size "
"is too small (%d).",
op, tOpCount);
}
return kTfLiteError;
}
registrations_[registrations_len_] = registration;
// Strictly speaking, the builtin_code is not necessary for TFLM, but we
// fill it in regardless.
registrations_[registrations_len_].builtin_code = op;
registrations_len_++;
builtin_codes_[num_buitin_ops_] = op;
builtin_parsers_[num_buitin_ops_] = parser;
num_buitin_ops_++;
return kTfLiteOk;
}
TfLiteRegistration registrations_[tOpCount];
unsigned int registrations_len_ = 0;
public:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
// TODO(b/147854028): Consider switching all uses of MicroMutableOpResolver to
// MicroOpResolver.
class MicroMutableOpResolver
: public MicroOpResolver<TFLITE_REGISTRATIONS_MAX> {
private:
// Arrays (and counter) to store the builtin codes and their corresponding
// parse functions as these are registered with the Op Resolver.
BuiltinOperator builtin_codes_[tOpCount];
MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
unsigned int num_buitin_ops_ = 0;
ErrorReporter* error_reporter_;
public:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
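So instead of a fixed-capacity resolver with version ranges, callers now size the template for exactly the ops their model uses and register them through the typed Add* helpers — a sketch for a hypothetical four-op convolutional model:
static tflite::MicroMutableOpResolver<4> resolver;  // capacity: exactly 4 ops
resolver.AddConv2D();
resolver.AddMaxPool2D();
resolver.AddFullyConnected();
resolver.AddSoftmax();
// Each Add* returns kTfLiteOk, or kTfLiteError when the resolver is full or
// the op was already registered; the resolver is then handed to the
// MicroInterpreter constructor.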

View File

@ -20,8 +20,18 @@ limitations under the License.
#endif
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
@ -75,6 +85,8 @@ const char* TensorTypeName(TfLiteType type) {
return "kTfLiteInt16";
case kTfLiteComplex64:
return "kTfLiteComplex64";
case kTfLiteComplex128:
return "kTfLiteComplex128";
case kTfLiteFloat16:
return "kTfLiteFloat16";
case kTfLiteFloat64:
@ -95,11 +107,44 @@ const char* AllocTypeName(TfLiteAllocationType type) {
return "kTfLiteArenaRw";
case kTfLiteArenaRwPersistent:
return "kTfLiteArenaRwPersistent";
case kTfLitePersistentRo:
return "kTfLitePersistentRo";
case kTfLiteCustom:
return "kTfLiteCustom";
}
return "(invalid)";
}
} // namespace
// Helper function to print model flatbuffer data. This function is not called
// by default, and hence is not linked into the final binary.
void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
auto* subgraphs = model->subgraphs();
const SubGraph* subgraph = (*subgraphs)[0];
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
subgraph->tensors();
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
model->buffers();
TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
for (size_t i = 0; i < tensors->size(); ++i) {
const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
size_t type_size, tensor_size;
auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
auto* array = buffer->data();
int array_size = 0;
if (array) {
array_size = array->size();
}
BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
error_reporter);
TF_LITE_REPORT_ERROR(
error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
!array_size && !flatbuffer_tensor.is_variable(), tensor_size);
}
#endif
}
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",
@ -113,10 +158,9 @@ void PrintInterpreterState(MicroInterpreter* interpreter) {
for (size_t tensor_index = 0; tensor_index < interpreter->tensors_size();
tensor_index++) {
TfLiteTensor* tensor = interpreter->tensor(static_cast<int>(tensor_index));
printf("Tensor %3zu %-20s %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
tensor->name, TensorTypeName(tensor->type),
AllocTypeName(tensor->allocation_type), tensor->bytes,
static_cast<double>(tensor->bytes / (1 << 20)));
printf("Tensor %3zu %10s %15s %10zu bytes (%4.1f MB) ", tensor_index,
TensorTypeName(tensor->type), AllocTypeName(tensor->allocation_type),
tensor->bytes, static_cast<double>(tensor->bytes) / (1 << 20));
PrintTfLiteIntVector(tensor->dims);
}
printf("\n");

View File

@ -20,6 +20,9 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_interpreter.h"
namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default, and hence is not linked into the final binary.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
} // namespace tflite

View File

@ -23,6 +23,7 @@ limitations under the License.
#include <cstdarg>
#include <cstdint>
#include <cstring>
namespace {
@ -125,7 +126,8 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
const int32_t exponent_shift = 23;
const int32_t exponent_bias = 127;
const uint32_t fraction_mask = 0x007fffff;
const uint32_t u = *reinterpret_cast<uint32_t*>(&f);
uint32_t u;
memcpy(&u, &f, sizeof(int32_t));
const int32_t exponent =
((u & exponent_mask) >> exponent_shift) - exponent_bias;
const uint32_t fraction = (u & fraction_mask);
@ -163,7 +165,47 @@ char* FastFloatToBufferLeft(float f, char* buffer) {
*current = '.';
current += 1;
*current = 0;
// Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate
// zeros off the end of the fraction. Every fractional value takes 7 bytes.
// For example, 2500 would be written into the buffer as 0002500 since it
// represents .00025.
constexpr int kMaxFractionalDigits = 7;
// Abort early if there is not enough space in the buffer.
if (current_end - current <= kMaxFractionalDigits) {
return current;
}
// Pre-fill buffer with zeros to ensure zero-truncation works properly.
for (int i = 1; i < kMaxFractionalDigits; i++) {
*(current + i) = '0';
}
// Track how large the fraction is to add leading zeros.
char* previous = current;
current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10);
int fraction_digits = current - previous;
int leading_zeros = kMaxFractionalDigits - fraction_digits;
// Overwrite the null terminator from StrCatUInt32 to ensure zero-truncation
// works properly.
*current = '0';
// Shift fraction values and prepend zeros.
for (int i = 0; i < fraction_digits; i++) {
current--;
*(current + leading_zeros) = *current;
*current = '0';
}
current += kMaxFractionalDigits;
// Truncate trailing zeros for cleaner logs. Ensure we leave at least one
// fractional character for the case when scaled_fraction is 0.
while (*(current - 1) == '0' && (current - 1) > previous) {
current--;
}
*current = 0;
current = StrCatStr(current, (current_end - current), "*2^");
current = StrCatInt32(current, (current_end - current), exponent);
return current;
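The in-place pointer shuffling above can be hard to follow; a standalone sketch of the same pad-to-7-digits-then-truncate idea (not the committed code, which deliberately avoids the C library's printf family):
#include <cstdint>
#include <cstdio>
#include <cstring>

// e.g. 2500 -> "0002500" -> "00025" (i.e. the fraction .00025).
static void FormatScaledFraction(uint32_t scaled_fraction, char* out,
                                 size_t out_len) {
  snprintf(out, out_len, "%07u", static_cast<unsigned>(scaled_fraction));
  size_t end = strlen(out);
  while (end > 1 && out[end - 1] == '0') {  // trim trailing zeros but keep
    out[--end] = '\0';                      // at least one digit
  }
}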

Some files were not shown because too many files have changed in this diff