diff --git a/tensorflow/lite/core/c/common.cc b/tensorflow/lite/core/c/common.cc index 84883d2fd19..4f404c93a18 100644 --- a/tensorflow/lite/core/c/common.cc +++ b/tensorflow/lite/core/c/common.cc @@ -649,4 +649,8 @@ TfLiteRunStep TfLiteTensorGetShapeKnownStep(const TfLiteTensor* t) { return kTfLiteRunStepUnknown; } +// Returns a sentinel value to be used as the user_data field of a TfLiteNode +// when the kernel initialization fails. +void* TfLiteKernelInitFailed() { return reinterpret_cast<void*>(-1); } + } // extern "C" diff --git a/tensorflow/lite/core/c/common.h b/tensorflow/lite/core/c/common.h index c3e00cc0972..765c2bc12f2 100644 --- a/tensorflow/lite/core/c/common.h +++ b/tensorflow/lite/core/c/common.h @@ -1161,6 +1161,11 @@ typedef struct TfLiteRegistration { /// NOTE: if the data is already in the desired format, simply implement this /// function to return `nullptr` and implement the free function to be a /// no-op. + /// + /// NOTE: For a Delegate kernel, returns `TfLiteKernelInitFailed()` if it + /// fails on the initialization. This eventually causes user's API call to + /// InterpreterBuilder::operator() or Interpreter::ModifyGraphWithDelegate() + /// to return an error. void* (*init)(TfLiteContext* context, const char* buffer, size_t length); /// The pointer `buffer` is the data previously returned by an init @@ -1499,6 +1504,10 @@ TfLiteRunStep TfLiteTensorGetDataKnownStep(const TfLiteTensor* t); /// operations. TfLiteRunStep TfLiteTensorGetShapeKnownStep(const TfLiteTensor* t); +/// Returns a sentinel value to be used as the user_data field of a TfLiteNode +/// when the kernel initialization fails. +void* TfLiteKernelInitFailed(); + /** @} */ // Ends `\addtogroup`, it's important for the doc generator that this doesn't // include the CC code below. 
diff --git a/tensorflow/lite/kernels/internal/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/portable_tensor_utils.h index fc45d1f9822..a361a2d0e5d 100644 --- a/tensorflow/lite/kernels/internal/portable_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/portable_tensor_utils.h @@ -170,6 +170,7 @@ inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch, tensor_utils::SymmetricQuantizeFloats( float_data_ptr + offset, n_data, quantized_data_ptr + offset, &unused_min, &unused_max, &scaling_factors[b]); + if (zero_points) zero_points[b] = 0; } } } diff --git a/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/tensorflow/lite/kernels/internal/reference/batch_matmul.h index d83696219c2..54908bd24ee 100644 --- a/tensorflow/lite/kernels/internal/reference/batch_matmul.h +++ b/tensorflow/lite/kernels/internal/reference/batch_matmul.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/runtime_shape.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { diff --git a/tensorflow/lite/kernels/internal/reference/div.h b/tensorflow/lite/kernels/internal/reference/div.h index e70fb09767e..5f26d3b8e6d 100644 --- a/tensorflow/lite/kernels/internal/reference/div.h +++ b/tensorflow/lite/kernels/internal/reference/div.h @@ -99,6 +99,18 @@ inline void Div(const ArithmeticParams& params, DivElementwise(flat_size, params, input1_data, input2_data, output_data); } +inline void Div(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, + const RuntimeShape& output_shape, int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + 
MatchingElementsSize(input1_shape, input2_shape, output_shape); + + DivElementwise(flat_size, params, input1_data, input2_data, output_data); +} + template <typename T, int N = 5> inline void BroadcastDivSlowQuantized( const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape, @@ -177,6 +189,19 @@ inline void BroadcastDivSlow(const ArithmeticParams& params, input2_data, unextended_output_shape, output_data); } +template <int N = 5> +inline void BroadcastDivSlow(const ArithmeticParams& params, + const RuntimeShape& unextended_input1_shape, + const int16_t* input1_data, + const RuntimeShape& unextended_input2_shape, + const int16_t* input2_data, + const RuntimeShape& unextended_output_shape, + int16_t* output_data) { + BroadcastDivSlowQuantized<int16_t, N>( + params, unextended_input1_shape, input1_data, unextended_input2_shape, + input2_data, unextended_output_shape, output_data); +} + // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. The code generator would then diff --git a/tensorflow/lite/kernels/internal/reference/prelu.h b/tensorflow/lite/kernels/internal/reference/prelu.h index aa9901d605f..1a5ef0cb1f4 100644 --- a/tensorflow/lite/kernels/internal/reference/prelu.h +++ b/tensorflow/lite/kernels/internal/reference/prelu.h @@ -26,10 +26,10 @@ namespace tflite { namespace reference_ops { // Broadcast prelu to output_shape for quantized uint8_t/int8_t data. 
-template <typename T> +template <typename T, typename U> inline void BroadcastPrelu4DSlow( const PreluParams& params, const RuntimeShape& input_shape, - const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data, + const T* input_data, const RuntimeShape& alpha_shape, const U* alpha_data, const RuntimeShape& output_shape, T* output_data) { TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4); @@ -74,10 +74,10 @@ inline void BroadcastPrelu4DSlow( } } -template <typename T> +template <typename T, typename U> inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape, const T* input_data, const RuntimeShape& alpha_shape, - const T* alpha_data, const RuntimeShape& output_shape, + const U* alpha_data, const RuntimeShape& output_shape, T* output_data) { const int32_t quantized_min = std::numeric_limits<T>::min(); const int32_t quantized_max = std::numeric_limits<T>::max();