feat(compression): implement tensor decompression in op depthwise conv #3017

Merged: 2 commits, Dec 16, 2024
Changes from all commits
41 changes: 40 additions & 1 deletion tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -52,16 +52,37 @@ TfLiteStatus DepthwiseConvEval(TfLiteContext* context, TfLiteNode* node) {
? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
: nullptr;

#ifdef USE_TFLM_COMPRESSION

MicroContext* micro_context = GetMicroContext(context);

const CompressionTensorData* filter_comp_td =
micro_context->GetTensorCompressionData(node,
kDepthwiseConvWeightsTensor);
const CompressionTensorData* bias_comp_td =
micro_context->GetTensorCompressionData(node, kDepthwiseConvBiasTensor);

#endif // USE_TFLM_COMPRESSION

switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::DepthwiseConv(
DepthwiseConvParamsFloat(params, data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<float>(micro_context, filter,
filter_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
@@ -94,9 +115,18 @@ TfLiteStatus DepthwiseConvEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(micro_context, filter,
filter_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
@@ -118,9 +148,18 @@ TfLiteStatus DepthwiseConvEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(micro_context, filter,
filter_comp_td,
data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int64_t>(
micro_context, bias, bias_comp_td, data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int64_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
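Note on the pattern above (an editorial sketch, not part of the diff): under USE_TFLM_COMPRESSION the kernel-util overloads of GetTensorData/GetOptionalTensorData take the MicroContext, the compression metadata, and the scratch-buffer index reserved in Prepare, and presumably return a pointer to the decompressed data (or to the raw tensor data when the metadata is nullptr). Reduced to a single tensor, the int8 case reads roughly as:

#ifdef USE_TFLM_COMPRESSION
  MicroContext* micro_context = GetMicroContext(context);
  // nullptr when the filter tensor is not compressed.
  const CompressionTensorData* filter_comp_td =
      micro_context->GetTensorCompressionData(node,
                                              kDepthwiseConvWeightsTensor);
  // Decompresses into the scratch buffer reserved during Prepare() and
  // returns a pointer into it.
  const int8_t* filter_data = tflite::micro::GetTensorData<int8_t>(
      micro_context, filter, filter_comp_td, data.weights_scratch_index);
#endif  // USE_TFLM_COMPRESSION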
23 changes: 21 additions & 2 deletions tensorflow/lite/micro/kernels/depthwise_conv_common.cc
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -127,7 +127,9 @@ TfLiteStatus CalculateOpDataDepthwiseConv(

micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
if (has_bias) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);

return kTfLiteOk;
@@ -209,6 +211,23 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));

#ifdef USE_TFLM_COMPRESSION

// Compression scratch buffers.
// These will only be allocated if the tensor is compressed.
if (micro_context->IsTensorCompressed(node, kDepthwiseConvWeightsTensor) &&
filter->type == kTfLiteInt4) {
MicroPrintf("Compression not supported with INT4 tensors");
return kTfLiteError;
}
data->weights_scratch_index =
micro_context->AllocateDecompressionScratchBuffer(
node, kDepthwiseConvWeightsTensor);
data->bias_scratch_index = micro_context->AllocateDecompressionScratchBuffer(
node, kDepthwiseConvBiasTensor);

#endif // USE_TFLM_COMPRESSION

micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
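For context (an assumption about surrounding code, not shown in this diff): the two scratch-buffer indices allocated above are expected to live in the kernel's per-op data so Eval can hand them back to GetTensorData/GetOptionalTensorData. Something like:

struct OpDataDepthwiseConv {  // hypothetical name; the real struct may differ
  // ... existing fields: padding, per-channel quantization params, etc. ...
#ifdef USE_TFLM_COMPRESSION
  // Indices returned by AllocateDecompressionScratchBuffer() in Prepare();
  // consumed by GetTensorData()/GetOptionalTensorData() in Eval().
  int weights_scratch_index;
  int bias_scratch_index;
#endif  // USE_TFLM_COMPRESSION
};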
388 changes: 375 additions & 13 deletions tensorflow/lite/micro/kernels/depthwise_conv_test.cc

Large diffs are not rendered by default.

34 changes: 33 additions & 1 deletion tensorflow/lite/micro/kernels/xtensa/depthwise_conv.cc
@@ -1,5 +1,5 @@

/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -93,6 +93,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
context, op_data.reference_op_data.filter_buffer_index, filter);

#ifdef USE_TFLM_COMPRESSION

MicroContext* micro_context = GetMicroContext(context);

const CompressionTensorData* filter_comp_td =
micro_context->GetTensorCompressionData(node,
kDepthwiseConvWeightsTensor);
const CompressionTensorData* bias_comp_td =
micro_context->GetTensorCompressionData(node, kDepthwiseConvBiasTensor);

#endif // USE_TFLM_COMPRESSION

switch (input->type) { // Already know in/out types are same.
case kTfLiteInt8: {
switch (filter_int8.type) {
@@ -111,9 +123,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(
micro_context, &filter_int8, filter_comp_td,
op_data.reference_op_data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(
micro_context, bias, bias_comp_td,
op_data.reference_op_data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(&filter_int8),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
@@ -136,9 +158,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(
micro_context, &filter_int8, filter_comp_td,
op_data.reference_op_data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int64_t>(
micro_context, bias, bias_comp_td,
op_data.reference_op_data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(&filter_int8),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int64_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
37 changes: 34 additions & 3 deletions tensorflow/lite/micro/kernels/xtensa/depthwise_conv_hifi.cc
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -97,10 +97,22 @@ TfLiteStatus DepthwiseConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* filter,
const TfLiteEvalTensor* bias,
TfLiteEvalTensor* output) {
#ifdef USE_TFLM_COMPRESSION

MicroContext* micro_context = GetMicroContext(context);

const CompressionTensorData* filter_comp_td =
micro_context->GetTensorCompressionData(node,
kDepthwiseConvWeightsTensor);
const CompressionTensorData* bias_comp_td =
micro_context->GetTensorCompressionData(node, kDepthwiseConvBiasTensor);

#endif // USE_TFLM_COMPRESSION

// If dilation is not required use the optimized NN Library kernel.
// Otherwise call the reference implementation.
if ((params.dilation_width_factor == 1) &&
(params.dilation_height_factor == 1)) {
(params.dilation_height_factor == 1) && bias != nullptr) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = data.reference_op_data.padding.width;
@@ -133,8 +145,17 @@ TfLiteStatus DepthwiseConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
#ifdef USE_TFLM_COMPRESSION
const int8_t* filter_data = tflite::micro::GetTensorData<int8_t>(
micro_context, filter, filter_comp_td,
data.reference_op_data.weights_scratch_index);
const int32_t* bias_data = tflite::micro::GetTensorData<int32_t>(
micro_context, bias, bias_comp_td,
data.reference_op_data.bias_scratch_index);
#else // USE_TFLM_COMPRESSION
const int8_t* filter_data = tflite::micro::GetTensorData<int8_t>(filter);
const int32_t* bias_data = tflite::micro::GetTensorData<int32_t>(bias);
#endif // USE_TFLM_COMPRESSION
int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);

int32_t input_data_format = 0;
@@ -178,9 +199,19 @@ TfLiteStatus DepthwiseConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
#ifdef USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(
micro_context, filter, filter_comp_td,
data.reference_op_data.weights_scratch_index),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(
micro_context, bias, bias_comp_td,
data.reference_op_data.bias_scratch_index),
#else // USE_TFLM_COMPRESSION
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
#endif // USE_TFLM_COMPRESSION
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));

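Two behavioral notes on this file (editorial, hedged): the optimized NN-Library fast path is now entered only when a bias tensor is present, since that path reads bias_data unconditionally, while the reference fallback switches to GetOptionalTensorData so a missing bias is passed through as nullptr. The distinction amounts to roughly:

  // Sketch only: GetOptionalTensorData is assumed to tolerate a null tensor,
  // which the plain GetTensorData overload does not.
  const int32_t* bias_data =
      bias != nullptr ? tflite::micro::GetTensorData<int32_t>(bias) : nullptr;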
73 changes: 68 additions & 5 deletions tensorflow/lite/micro/kernels/xtensa/depthwise_conv_vision.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -53,7 +53,7 @@ TfLiteStatus DepthwiseConvPrepareVision(TfLiteContext* context,
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvBiasTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TF_LITE_ENSURE(context, bias != nullptr);

// Dynamically allocate per-channel quantization parameters.
const int num_channels =
@@ -135,18 +135,81 @@ TfLiteStatus DepthwiseConvPrepareVision(TfLiteContext* context,
filter_int8 = *filter;
}

#ifdef USE_TFLM_COMPRESSION

uint8_t* filter_data = nullptr;
int32_t* bias_data = nullptr;

const CompressionTensorData* filter_comp_td =
micro_context->GetTensorCompressionData(node,
kDepthwiseConvWeightsTensor);
if (filter_comp_td != nullptr) {
const size_t filter_data_size =
NumElements(&filter_int8) * TfLiteTypeGetSize(kTfLiteInt8);
filter_data =
micro_context->AllocateTempBuffer(filter_data_size, sizeof(int8_t));
if (filter_data == nullptr) {
return kTfLiteError;
}
const TfLiteEvalTensor* filter_eval =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
filter_data = static_cast<uint8_t*>(micro_context->DecompressTensorToBuffer(
*filter_eval, *filter_comp_td, filter_data));
} else {
filter_data = GetTensorData<uint8_t>(&filter_int8);
}

const CompressionTensorData* bias_comp_td =
micro_context->GetTensorCompressionData(node, kDepthwiseConvBiasTensor);
if (bias_comp_td != nullptr) {
const size_t bias_data_size =
NumElements(bias) * TfLiteTypeGetSize(kTfLiteInt32);
bias_data = reinterpret_cast<int32_t*>(
micro_context->AllocateTempBuffer(bias_data_size, sizeof(int32_t)));
if (bias_data == nullptr) {
return kTfLiteError;
}
const TfLiteEvalTensor* bias_eval =
tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor);
bias_data = static_cast<int32_t*>(micro_context->DecompressTensorToBuffer(
*bias_eval, *bias_comp_td, bias_data));
} else {
bias_data = GetTensorData<int32_t>(bias);
}

if (filter_data == nullptr || bias_data == nullptr) {
return kTfLiteError;
}

#else // USE_TFLM_COMPRESSION

uint8_t* filter_data = GetTensorData<uint8_t>(&filter_int8);
int32_t* bias_data = GetTensorData<int32_t>(bias);

#endif // USE_TFLM_COMPRESSION

status = xiDepthwiseConvDoCoeffReorder(
data->p_context, data->context_size,
reinterpret_cast<uint8_t*>(data->reorder_coefficient_bias),
data->reorder_coefficient_bias_size,
const_cast<uint8_t*>(GetTensorData<uint8_t>(&filter_int8)),
const_cast<int32_t*>(GetTensorData<int32_t>(bias)));
data->reorder_coefficient_bias_size, filter_data, bias_data);
if (status) {
return kTfLiteError;
}
if (filter->type == kTfLiteInt4) {
micro_context->DeallocateTempBuffer(GetTensorData<uint8_t>(&filter_int8));
}

#ifdef USE_TFLM_COMPRESSION

if (filter_comp_td) {
micro_context->DeallocateTempBuffer(filter_data);
}
if (bias_comp_td) {
micro_context->DeallocateTempBuffer(reinterpret_cast<uint8_t*>(bias_data));
}

#endif // USE_TFLM_COMPRESSION

micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
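Unlike the HiFi and reference kernels, the vision kernel consumes the filter and bias once at Prepare time (for coefficient reordering), so instead of a persistent decompression scratch buffer it decompresses into temporary buffers and frees them after xiDepthwiseConvDoCoeffReorder. Stripped of sizing and error handling, the filter-side pattern above is roughly:

#ifdef USE_TFLM_COMPRESSION
  // Temp buffer sized for the uncompressed int8 filter data.
  uint8_t* filter_data =
      micro_context->AllocateTempBuffer(filter_data_size, sizeof(int8_t));
  // Decompress directly into the temp buffer.
  filter_data = static_cast<uint8_t*>(micro_context->DecompressTensorToBuffer(
      *filter_eval, *filter_comp_td, filter_data));
  // ... xiDepthwiseConvDoCoeffReorder(..., filter_data, bias_data) ...
  micro_context->DeallocateTempBuffer(filter_data);
#endif  // USE_TFLM_COMPRESSION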
13 changes: 10 additions & 3 deletions tensorflow/lite/micro/micro_utils.h
@@ -90,12 +90,19 @@ void SymmetricQuantize(const float* input, T* output, int num_elements,
template <typename T>
void SymmetricPerChannelQuantize(const float* input, T* output,
int num_elements, int num_channels,
float* scales) {
float* scales,
size_t quantized_dimension = 0) {
int elements_per_channel = num_elements / num_channels;
for (int i = 0; i < num_channels; i++) {
for (int j = 0; j < elements_per_channel; j++) {
output[i * elements_per_channel + j] = FloatToSymmetricQuantizedType<T>(
input[i * elements_per_channel + j], scales[i]);
size_t offset;
if (quantized_dimension == 0) {
offset = i * elements_per_channel + j;
} else {
offset = i + elements_per_channel * j;
}
output[offset] =
FloatToSymmetricQuantizedType<T>(input[offset], scales[i]);
}
}
}
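The new quantized_dimension parameter in SymmetricPerChannelQuantize only changes how (channel, element) pairs map to a flat index: 0 keeps the existing channel-major layout, while a non-zero value makes the channel index the fastest-varying one, matching depthwise-conv filters whose quantized dimension is the last axis. A hypothetical call for such a filter (names and shapes invented for illustration):

  // Depthwise filter of shape [1, 2, 2, 4]: 4 output channels, quantized
  // along the last dimension, 4 elements per channel.
  const int num_elements = 1 * 2 * 2 * 4;
  const int num_channels = 4;
  float filter_float[num_elements] = {/* HWC-ordered float weights */};
  float scales[num_channels] = {0.5f, 0.25f, 0.125f, 1.0f};
  int8_t quantized[num_elements];
  // Any non-zero quantized_dimension selects the channel-last indexing.
  tflite::SymmetricPerChannelQuantize<int8_t>(
      filter_float, quantized, num_elements, num_channels, scales,
      /*quantized_dimension=*/3);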