Add dsp and mve support to transpose conv int8 #103

Merged
1 change: 1 addition & 0 deletions ARM.CMSIS-NN.pdsc
@@ -67,6 +67,7 @@
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s4.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_x.c"/>
42 changes: 40 additions & 2 deletions Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 19 January 2024
* $Revision: V.18.0.0
* $Date: 31 January 2024
* $Revision: V.18.1.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -920,6 +920,44 @@ __STATIC_FORCEINLINE const int8_t *read_and_pad(const int8_t *source, int32_t *o
return source;
}

/**
* @brief read and expand one s8 word into two s16 words with ordering and addition.
*/
__STATIC_FORCEINLINE void read_pad_and_add_s8(const int8_t *source, int32_t *out1, int32_t *out2, const uint32_t add)
{
int32_t inA = arm_nn_read_s8x4(source);
int32_t inAbuf1 = SXTAB16_RORn(add, (uint32_t)inA, 8);
int32_t inAbuf2 = SXTAB16(add, inA);

#ifndef ARM_MATH_BIG_ENDIAN
*out2 = (int32_t)(PKHTB(inAbuf1, inAbuf2, 16));
*out1 = (int32_t)(PKHBT(inAbuf2, inAbuf1, 16));
#else
*out1 = (int32_t)(PKHTB(inAbuf1, inAbuf2, 16));
*out2 = (int32_t)(PKHBT(inAbuf2, inAbuf1, 16));
#endif
}

/**
* @brief read and expand two bytes into one word with ordering.
*/
__STATIC_FORCEINLINE void read_and_pad_s8x2(const int8_t *source, int32_t *out)
{
int16_t in = arm_nn_read_s8x2(source);
int32_t inA = (in & 0x00FF) | ((in & 0xFF00) << 8);
*out = SXTB16(inA);
}

/**
* @brief read and expand two bytes into one word with ordering and addition.
*/
__STATIC_FORCEINLINE void read_pad_and_add_s8x2(const int8_t *source, int32_t *out, const uint32_t add)
{
int16_t in = arm_nn_read_s8x2(source);
int32_t inA = (in & 0x00FF) | ((in & 0xFF00) << 8);
*out = SXTAB16(add, inA);
}

/**
* @brief read and expand one s8 word into two s16 words with no additional ordering.
*/
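The new helpers widen sign-extended int8 values into 16-bit lanes and fold in an offset in a single SXTAB16 step. As a rough guide to the intended result (not part of the patch), the sketch below is a plain-C model of `read_pad_and_add_s8` and `read_pad_and_add_s8x2` for a little-endian target, assuming `add` carries the same 16-bit offset in both halfwords, which is how a duplicated input offset would typically be passed.

```c
#include <stdint.h>

/* Plain-C model of read_pad_and_add_s8 (little-endian): widen four int8 values
 * to int16, add the 16-bit offset held in each halfword of `add`, and pack the
 * results pairwise, low element in the low halfword. */
static void read_pad_and_add_s8_ref(const int8_t *src, int32_t *out1, int32_t *out2, uint32_t add)
{
    const int16_t offset = (int16_t)(add & 0xFFFF); /* assumed equal in both halfwords */

    uint32_t lo1 = (uint16_t)(int16_t)(src[0] + offset);
    uint32_t hi1 = (uint16_t)(int16_t)(src[1] + offset);
    uint32_t lo2 = (uint16_t)(int16_t)(src[2] + offset);
    uint32_t hi2 = (uint16_t)(int16_t)(src[3] + offset);

    *out1 = (int32_t)(lo1 | (hi1 << 16)); /* elements 0 and 1 */
    *out2 = (int32_t)(lo2 | (hi2 << 16)); /* elements 2 and 3 */
}

/* Plain-C model of read_pad_and_add_s8x2: the same packing for just two bytes. */
static void read_pad_and_add_s8x2_ref(const int8_t *src, int32_t *out, uint32_t add)
{
    const int16_t offset = (int16_t)(add & 0xFFFF);

    uint32_t lo = (uint16_t)(int16_t)(src[0] + offset);
    uint32_t hi = (uint16_t)(int16_t)(src[1] + offset);

    *out = (int32_t)(lo | (hi << 16));
}
```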
4 changes: 2 additions & 2 deletions README.md
@@ -24,10 +24,10 @@ Processors with Arm Helium Technology use the Arm M-profile Vector Extension(MVE
Examples are Cortex-M55 or Cortex-M85 configured with MVE.

| Operator | C <br> int8 | C<br>int16 | C<br>int4* | DSP<br>int8 | DSP<br>int16 | DSP<br>int4* | MVE<br>int8 | MVE<br>int16 |
| --------------- | ----------- | ---------- |------------| ------------| -------------|--------------| ------------| -------------|
| --------------- | ----------- | ---------- |------------|-------------| -------------|--------------|-------------| -------------|
| Conv2D | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
| DepthwiseConv2D | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
| TransposeConv2D | Yes | No | No | No | No | No | No | No |
| TransposeConv2D | Yes | No | No | Yes | No | No | Yes | No |
| Fully Connected | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
| Add | Yes | Yes | N/A | Yes | Yes | N/A | Yes | Yes |
| Mul | Yes | Yes | N/A | Yes | Yes | N/A | Yes | Yes |
34 changes: 26 additions & 8 deletions Source/ConvolutionFunctions/arm_transpose_conv_s8.c
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
* SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -19,10 +19,10 @@
/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_transpose_conv_s8.c
* Description: s8 version of convolution using symmetric quantization.
* Description: s8 version of transpose convolution using symmetric quantization.
*
* $Date: 5 October 2023
* $Revision: V.1.0.0
* $Date: 31 January 2024
* $Revision: V.1.1.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -172,11 +172,30 @@ arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx,
}
}
}

img_data = img_buf_ptr;
for (int i = 0; i < output_x * output_y; i++)
{
for (int i_output_ch = 0; i_output_ch < output_ch; i_output_ch++)
#if defined(ARM_MATH_MVEI)
int output_ch_idx = 0;
int8_t *ip_out_data = output_data_ptr;
for (int32_t i_channel_rmdr = output_ch; i_channel_rmdr > 0; i_channel_rmdr -= 4)
{
mve_pred16_t p = vctp32q((uint32_t)i_channel_rmdr);
int32x4_t result = vldrwq_z_s32(&img_data[output_ch_idx], p);
result = arm_requantize_mve_32x4(result,
vldrwq_z_s32(&output_multiplier[output_ch_idx], p),
vldrwq_z_s32(&output_shift[output_ch_idx], p));
result = vaddq_n_s32(result, out_offset);
result = vmaxq_s32(result, vdupq_n_s32(activation_min));
result = vminq_s32(result, vdupq_n_s32(activation_max));
vstrbq_p_s32(ip_out_data, result, p);
ip_out_data += 4;
output_ch_idx += 4;
}
output_data_ptr += output_ch;
#else
int i_output_ch = 0;
for (; i_output_ch < output_ch; i_output_ch++)
{
int32_t result =
arm_nn_requantize(img_data[i_output_ch], output_multiplier[i_output_ch], output_shift[i_output_ch]);
@@ -185,13 +204,12 @@ arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx,
result = MIN(result, activation_max);
*output_data_ptr++ = (int8_t)result;
}
#endif
img_data += output_ch;
}

input_data_ptr += (input_size * input_ch);
batch_cnt--;
}

/* Return to application */
return ARM_CMSIS_NN_SUCCESS;
}
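For readers less familiar with Helium intrinsics, the sketch below is a scalar model of the predicated per-pixel requantization loop added in the ARM_MATH_MVEI branch above (not part of the patch). `requantize()` is a simplified stand-in for `arm_nn_requantize` / `arm_requantize_mve_32x4` (fixed-point multiply plus rounding shift; the library's exact rounding rules differ slightly), and the inner loop bound mirrors how `vctp32q` limits the final iteration to the remaining output channels.

```c
#include <stdint.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Simplified stand-in for arm_nn_requantize: Q31 doubling high multiply
 * followed by a rounding shift. Rounding details differ from the library. */
static int32_t requantize(int32_t val, int32_t multiplier, int32_t shift)
{
    int64_t v = ((int64_t)val * multiplier + (1LL << 30)) >> 31;
    if (shift < 0)
    {
        v = (v + (1LL << (-shift - 1))) >> -shift;
    }
    else
    {
        v <<= shift;
    }
    return (int32_t)v;
}

/* Scalar model of the MVE branch for one output pixel: output_ch int32
 * accumulators from the scratch buffer are requantized and stored as int8. */
static void requantize_pixel_ref(const int32_t *img_data,
                                 int8_t *output_data,
                                 int32_t output_ch,
                                 const int32_t *output_multiplier,
                                 const int32_t *output_shift,
                                 int32_t out_offset,
                                 int32_t activation_min,
                                 int32_t activation_max)
{
    /* The vector loop handles 4 channels per iteration; vctp32q() builds a
     * tail predicate so only the remaining lanes are loaded and stored. */
    for (int32_t ch = 0; ch < output_ch; ch += 4)
    {
        const int32_t lanes = MIN(output_ch - ch, 4);
        for (int32_t lane = 0; lane < lanes; lane++)
        {
            int32_t result = requantize(img_data[ch + lane],
                                        output_multiplier[ch + lane],
                                        output_shift[ch + lane]);
            result += out_offset;                    /* vaddq_n_s32 */
            result = MAX(result, activation_min);    /* vmaxq_s32   */
            result = MIN(result, activation_max);    /* vminq_s32   */
            output_data[ch + lane] = (int8_t)result; /* vstrbq_p_s32: narrowing store */
        }
    }
}
```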