Skip to content

Commit

Permalink
Add RVV qs8/qu8-f32-vcvt kernels and configs.
Browse files Browse the repository at this point in the history
  • Loading branch information
oliIMG committed Sep 19, 2024
1 parent 619b3fc commit 37c2f64
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 6 deletions.
4 changes: 4 additions & 0 deletions cmake/gen/rvv_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ SET(PROD_RVV_MICROKERNEL_SRCS
src/f32-vrnd/gen/f32-vrndu-rvv-u4v.c
src/f32-vrnd/gen/f32-vrndz-rvv-u4v.c
src/f32-vrsqrt/gen/f32-vrsqrt-rvv-rsqrt-u4v.c
src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u2v.c
src/qs8-vmul/gen/qs8-vmul-minmax-f32-rvv-u2v.c
src/qs8-vmulc/gen/qs8-vmulc-minmax-f32-rvv-u2v.c
src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u2v.c
src/qu8-vmul/gen/qu8-vmul-minmax-f32-rvv-u2v.c
src/qu8-vmulc/gen/qu8-vmulc-minmax-f32-rvv-u2v.c
src/x32-packw/gen/x32-packw-x4v-gemm-goi-rvv-u8.c
Expand Down Expand Up @@ -180,8 +182,10 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-6x4v-minmax-rvv.c
src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-7x4v-minmax-rvv.c
src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-8x4v-minmax-rvv.c
src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u1v.c
src/qs8-vmul/gen/qs8-vmul-minmax-f32-rvv-u1v.c
src/qs8-vmulc/gen/qs8-vmulc-minmax-f32-rvv-u1v.c
src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u1v.c
src/qu8-vmul/gen/qu8-vmul-minmax-f32-rvv-u1v.c
src/qu8-vmulc/gen/qu8-vmulc-minmax-f32-rvv-u1v.c
src/x32-packw/gen/x32-packw-x1v-gemm-goi-rvv-u2.c
Expand Down
4 changes: 4 additions & 0 deletions gen/rvv_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-vrnd/gen/f32-vrndu-rvv-u4v.c",
"src/f32-vrnd/gen/f32-vrndz-rvv-u4v.c",
"src/f32-vrsqrt/gen/f32-vrsqrt-rvv-rsqrt-u4v.c",
"src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u2v.c",
"src/qs8-vmul/gen/qs8-vmul-minmax-f32-rvv-u2v.c",
"src/qs8-vmulc/gen/qs8-vmulc-minmax-f32-rvv-u2v.c",
"src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u2v.c",
"src/qu8-vmul/gen/qu8-vmul-minmax-f32-rvv-u2v.c",
"src/qu8-vmulc/gen/qu8-vmulc-minmax-f32-rvv-u2v.c",
"src/x32-packw/gen/x32-packw-x4v-gemm-goi-rvv-u8.c",
Expand Down Expand Up @@ -177,8 +179,10 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
"src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-6x4v-minmax-rvv.c",
"src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-7x4v-minmax-rvv.c",
"src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-8x4v-minmax-rvv.c",
"src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u1v.c",
"src/qs8-vmul/gen/qs8-vmul-minmax-f32-rvv-u1v.c",
"src/qs8-vmulc/gen/qs8-vmulc-minmax-f32-rvv-u1v.c",
"src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u1v.c",
"src/qu8-vmul/gen/qu8-vmul-minmax-f32-rvv-u1v.c",
"src/qu8-vmulc/gen/qu8-vmulc-minmax-f32-rvv-u1v.c",
"src/x32-packw/gen/x32-packw-x1v-gemm-goi-rvv-u2.c",
Expand Down
7 changes: 7 additions & 0 deletions scripts/generate-qs8-f32-vcvt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ tools/xngen src/qs8-f32-vcvt/neon.c.in -D BATCH_TILE=16 -D DATATYPE=QU8 -o src/q
tools/xngen src/qs8-f32-vcvt/neon.c.in -D BATCH_TILE=24 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/qu8-f32-vcvt-neon-u24.c &
tools/xngen src/qs8-f32-vcvt/neon.c.in -D BATCH_TILE=32 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/qu8-f32-vcvt-neon-u32.c &

################################ RISC-V Vector ################################
tools/xngen src/qs8-f32-vcvt/rvv.c.in -D LMUL=1 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u1v.c &
tools/xngen src/qs8-f32-vcvt/rvv.c.in -D LMUL=2 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u2v.c &

tools/xngen src/qs8-f32-vcvt/rvv.c.in -D LMUL=1 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u1v.c &
tools/xngen src/qs8-f32-vcvt/rvv.c.in -D LMUL=2 -D DATATYPE=QU8 -o src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u2v.c &

################################# x86 128-bit #################################
tools/xngen src/qs8-f32-vcvt/sse2.c.in -D BATCH_TILE=8 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/qs8-f32-vcvt-sse2-u8.c &
tools/xngen src/qs8-f32-vcvt/sse2.c.in -D BATCH_TILE=16 -D DATATYPE=QS8 -o src/qs8-f32-vcvt/gen/qs8-f32-vcvt-sse2-u16.c &
Expand Down
14 changes: 8 additions & 6 deletions src/configs/unary-elementwise-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -2066,10 +2066,11 @@ static void init_qs8_to_f32_cvt_config(void) {
qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_u1;
qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params;
qs8_to_f32_cvt_config.element_tile = 1;
#elif XNN_ARCH_RISCV
qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_u4;
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__rvv_u2v;
qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params;
qs8_to_f32_cvt_config.element_tile = 4;
qs8_to_f32_cvt_config.element_tile = hardware_config->vlenb / sizeof(int8_t) * 2; // (VLENB/sizeof)*LMUL;
#else
qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_u4;
qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params;
Expand Down Expand Up @@ -2288,10 +2289,11 @@ static void init_qu8_to_f32_cvt_config(void) {
qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_u1;
qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params;
qu8_to_f32_cvt_config.element_tile = 1;
#elif XNN_ARCH_RISCV
qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_u4;
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__rvv_u2v;
qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params;
qu8_to_f32_cvt_config.element_tile = 4;
qu8_to_f32_cvt_config.element_tile = hardware_config->vlenb / sizeof(uint8_t) * 2; // (VLENB/sizeof)*LMUL;
#else
qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_u4;
qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params;
Expand Down
47 changes: 47 additions & 0 deletions src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u1v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Auto-generated file. Do not edit!
// Template: src/qs8-f32-vcvt/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/common.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/vcvt.h"


void xnn_qs8_f32_vcvt_ukernel__rvv_u1v(
size_t batch,
const int8_t* input,
float* output,
const struct xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(batch != 0);
assert(batch % sizeof(int8_t) == 0);
assert(input != NULL);
assert(output != NULL);

batch >>= XNN_LOG2_SIZEOF_INT8_T;

const float scale = params->scalar.scale;
const int32_t minus_zero_point = -params->scalar.zero_point;

for (; batch > 0; ) {
const int32_t n = __riscv_vsetvl_e8m1(batch); batch -= n;

vint8m1_t x_i8v = __riscv_vle8_v_i8m1(input, n); input += n;

vint32m4_t wx_i32v = __riscv_vsext_vf4_i32m4(x_i8v, n);
wx_i32v = __riscv_vadd_vx_i32m4(wx_i32v, minus_zero_point, n);
vfloat32m4_t y_f32v = __riscv_vfcvt_f_x_v_f32m4(wx_i32v, n);
y_f32v = __riscv_vfmul_vf_f32m4(y_f32v, scale, n);

__riscv_vse32_v_f32m4(output, y_f32v, n); output += n;
}
}
47 changes: 47 additions & 0 deletions src/qs8-f32-vcvt/gen/qs8-f32-vcvt-rvv-u2v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Auto-generated file. Do not edit!
// Template: src/qs8-f32-vcvt/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/common.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/vcvt.h"


void xnn_qs8_f32_vcvt_ukernel__rvv_u2v(
size_t batch,
const int8_t* input,
float* output,
const struct xnn_qs8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(batch != 0);
assert(batch % sizeof(int8_t) == 0);
assert(input != NULL);
assert(output != NULL);

batch >>= XNN_LOG2_SIZEOF_INT8_T;

const float scale = params->scalar.scale;
const int32_t minus_zero_point = -params->scalar.zero_point;

for (; batch > 0; ) {
const int32_t n = __riscv_vsetvl_e8m2(batch); batch -= n;

vint8m2_t x_i8v = __riscv_vle8_v_i8m2(input, n); input += n;

vint32m8_t wx_i32v = __riscv_vsext_vf4_i32m8(x_i8v, n);
wx_i32v = __riscv_vadd_vx_i32m8(wx_i32v, minus_zero_point, n);
vfloat32m8_t y_f32v = __riscv_vfcvt_f_x_v_f32m8(wx_i32v, n);
y_f32v = __riscv_vfmul_vf_f32m8(y_f32v, scale, n);

__riscv_vse32_v_f32m8(output, y_f32v, n); output += n;
}
}
5 changes: 5 additions & 0 deletions src/qs8-f32-vcvt/qs8-f32-vcvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__wasmsimd_u24, 24, false
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__wasmsimd_u32, 32, false, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__rvv_u1v, 1, true, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__rvv_u2v, 2, true, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
#endif // XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR

XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__scalar_u1, 1, false, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__scalar_u2, 2, false, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qs8_f32_vcvt_ukernel__scalar_u3, 3, false, int8_t, float, struct xnn_qs8_f32_cvt_params, xnn_init_qs8_f32_cvt_scalar_params)
Expand Down
52 changes: 52 additions & 0 deletions src/qs8-f32-vcvt/rvv.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2024 Imagination Technologies, inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert LMUL in [1, 2]
$VXINT = {"QS8": "vint", "QU8": "vuint"}[DATATYPE]
$XINT8_T = {"QS8": "int8_t", "QU8": "uint8_t"}[DATATYPE]
$XLOAD = {"QS8": "__riscv_vle8_v_i8", "QU8": "__riscv_vle8_v_u8"}[DATATYPE]
#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/common.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/vcvt.h"


void xnn_${DATATYPE.lower()}_f32_vcvt_ukernel__rvv_u${LMUL}v(
size_t batch,
const ${XINT8_T}* input,
float* output,
const struct xnn_${DATATYPE.lower()}_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(batch != 0);
assert(batch % sizeof(int8_t) == 0);
assert(input != NULL);
assert(output != NULL);

batch >>= XNN_LOG2_SIZEOF_INT8_T;

const float scale = params->scalar.scale;
const int32_t minus_zero_point = -params->scalar.zero_point;

for (; batch > 0; ) {
const int32_t n = __riscv_vsetvl_e8m${LMUL}(batch); batch -= n;

$if DATATYPE == "QS8":
vint8m${LMUL}_t x_i8v = __riscv_vle8_v_i8m${LMUL}(input, n); input += n;

vint32m${LMUL*4}_t wx_i32v = __riscv_vsext_vf4_i32m${LMUL*4}(x_i8v, n);
$else:
vuint8m${LMUL}_t x_u8v = __riscv_vle8_v_u8m${LMUL}(input, n); input += n;

vint32m${LMUL*4}_t wx_i32v = __riscv_vreinterpret_v_u32m${LMUL*4}_i32m${LMUL*4}(__riscv_vzext_vf4_u32m${LMUL*4}(x_u8v, n));
wx_i32v = __riscv_vadd_vx_i32m${LMUL*4}(wx_i32v, minus_zero_point, n);
vfloat32m${LMUL*4}_t y_f32v = __riscv_vfcvt_f_x_v_f32m${LMUL*4}(wx_i32v, n);
y_f32v = __riscv_vfmul_vf_f32m${LMUL*4}(y_f32v, scale, n);

__riscv_vse32_v_f32m${LMUL*4}(output, y_f32v, n); output += n;
}
}
47 changes: 47 additions & 0 deletions src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u1v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Auto-generated file. Do not edit!
// Template: src/qs8-f32-vcvt/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/common.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/vcvt.h"


void xnn_qu8_f32_vcvt_ukernel__rvv_u1v(
size_t batch,
const uint8_t* input,
float* output,
const struct xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(batch != 0);
assert(batch % sizeof(int8_t) == 0);
assert(input != NULL);
assert(output != NULL);

batch >>= XNN_LOG2_SIZEOF_INT8_T;

const float scale = params->scalar.scale;
const int32_t minus_zero_point = -params->scalar.zero_point;

for (; batch > 0; ) {
const int32_t n = __riscv_vsetvl_e8m1(batch); batch -= n;

vuint8m1_t x_u8v = __riscv_vle8_v_u8m1(input, n); input += n;

vint32m4_t wx_i32v = __riscv_vreinterpret_v_u32m4_i32m4(__riscv_vzext_vf4_u32m4(x_u8v, n));
wx_i32v = __riscv_vadd_vx_i32m4(wx_i32v, minus_zero_point, n);
vfloat32m4_t y_f32v = __riscv_vfcvt_f_x_v_f32m4(wx_i32v, n);
y_f32v = __riscv_vfmul_vf_f32m4(y_f32v, scale, n);

__riscv_vse32_v_f32m4(output, y_f32v, n); output += n;
}
}
47 changes: 47 additions & 0 deletions src/qu8-f32-vcvt/gen/qu8-f32-vcvt-rvv-u2v.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Auto-generated file. Do not edit!
// Template: src/qs8-f32-vcvt/rvv.c.in
// Generator: tools/xngen
//
// Copyright 2024 Imagination Technologies, inc.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>

#include <riscv_vector.h>

#include "xnnpack/common.h"
#include "xnnpack/intrinsics-polyfill.h"
#include "xnnpack/vcvt.h"


void xnn_qu8_f32_vcvt_ukernel__rvv_u2v(
size_t batch,
const uint8_t* input,
float* output,
const struct xnn_qu8_f32_cvt_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(batch != 0);
assert(batch % sizeof(int8_t) == 0);
assert(input != NULL);
assert(output != NULL);

batch >>= XNN_LOG2_SIZEOF_INT8_T;

const float scale = params->scalar.scale;
const int32_t minus_zero_point = -params->scalar.zero_point;

for (; batch > 0; ) {
const int32_t n = __riscv_vsetvl_e8m2(batch); batch -= n;

vuint8m2_t x_u8v = __riscv_vle8_v_u8m2(input, n); input += n;

vint32m8_t wx_i32v = __riscv_vreinterpret_v_u32m8_i32m8(__riscv_vzext_vf4_u32m8(x_u8v, n));
wx_i32v = __riscv_vadd_vx_i32m8(wx_i32v, minus_zero_point, n);
vfloat32m8_t y_f32v = __riscv_vfcvt_f_x_v_f32m8(wx_i32v, n);
y_f32v = __riscv_vfmul_vf_f32m8(y_f32v, scale, n);

__riscv_vse32_v_f32m8(output, y_f32v, n); output += n;
}
}
5 changes: 5 additions & 0 deletions src/qu8-f32-vcvt/qu8-f32-vcvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__wasmsimd_u24, 24, false
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__wasmsimd_u32, 32, false, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__rvv_u1v, 1, true, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__rvv_u2v, 2, true, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
#endif // XNN_ARCH_RISCV && XNN_ENABLE_RISCV_VECTOR

XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__scalar_u1, 1, false, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__scalar_u2, 2, false, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
XNN_CVT_UKERNEL_WITH_PARAMS(0, xnn_qu8_f32_vcvt_ukernel__scalar_u3, 3, false, uint8_t, float, struct xnn_qu8_f32_cvt_params, xnn_init_qu8_f32_cvt_scalar_params)
Expand Down

0 comments on commit 37c2f64

Please sign in to comment.