Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CANN: cann backend build failed when manually specify SOC_TYPE or gcc version that isn't verified #10519

Merged
merged 3 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/backend/CANN.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi

## News

- 2024.11
- Support F16 and F32 data type model for Ascend 310P NPU.
- 2024.8
- Support `Q4_0` and `Q8_0` data type for Ascend NPU.
- 2024.7
Expand All @@ -43,6 +45,7 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi
| Ascend NPU | Status |
|:-----------------------------:|:-------:|
| Atlas 300T A2 | Support |
| Atlas 300I Duo | Support |

*Notes:*

Expand Down
7 changes: 4 additions & 3 deletions ggml/src/ggml-cann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ if(NOT SOC_TYPE)
detect_ascend_soc_type(SOC_VERSION)
set(SOC_TYPE "${SOC_VERSION}")
message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
else()
string(TOLOWER ${SOC_TYPE} SOC_VERSION)
endif()

# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND310P.
string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower

# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P.
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)

if (CANN_INSTALL_DIR)
# Only Support Linux.
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cann/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ ascendc_library(ascendc_kernels STATIC
${SRC_FILES}
)

message(STATUS "CANN: compile ascend kernels witch SOC_VERSION:${SOC_VERSION}.")
message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
1 change: 0 additions & 1 deletion ggml/src/ggml-cann/kernels/dup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ class DupByRows {
// Input has four dims.
int64_t op_block_num = GetBlockNum();
int64_t op_block_idx = GetBlockIdx();
assert(op_block_idx < SUPPORTED_MAX_DIM && op_block_idx >= 0, "Invalid block index:%d, max is:%d\n", op_block_idx, SUPPORTED_MAX_DIM);

// param
num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
Expand Down
16 changes: 12 additions & 4 deletions ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

// optimize me. Use template to avoid copy code.
using namespace AscendC;
#ifdef ASCEND_310P // 310P not support 4bit get row
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
printf("Ascend310P not support 4bit get row.\n");
}
#else

#define BUFFER_NUM 2

Expand Down Expand Up @@ -110,12 +119,9 @@ class GET_ROW_Q4_0 {
LocalTensor<float> output_local = output_queue.AllocTensor<float>();

// TODO: cast more data to speed up.
#ifdef ASCEND_310P
// TODO: 310P support quantification
#else
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
#endif

// Only mul need compile by group.
half scale = scale_gm.GetValue(scale_offset);

Expand Down Expand Up @@ -194,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
indices_nb_ub, output_ne_ub, output_nb_ub);
op.calculate();
}

#endif // #ifdef ASCEND_310P
10 changes: 10 additions & 0 deletions ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#include "kernel_operator.h"

using namespace AscendC;
#ifdef ASCEND_310P
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
printf("Ascend310P not support f16->8bit quantization.\n");
}
#else

#define BUFFER_NUM 2
#define QK8_0 32
Expand Down Expand Up @@ -206,3 +214,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate();
}

#endif // #ifdef ASCEND_310P
10 changes: 10 additions & 0 deletions ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#include "kernel_operator.h"

using namespace AscendC;
#ifdef ASCEND_310P // 310P not support f32->8bit quantization
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
printf("Ascend310P not support f32->8bit quantization.\n");
}
#else

#define BUFFER_NUM 2
#define QK8_0 32
Expand Down Expand Up @@ -204,3 +212,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate();
}

#endif // #ifdef ASCEND_310P
17 changes: 17 additions & 0 deletions ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
#include "kernel_operator.h"

using namespace AscendC;
#ifdef ASCEND_310P // 310P not support float->4bit quantization
extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
printf("Ascend310P not support f32->4bit quantization.\n");
}

extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// let following test cases can continue run, here just print error information. Of Cource the test case that call this operator is failed.
printf("Ascend310P not support f16->4bit quantization.\n");
}
#else

#define BUFFER_NUM 2
#define Group_Size 32
Expand Down Expand Up @@ -276,3 +291,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate();
}

#endif // #ifdef ASCEND_310P
Loading