Skip to content

Commit

Permalink
reformat extra cpu backend.
Browse files Browse the repository at this point in the history
- clean Q4_0_N_M and IQ4_0_N_M
  - remove from "file" tensor type
  - allow only with dynamic repack

- extract cpu extra bufts and convert to C++
  - hbm
  - "aarch64"

- more generic use of extra buffer
  - generalise extra_supports_op
  - new API for "cpu-accel":
     - amx
     - aarch64
  • Loading branch information
Djip007 committed Dec 1, 2024
1 parent ce4ce87 commit 733f891
Show file tree
Hide file tree
Showing 28 changed files with 2,308 additions and 2,363 deletions.
22 changes: 10 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,10 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
MK_CFLAGS += -march=native -mtune=native
HOST_CXXFLAGS += -march=native -mtune=native

# Usage AMX build test
#MK_CFLAGS += -march=graniterapids -mtune=graniterapids
#HOST_CXXFLAGS += -march=graniterapids -mtune=graniterapids

# Usage AVX-only
#MK_CFLAGS += -mfma -mf16c -mavx
#MK_CXXFLAGS += -mfma -mf16c -mavx
Expand Down Expand Up @@ -944,17 +948,18 @@ DIR_COMMON = common

OBJ_GGML = \
$(DIR_GGML)/src/ggml.o \
$(DIR_GGML)/src/ggml-aarch64.o \
$(DIR_GGML)/src/ggml-alloc.o \
$(DIR_GGML)/src/ggml-backend.o \
$(DIR_GGML)/src/ggml-backend-reg.o \
$(DIR_GGML)/src/ggml-opt.o \
$(DIR_GGML)/src/ggml-quants.o \
$(DIR_GGML)/src/ggml-threading.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
$(OBJ_GGML_EXT)

OBJ_LLAMA = \
Expand Down Expand Up @@ -1094,17 +1099,10 @@ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
# Default target
all: $(BUILD_TARGETS)

# force c++ build for source file that have same name as c file
# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o: \
ggml/src/ggml-cpu/ggml-cpu.cpp \
ggml/include/ggml-backend.h \
ggml/include/ggml.h \
ggml/include/ggml-alloc.h \
ggml/src/ggml-backend-impl.h \
ggml/include/ggml-cpu.h \
ggml/src/ggml-impl.h
$(CXX) $(CXXFLAGS) -c $< -o $@
$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@

# Rules for building object files
$(DIR_GGML)/%.o: $(DIR_GGML)/%.c
Expand Down
5 changes: 3 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ var sources = [
"src/unicode.cpp",
"src/unicode-data.cpp",
"ggml/src/ggml.c",
"ggml/src/ggml-aarch64.c",
"ggml/src/ggml-alloc.c",
"ggml/src/ggml-backend.cpp",
"ggml/src/ggml-backend-reg.cpp",
"ggml/src/ggml-cpu/ggml-cpu.c",
"ggml/src/ggml-cpu/ggml-cpu.cpp",
"ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
"ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp",
"ggml/src/ggml-cpu/ggml-cpu-hbm.cpp",
"ggml/src/ggml-cpu/ggml-cpu-quants.c",
"ggml/src/ggml-cpu/ggml-cpu-traits.cpp",
"ggml/src/ggml-threading.cpp",
"ggml/src/ggml-quants.c",
]
Expand Down
17 changes: 0 additions & 17 deletions ggml/include/ggml-cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,14 @@ extern "C" {

// Internal types and functions exposed for tests and benchmarks

typedef void (*ggml_from_float_to_mat_t)
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
const void * GGML_RESTRICT y, size_t by, int nrc);
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
const void * GGML_RESTRICT y, int nr, int nc);
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
const void * GGML_RESTRICT y, int nr, int nc);

struct ggml_type_traits_cpu {
ggml_from_float_t from_float;
ggml_from_float_to_mat_t from_float_to_mat;
ggml_vec_dot_t vec_dot;
enum ggml_type vec_dot_type;
int64_t nrows; // number of rows to process simultaneously
int64_t ncols; // number of columns to process simultaneously
ggml_gemv_t gemv;
ggml_gemm_t gemm;
};

GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
Expand All @@ -140,13 +130,6 @@ extern "C" {

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

#ifdef GGML_USE_CPU_HBM
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
#endif

GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);

#ifdef __cplusplus
}
#endif
11 changes: 4 additions & 7 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,12 @@ extern "C" {
GGML_TYPE_F64 = 28,
GGML_TYPE_IQ1_M = 29,
GGML_TYPE_BF16 = 30,
GGML_TYPE_Q4_0_4_4 = 31,
GGML_TYPE_Q4_0_4_8 = 32,
GGML_TYPE_Q4_0_8_8 = 33,
// GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
// GGML_TYPE_Q4_0_4_8 = 32,
// GGML_TYPE_Q4_0_8_8 = 33,
GGML_TYPE_TQ1_0 = 34,
GGML_TYPE_TQ2_0 = 35,
GGML_TYPE_IQ4_NL_4_4 = 36,
// GGML_TYPE_IQ4_NL_4_4 = 36,
// GGML_TYPE_IQ4_NL_4_8 = 37,
// GGML_TYPE_IQ4_NL_8_8 = 38,
GGML_TYPE_COUNT,
Expand Down Expand Up @@ -433,9 +433,6 @@ extern "C" {
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
};

// available tensor operations:
Expand Down
4 changes: 1 addition & 3 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,7 @@ add_library(ggml-base
ggml-threading.cpp
ggml-threading.h
ggml-quants.c
ggml-quants.h
ggml-aarch64.c
ggml-aarch64.h)
ggml-quants.h)

target_include_directories(ggml-base PRIVATE .)

Expand Down
129 changes: 0 additions & 129 deletions ggml/src/ggml-aarch64.c

This file was deleted.

19 changes: 0 additions & 19 deletions ggml/src/ggml-aarch64.h

This file was deleted.

2 changes: 1 addition & 1 deletion ggml/src/ggml-cann/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2089,7 +2089,7 @@ static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, con
static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
/* .get_name = */ ggml_backend_cann_reg_get_name,
/* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
/* .get_device_get = */ ggml_backend_cann_reg_get_device,
/* .get_device = */ ggml_backend_cann_reg_get_device,
/* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
};

Expand Down
Loading

0 comments on commit 733f891

Please sign in to comment.