Skip to content

Commit

Permalink
update CMakeLists.txt
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
mingfeima committed Sep 26, 2024
1 parent 98ddb38 commit 7c371fa
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 29 deletions.
20 changes: 14 additions & 6 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,23 @@ if (GGML_LLAMAFILE)
endif()

if (GGML_AMX)
    # AMX intrinsics (tile config / tdpbssd) need gcc newer than 11.0;
    # disable the backend rather than fail the build on older compilers.
    if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
    else()
        set(GGML_AMX OFF)
        message(WARNING "AMX requires gcc version > 11.0. Turning off GGML_AMX.")
    endif()

    # re-check: the gate above may have turned GGML_AMX off
    if (GGML_AMX)
        message(STATUS "Using AMX")

        # export GGML_USE_AMX to consumers via the public compile-definition
        # list instead of add_compile_definitions (directory-scoped)
        list(APPEND GGML_CDEF_PUBLIC GGML_USE_AMX)

        file(GLOB   GGML_HEADERS_AMX "ggml-amx/*.h")
        list(APPEND GGML_HEADERS_AMX "../include/ggml-amx.h")

        file(GLOB   GGML_SOURCES_AMX "ggml-amx/*.cpp")
        list(APPEND GGML_SOURCES_AMX "ggml-amx.cpp")
    endif()
endif()

if (GGML_CUDA)
Expand Down
10 changes: 8 additions & 2 deletions ggml/src/ggml-amx/mmq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2326,13 +2326,21 @@ size_t ggml_backend_amx_get_alloc_size(const struct ggml_tensor * tensor) {

// pack weight to vnni format
void ggml_backend_amx_convert_weight(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {

size_t alloc_size = ggml_backend_amx_get_alloc_size(tensor);
GGML_ASSERT(alloc_size == size);

const enum ggml_type TYPE = tensor->type;

const int K = tensor->ne[0]; // ne0: in_features
const int N = tensor->ne[1]; // ne1: out_features

#if defined(_OPENMP)
// the buffer ctx is not initialized when .set_tensor is called
int n_threads = omp_get_num_threads();
#else
int n_threads = 1;
#endif

GGML_DISPATCH_QTYPES(TYPE, [&] {
convert_B_packed_format<type, blck_size>((void *)((char *)tensor->data + offset), (const type *)data, N, K, n_threads);
Expand Down Expand Up @@ -2425,8 +2433,6 @@ void ggml_backend_amx_mul_mat(ggml_backend_amx_context * ctx, struct ggml_tensor
}
});

//printf("### using amx kernels ... n_threads = %d\n", n_threads);

if (M == 1) {
// MB = 1 and handle 8 tiles in each block
constexpr int kTilesN = 4;
Expand Down
16 changes: 0 additions & 16 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,10 @@ int ggml_sve_cnt_b = 0;
#undef GGML_USE_LLAMAFILE
#endif

// enable AMX only with OPENMP
//#if !defined(__AMX_INT8__) || !defined(GGML_USE_OPENMP)
//#undef GGML_USE_AMX
//#endif

#ifdef GGML_USE_LLAMAFILE
#include <llamafile/sgemm.h>
#endif

//#ifdef GGML_USE_AMX
//#include <ggml-amx/mmq.h>
//#endif

#if defined(_MSC_VER)
// disable "possible loss of data" to avoid hundreds of casts
// we should just be careful :)
Expand Down Expand Up @@ -12904,13 +12895,6 @@ static void ggml_compute_forward_mul_mat(
// nb01 >= nb00 - src0 is not transposed
// compute by src0 rows

//#if GGML_USE_AMX
// if (ggml_compute_forward_mul_mat_use_amx(dst)) {
// ggml_mul_mat_amx(dst, nth, ith, params->wdata, params->wsize);
// return;
// }
//#endif

#if GGML_USE_LLAMAFILE
// broadcast factors
const int64_t r2 = ne12 / ne02;
Expand Down
5 changes: 0 additions & 5 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@
# include "ggml-metal.h"
#endif

// enable AMX only with OPENMP
#if !defined(__AMX_INT8__) || !defined(GGML_USE_OPENMP)
# undef GGML_USE_AMX
#endif

#ifdef GGML_USE_AMX
# include "ggml-amx.h"
#endif
Expand Down

0 comments on commit 7c371fa

Please sign in to comment.