HIP/ROCm build: pass K_QUANTS_PER_ITERATION to the GPU sources

CMakeLists.txt
    Define K_QUANTS_PER_ITERATION from LLAMA_CUDA_KQUANTS_ITER on the
    ggml-rocm target (PRIVATE), matching the GGML_CUDA_DMMV_Y definition
    a few lines above it, rather than with a directory-wide
    add_compile_definitions() that would reach every target.

Makefile
    Assign CCV/CXXV with := so the compiler --version shell-out is
    expanded once at assignment instead of on every reference.
    Add gfx803 to GPU_TARGETS.
    Add -DK_QUANTS_PER_ITERATION to CXXFLAGS, taken from
    LLAMA_CUDA_KQUANTS_ITER when that variable is set and 2 otherwise.

ggml.c
    Close the Accelerate/OpenBLAS #if chain after the OpenBLAS include and
    pull in ggml-cuda.h and ggml-opencl.h from their own #if blocks, so
    they no longer depend on which BLAS branch was taken. The
    GGML_USE_HIPBLAS test is dropped from the ggml-cuda.h condition; the
    Makefile hunk below shows HIP builds defining GGML_USE_CUBLAS as well.

Notes for whoever applies this
    Leading indentation of context lines was reconstructed; apply with
    whitespace-insensitive matching (git apply --ignore-whitespace, or
    patch -l).
    The post-image hash on the CMakeLists.txt index line was not
    recomputed after the target_compile_definitions change.
    The ggml.c hunk header counts only the context lines present below.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 296f5043001db..23c28c3589ac1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -335,6 +335,7 @@ if (LLAMA_HIPBLAS)
     target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
     set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
     target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
+    target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
 
     if (LLAMA_STATIC)
         message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
diff --git a/Makefile b/Makefile
index f49c57edf5ece..49bbfaf4e72fb 100644
--- a/Makefile
+++ b/Makefile
@@ -21,8 +21,8 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif
 
-CCV = $(shell $(CC) --version | head -n 1)
-CXXV = $(shell $(CXX) --version | head -n 1)
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
 
 # Mac OS + Arm can report x86_64
 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
@@ -207,13 +207,18 @@ ifdef LLAMA_HIPBLAS
     ROCM_PATH ?= /opt/rocm
     CC := $(ROCM_PATH)/llvm/bin/clang
     CXX := $(ROCM_PATH)/llvm/bin/clang++
-    GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030
+    GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030
     LLAMA_CUDA_DMMV_X ?= 32
     LLAMA_CUDA_DMMV_Y ?= 1
     CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
     CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
     LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
     OBJS += ggml-cuda.o
+ifdef LLAMA_CUDA_KQUANTS_ITER
+    CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
+else
+    CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
+endif
 ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
 ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
 ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
diff --git a/ggml.c b/ggml.c
index 89379516e2bcd..5713a9f43569f 100644
--- a/ggml.c
+++ b/ggml.c
@@ -230,8 +230,10 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_CUBLAS) | defined(GGML_USE_HIPBLAS)
+#endif
+#if defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
-#elif defined(GGML_USE_CLBLAST)
+#endif
+#if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif