Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from ggerganov:master #153

Closed
wants to merge 10 commits into from
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/source-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/source-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/execution-charset:utf-8>")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/execution-charset:utf-8>")
endif()

#
# option list
#
Expand Down
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -730,10 +730,10 @@ GLSLC_CMD = glslc
_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
_ggml_vk_input_dir = ggml/src/vulkan-shaders
_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders
_ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp)

ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@

$(_ggml_vk_header): $(_ggml_vk_source)
Expand All @@ -745,8 +745,8 @@ $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
--target-hpp $(_ggml_vk_header) \
--target-cpp $(_ggml_vk_source)

vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

endif # GGML_VULKAN

Expand Down
52 changes: 32 additions & 20 deletions cmake/llama-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,11 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)

set(GGML_BLAS @GGML_BLAS@)
set(GGML_CUDA @GGML_CUDA@)
set(GGML_METAL @GGML_METAL@)
set(GGML_HIP @GGML_HIP@)
set(GGML_ACCELERATE @GGML_ACCELERATE@)
set(GGML_VULKAN @GGML_VULKAN@)
set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@)
set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@)
set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@)
set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@)
set(GGML_SYCL @GGML_SYCL@)
set(GGML_OPENMP @GGML_OPENMP@)

@PACKAGE_INIT@
Expand All @@ -22,10 +16,39 @@ set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

# Ensure transient dependencies satisfied

find_package(Threads REQUIRED)

set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")
set(_llama_link_deps "")
foreach(_ggml_lib ggml ggml-base)
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")
find_library(${_ggml_lib_var} ${_ggml_lib}
REQUIRED
HINTS ${LLAMA_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
message(STATUS "Found ${${_ggml_lib_var}}")
endforeach()

foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan)
string(TOUPPER "GGML_${backend}" backend_id)
set(_ggml_lib "ggml-${backend}")
string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY")

find_library(${_ggml_lib_var} ${_ggml_lib}
HINTS ${LLAMA_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)
if(${_ggml_lib_var})
list(APPEND _llama_link_deps "${${_ggml_lib_var}}")
set(${backend_id} ON)
message(STATUS "Found backend ${${_ggml_lib_var}}")
else()
set(${backend_id} OFF)
endif()
endforeach()

if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
endif()
Expand All @@ -48,7 +71,7 @@ if (GGML_VULKAN)
find_package(Vulkan REQUIRED)
endif()

if (GGML_HIPBLAS)
if (GGML_HIP)
find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)
Expand All @@ -63,24 +86,13 @@ if (GGML_OPENMP)
find_package(OpenMP REQUIRED)
endif()


find_library(ggml_LIBRARY ggml
REQUIRED
HINTS ${LLAMA_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)

find_library(llama_LIBRARY llama
REQUIRED
HINTS ${LLAMA_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH
)

set(_llama_link_deps "${ggml_LIBRARY}" "@GGML_LINK_LIBRARIES@")
set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@")

add_library(llama UNKNOWN IMPORTED)

set_target_properties(llama
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
Expand Down
6 changes: 6 additions & 0 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,12 @@ struct common_init_result common_init_from_params(common_params & params) {
return iparams;
}

if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
LOG_ERR("%s: KV cache shifting is not supported for this model (--no-context-shift to disable)'\n", __func__);
llama_free_model(model);
return iparams;
}

if (!params.control_vectors.empty()) {
if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_n_layer(model);
Expand Down
5 changes: 5 additions & 0 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3040,6 +3040,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
return [(self.map_tensor_name(name), data_torch)]


@Model.register("Olmo1124ForCausalLM")
class Olmo1124Model(Model):
model_arch = gguf.MODEL_ARCH.OLMO_1124


@Model.register("OlmoeForCausalLM")
class OlmoeModel(Model):
model_arch = gguf.MODEL_ARCH.OLMOE
Expand Down
9 changes: 3 additions & 6 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ if (NOT MSVC)
endif()
option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX "ggml: enable lsx" ON)
option(GGML_RVV "ggml: enable rvv" ON)
option(GGML_SVE "ggml: enable SVE" OFF)

if (WIN32)
Expand Down Expand Up @@ -235,12 +236,8 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
install(TARGETS ggml PUBLIC_HEADER)

if (BUILD_SHARED_LIBS)
install(TARGETS ggml LIBRARY)
install(TARGETS ggml-base LIBRARY)
endif()
install(TARGETS ggml LIBRARY PUBLIC_HEADER)
install(TARGETS ggml-base LIBRARY)

# FIXME: this should be done in the backend cmake files
if (GGML_METAL)
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,8 @@ function(ggml_add_backend backend)
if (${BUILD_SHARED_LIBS})
target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
install(TARGETS ${backend_target} LIBRARY)
endif()
install(TARGETS ${backend_target} LIBRARY)
target_link_libraries(ggml PUBLIC ${backend_target})
string(TOUPPER "GGML_USE_${backend}" backend_use)
target_compile_definitions(ggml PUBLIC ${backend_use})
Expand Down
1 change: 0 additions & 1 deletion ggml/src/ggml-blas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ if (BLAS_FOUND)

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

#add_compile_options(${BLAS_LINKER_FLAGS})
target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})

if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
Expand Down
5 changes: 5 additions & 0 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,11 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
if (GGML_LSX)
list(APPEND ARCH_FLAGS -mlsx)
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
message(STATUS "RISC-V detected")
if (GGML_RVV)
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
endif()
else()
message(STATUS "Unknown architecture")
endif()
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ if (CUDAToolkit_FOUND)
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
endif()

add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
else()
message(FATAL_ERROR "CUDA Toolkit not found")
endif()
15 changes: 15 additions & 0 deletions ggml/src/ggml-metal/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
GGML_METAL_KERNEL_TYPE_GELU_QUICK_4,
GGML_METAL_KERNEL_TYPE_SILU,
GGML_METAL_KERNEL_TYPE_SILU_4,
GGML_METAL_KERNEL_TYPE_ELU,
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16,
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4,
GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32,
Expand Down Expand Up @@ -649,6 +650,7 @@ @implementation GGMLMetalClass
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK_4, gelu_quick_4, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU, silu, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4, silu_4, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ELU, elu, true);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, soft_max_f16, has_simdgroup_reduction);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, soft_max_f16_4, has_simdgroup_reduction);
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, soft_max_f32, has_simdgroup_reduction);
Expand Down Expand Up @@ -968,6 +970,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
case GGML_UNARY_OP_GELU:
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_SILU:
case GGML_UNARY_OP_ELU:
return ggml_is_contiguous(op->src[0]);
default:
return false;
Expand Down Expand Up @@ -1589,6 +1592,18 @@ static void ggml_metal_encode_node(

[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;
case GGML_UNARY_OP_ELU:
{
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ELU].pipeline;

[encoder setComputePipelineState:pipeline];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];

const int64_t n = ggml_nelements(dst);

[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;
default:
{
GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op));
Expand Down
60 changes: 41 additions & 19 deletions ggml/src/ggml-metal/ggml-metal.metal
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,14 @@ kernel void kernel_silu_4(
dst[tpig] = x / (1.0f + exp(-x));
}

kernel void kernel_elu(
device const float * src0,
device float * dst,
uint tpig[[thread_position_in_grid]]) {
device const float & x = src0[tpig];
dst[tpig] = (x > 0.0f) ? x : (exp(x) - 1.0f);
}

kernel void kernel_sqr(
device const float * src0,
device float * dst,
Expand Down Expand Up @@ -2137,20 +2145,34 @@ kernel void kernel_im2col(
uint3 tgpg[[threadgroups_per_grid]],
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) {
const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
// const int64_t IC = tgpg[0];
const int64_t OH = tgpg[1];
const int64_t OW = tgpg[2];

const int32_t offset_dst =
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
// const int64_t N = ntg[0];
const int64_t KH = ntg[1];
const int64_t KW = ntg[2];

const int64_t in = tpitg[0];
const int64_t ikh = tpitg[1];
const int64_t ikw = tpitg[2];

const int64_t iic = tgpig[0];
const int64_t ioh = tgpig[1];
const int64_t iow = tgpig[2];

const int64_t iiw = iow*s0 + ikw*d0 - p0;
const int64_t iih = ioh*s1 + ikh*d1 - p1;

const int64_t offset_dst = (in*OH*OW + ioh*OW + iow)*CHW + (iic*(KH*KW) + ikh*KW + ikw);

device T * pdst = (device T *) (dst);

if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
pdst[offset_dst] = 0.0f;
} else {
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
const int64_t offset_src = in*ofs0 + iic*ofs1 + iih*IW + iiw;
pdst[offset_dst] = x[offset_src];
}
}

Expand Down Expand Up @@ -2201,25 +2223,25 @@ kernel void kernel_im2col_ext(
uint3 tgpg[[threadgroups_per_grid]], // tgpg[0] = D x IC x KH x KW, CHW = IC x KH x KW
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) { // [M, 1, 1]
const int32_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2]
const int64_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2]

const int32_t d = tgpig[0] / CHW;
const int32_t chw = tgpig[0] % CHW;
const int32_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1)
const int32_t HW = tgpig[0] % KHW;
const int64_t d = tgpig[0] / CHW;
const int64_t chw = tgpig[0] % CHW;
const int64_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1)
const int64_t HW = tgpig[0] % KHW;

const int32_t tpitg_0 = (d * ntg[0]) + tpitg[0];
const int64_t tpitg_0 = (d * ntg[0]) + tpitg[0];
if (tpitg_0 >= N) {
return;
}

const int32_t tpitg_1 = HW / KW;
const int32_t tpitg_2 = HW % KW;
const int64_t tpitg_1 = HW / KW;
const int64_t tpitg_2 = HW % KW;

const int32_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0;
const int32_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1;
const int64_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0;
const int64_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1;

const int32_t offset_dst =
const int64_t offset_dst =
(tpitg_0 * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
(tgpig_0 * KHW + tpitg_1 * KW + tpitg_2);

Expand All @@ -2228,7 +2250,7 @@ kernel void kernel_im2col_ext(
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
pdst[offset_dst] = 0.0f;
} else {
const int32_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1;
const int64_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1;
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
}
}
Expand Down
Loading
Loading