[pull] master from ggerganov:master #106

Closed. Wants to merge 6 commits.
Changes from all commits
.github/workflows/build.yml: 10 additions & 0 deletions
@@ -294,12 +294,22 @@ jobs:

- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
mkdir build
cd build
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
mkdir build
cd build
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DLLAMA_OPENMP=OFF
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

- name: Test
id: cmake_test
run: |
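The split into two build steps presumably exists because ThreadSanitizer cannot see the synchronization inside an OpenMP runtime that was not built with TSAN support, so even correct parallel code produces race reports; the THREAD job therefore configures with -DLLAMA_OPENMP=OFF. A minimal sketch of the kind of code that trips this up (the motivation is an assumption, it is not stated in the diff):

```c
/* tsan_omp_demo.c
 * Build: cc -fopenmp -fsanitize=thread -g tsan_omp_demo.c -o tsan_omp_demo
 * The reduction below is race-free, yet TSAN may still report races that
 * originate inside an uninstrumented libomp/libgomp runtime.
 */
#include <omp.h>
#include <stdio.h>

int main(void) {
    double sum = 0.0;
    #pragma omp parallel for reduction(+:sum)
    for (int i = 1; i <= 1000000; ++i) {
        sum += 1.0 / i;
    }
    printf("harmonic(1e6) = %f, max threads = %d\n", sum, omp_get_max_threads());
    return 0;
}
```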
CMakeLists.txt: 19 additions & 0 deletions
@@ -126,6 +126,7 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
option(LLAMA_RPC "llama: use RPC" OFF)
option(LLAMA_OPENMP "llama: use OpenMP" ON)
option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
@@ -296,6 +297,17 @@ if (LLAMA_METAL)
)
endif()

if (LLAMA_OPENMP)
find_package(OpenMP)
if (OpenMP_FOUND)
message(STATUS "OpenMP found")
add_compile_definitions(GGML_USE_OPENMP)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
else()
message(WARNING "OpenMP not found")
endif()
endif()

if (LLAMA_BLAS)
if (LLAMA_STATIC)
set(BLA_STATIC ON)
@@ -1373,6 +1385,13 @@ if (LLAMA_METAL)
endif()
endif()

configure_file(cmake/llama.pc.in
"${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
@ONLY)

install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
DESTINATION lib/pkgconfig)

#
# programs, examples and tests
#
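CMake only wires up the flag here: LLAMA_OPENMP adds the GGML_USE_OPENMP define and links OpenMP::OpenMP_C/OpenMP::OpenMP_CXX, while how ggml itself uses the define is outside this diff. A hedged sketch of the pattern such a guard enables (function and variable names below are illustrative, not taken from ggml):

```c
#include <stddef.h>

#ifdef GGML_USE_OPENMP
#include <omp.h>
#endif

// Illustrative only: split row-wise work across OpenMP threads when the
// define is present, and fall back to a plain serial loop otherwise.
static void scale_rows(float * data, int n_rows, int n_cols, float s) {
#ifdef GGML_USE_OPENMP
    #pragma omp parallel for schedule(static)
#endif
    for (int r = 0; r < n_rows; ++r) {
        for (int c = 0; c < n_cols; ++c) {
            data[(size_t) r * n_cols + c] *= s;
        }
    }
}
```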
Makefile: 41 additions & 9 deletions
@@ -57,6 +57,8 @@ ifeq ($(UNAME_S),Darwin)
LLAMA_METAL := 1
endif

LLAMA_NO_OPENMP := 1

ifneq ($(UNAME_P),arm)
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
ifeq ($(SYSCTL_M),1)
@@ -67,6 +69,10 @@ ifeq ($(UNAME_S),Darwin)
endif
endif

ifdef LLAMA_RPC
BUILD_TARGETS += rpc-server
endif

default: $(BUILD_TARGETS)

test: $(TEST_TARGETS)
@@ -135,12 +141,16 @@ MK_NVCCFLAGS = -std=c++11
ifdef LLAMA_FAST
MK_CFLAGS += -Ofast
HOST_CXXFLAGS += -Ofast
ifndef LLAMA_DEBUG
MK_NVCCFLAGS += -O3
endif # LLAMA_DEBUG
else
MK_CFLAGS += -O3
MK_CXXFLAGS += -O3
ifndef LLAMA_DEBUG
MK_NVCCFLAGS += -O3
endif
endif # LLAMA_DEBUG
endif # LLAMA_FAST

ifndef LLAMA_NO_CCACHE
CCACHE := $(shell which ccache)
@@ -201,9 +211,10 @@ ifdef LLAMA_SCHED_MAX_COPIES
endif

ifdef LLAMA_DEBUG
MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g
MK_LDFLAGS += -g
MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g
MK_LDFLAGS += -g
MK_NVCCFLAGS += -O0 -g

ifeq ($(UNAME_S),Linux)
MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
@@ -400,6 +411,12 @@ ifndef LLAMA_NO_ACCELERATE
endif
endif # LLAMA_NO_ACCELERATE

ifndef LLAMA_NO_OPENMP
MK_CPPFLAGS += -DGGML_USE_OPENMP
MK_CFLAGS += -fopenmp
MK_CXXFLAGS += -fopenmp
endif # LLAMA_NO_OPENMP

ifdef LLAMA_OPENBLAS
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
@@ -416,6 +433,11 @@ ifdef LLAMA_BLIS
MK_LDFLAGS += -lblis -L/usr/local/lib
endif # LLAMA_BLIS

ifdef LLAMA_RPC
MK_CPPFLAGS += -DGGML_USE_RPC
OBJS += ggml-rpc.o
endif # LLAMA_RPC

ifdef LLAMA_CUBLAS
# LLAMA_CUBLAS is deprecated and will be removed in the future
LLAMA_CUDA := 1
@@ -641,11 +663,26 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
endif
endif # LLAMA_METAL

OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o

ifndef LLAMA_NO_LLAMAFILE
sgemm.o: sgemm.cpp sgemm.h ggml.h
$(CXX) $(CXXFLAGS) -c $< -o $@
endif

ifdef LLAMA_RPC
ggml-rpc.o: ggml-rpc.cpp ggml-rpc.h
$(CXX) $(CXXFLAGS) -c $< -o $@

rpc-server.o: examples/rpc/rpc-server.cpp ggml-rpc.h
$(CXX) $(CXXFLAGS) -c $< -o $@

rpc-server: rpc-server.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
endif # LLAMA_RPC

GF_CC := $(CC)
include scripts/get-flags.mk

@@ -725,14 +762,9 @@ unicode.o: unicode.cpp unicode.h
unicode-data.o: unicode-data.cpp unicode-data.h
$(CXX) $(CXXFLAGS) -c $< -o $@

OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o

llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@

COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h
COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o

common.o: common/common.cpp $(COMMON_H_DEPS)
$(CXX) $(CXXFLAGS) -c $< -o $@

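In the Makefile, LLAMA_RPC only adds -DGGML_USE_RPC and the ggml-rpc.o / rpc-server build rules; how a binary would actually choose the RPC backend at run time is not part of this change. A hedged sketch of that selection, under the assumption that the caller passes an endpoint string such as "host:port":

```c
#include <stddef.h>
#include "ggml-backend.h"
#ifdef GGML_USE_RPC
#include "ggml-rpc.h"
#endif

// Illustrative only: use the RPC backend when it is compiled in and an
// endpoint was supplied, otherwise fall back to the local CPU backend.
static ggml_backend_t pick_backend(const char * rpc_endpoint) {
#ifdef GGML_USE_RPC
    if (rpc_endpoint != NULL) {
        return ggml_backend_rpc_init(rpc_endpoint);   // e.g. "192.168.1.42:50052"
    }
#endif
    return ggml_backend_cpu_init();
}
```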
cmake/llama.pc.in: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include

Name: llama
Description: Port of Facebook's LLaMA model in C/C++
Version: @PROJECT_VERSION@
Libs: -L${libdir} -lllama
Cflags: -I${includedir}
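Installing llama.pc lets downstream projects pull compile and link flags from pkg-config instead of hard-coding the install prefix. A minimal consumer might look like this, assuming the installed file is on PKG_CONFIG_PATH (the build command is in the comment):

```c
/* check_llama.c
 * Build: cc check_llama.c $(pkg-config --cflags --libs llama) -o check_llama
 */
#include <stdio.h>
#include "llama.h"

int main(void) {
    // Prints the compiled-in feature string (AVX, Metal, ...) to confirm
    // that the header and shared library were both found via llama.pc.
    printf("%s\n", llama_print_system_info());
    return 0;
}
```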
common/common.cpp: 6 additions & 6 deletions
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true;
}
params.main_gpu = std::stoi(argv[i]);
#ifndef GGML_USE_CUDA_SYCL
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL
#ifndef GGML_USE_CUDA_SYCL_VULKAN
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL_VULKAN
return true;
}
if (arg == "--split-mode" || arg == "-sm") {
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
invalid_param = true;
return true;
}
#ifndef GGML_USE_CUDA_SYCL
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
#endif // GGML_USE_CUDA_SYCL
#ifndef GGML_USE_CUDA_SYCL_VULKAN
fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
#endif // GGML_USE_CUDA_SYCL_VULKAN
return true;
}
if (arg == "--tensor-split" || arg == "-ts") {
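No backend sets GGML_USE_CUDA_SYCL_VULKAN itself; it reads like a convenience macro derived from the individual backend defines so that the two warnings above can stay behind a single #ifndef. A sketch of how such a macro could be derived (whether common.cpp defines it exactly this way is an assumption):

```c
// Set when any GPU backend that honors --main-gpu / --split-mode is built in.
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_VULKAN)
#define GGML_USE_CUDA_SYCL_VULKAN
#endif
```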
ggml-alloc.c: 3 additions & 3 deletions
@@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
// this tensor was allocated without ggml-backend
return;
}
ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
ggml_backend_view_init(tensor);
}
} else {
if (tensor->data == NULL) {
@@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
if (t->view_src == NULL) {
ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
}
}
ggml-backend.c: 5 additions & 5 deletions
@@ -151,7 +151,7 @@ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) {
ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer;
if (dst_buf->iface.cpy_tensor) {
return src->buffer->iface.cpy_tensor(dst_buf, src, dst);
return dst_buf->iface.cpy_tensor(dst_buf, src, dst);
}
return false;
}
@@ -1887,15 +1887,15 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,

// utils

void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
void ggml_backend_view_init(struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->view_src != NULL);
GGML_ASSERT(tensor->view_src->buffer != NULL);
GGML_ASSERT(tensor->view_src->data != NULL);

tensor->buffer = buffer;
tensor->buffer = tensor->view_src->buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
ggml_backend_buffer_init_tensor(buffer, tensor);
ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
}

void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
@@ -1954,7 +1954,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
struct ggml_tensor * dst = node_copies[id];
if (dst->view_src != NULL) {
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
ggml_backend_view_init(dst->view_src->buffer, dst);
ggml_backend_view_init(dst);
}
else {
ggml_backend_tensor_copy(src, dst);
ggml-backend.h: 1 addition & 1 deletion
@@ -225,7 +225,7 @@ extern "C" {

// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);


#ifdef __cplusplus
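Since the buffer of a view is now taken from tensor->view_src->buffer, callers drop the buffer argument entirely. A hedged sketch of the new call pattern (the surrounding setup with ggml_backend_alloc_ctx_tensors is an assumption about typical usage, not part of this diff):

```c
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

// Illustrative only: a view created after its parent was placed in a backend
// buffer inherits that buffer inside the new one-argument ggml_backend_view_init().
static void view_init_example(ggml_backend_t backend) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 16 * 1024,
        /* .mem_buffer = */ NULL,
        /* .no_alloc   = */ true,   // tensor data will live in a backend buffer
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * parent = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    ggml_backend_buffer_t buf   = ggml_backend_alloc_ctx_tensors(ctx, backend);

    struct ggml_tensor * view = ggml_view_1d(ctx, parent, 256, 0);
    ggml_backend_view_init(view);   // was: ggml_backend_view_init(buf, view)

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
}
```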
ggml-rpc.cpp: 2 additions & 2 deletions
@@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
if (remote_ptr != 0) {
ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
ggml_backend_rpc_buffer_interface,
new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"},
new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
remote_size);
return buffer;
} else {
@@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
/* .endpoint = */ endpoint,
/* .name = */ "RPC",
/* .name = */ "RPC[" + std::string(endpoint) + "]",
};

ggml_backend_t backend = new ggml_backend {
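Functionally this is a naming change: buffers and backends created through the RPC backend now carry the endpoint in their name, which makes logs easier to attribute when several rpc-server instances are in play. A small sketch of the observable effect (the endpoint below is hypothetical):

```c
#include <stdio.h>
#include "ggml-backend.h"
#include "ggml-rpc.h"

int main(void) {
    // Hypothetical endpoint; point this at a running rpc-server instance.
    ggml_backend_t backend = ggml_backend_rpc_init("127.0.0.1:50052");
    if (backend == NULL) {
        fprintf(stderr, "failed to initialize RPC backend\n");
        return 1;
    }
    // With this patch the name reads "RPC[127.0.0.1:50052]" rather than "RPC".
    printf("backend name: %s\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}
```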