sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) (ggerganov#1422)

* sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.)

* metal : allow env metal variable to override resource path (ggerganov#1415)

* Allow env variable to override resource path

* Update ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* sync : restore common / main from `master`

* sync : restore whisper from `master`

* talk-llama : update to latest llama.cpp

* ruby : fix build

* ggml : fix 32-bit ARM build

* ggml : fix MIN / MAX macro collisions + update ios bindings

* ggml : fix ifdefs and MIN / MAX again

* examples : fix Obj-C and Swift examples

* ggml : fix 32-bit ARM compatibility

* ggml : one more attempt to fix 32-bit ARM compat

* whisper : fix support for larger graphs

---------

Co-authored-by: Chris Raethke <codesoda@users.noreply.github.com>
2 people authored and vonstring committed Nov 7, 2023
1 parent d3a09b0 commit 72bf352
Showing 38 changed files with 30,777 additions and 7,745 deletions.
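
One of the upstream changes pulled in by this sync, "metal : allow env metal variable to override resource path" (ggerganov#1415), makes the Metal resource directory overridable at runtime. The C sketch below only illustrates the usual getenv-based pattern for such an override; the variable name GGML_METAL_PATH_RESOURCES and the metal_resource_path() helper are assumptions for illustration, not code taken from this diff.

#include <stdio.h>
#include <stdlib.h>

// Illustrative only: resolve the Metal resource path, preferring an
// environment override when one is set (assumed variable name, see note above).
static const char * metal_resource_path(const char * default_path) {
    const char * env = getenv("GGML_METAL_PATH_RESOURCES");
    return (env && env[0] != '\0') ? env : default_path;
}

int main(void) {
    // e.g.: GGML_METAL_PATH_RESOURCES=/opt/whisper/resources ./main -m models/ggml-base.en.bin
    printf("loading Metal resources from: %s\n", metal_resource_path("."));
    return 0;
}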
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -464,6 +464,10 @@ add_library(${TARGET}
ggml.c
ggml-alloc.h
ggml-alloc.c
ggml-backend.h
ggml-backend.c
ggml-quants.h
ggml-quants.c
${GGML_SOURCES_METAL}
${GGML_SOURCES_CUDA}
${GGML_SOURCES_OPENCL}
8 changes: 7 additions & 1 deletion Makefile
@@ -301,7 +301,13 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
$(CC) $(CFLAGS) -c $< -o $@

WHISPER_OBJ += ggml-alloc.o
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
$(CC) $(CFLAGS) -c $< -o $@

ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
$(CC) $(CFLAGS) -c $< -o $@

WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o

whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
$(CXX) $(CXXFLAGS) -c $< -o $@
6 changes: 6 additions & 0 deletions bindings/ruby/ext/extconf.rb
@@ -3,8 +3,14 @@
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")


87 changes: 87 additions & 0 deletions bindings/ruby/ext/ggml-backend-impl.h
@@ -0,0 +1,87 @@
#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

//
// Backend buffer
//

typedef void * ggml_backend_buffer_context_t;

struct ggml_backend_buffer_i {
void (*free_buffer) (ggml_backend_buffer_t buffer);
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
};

struct ggml_backend_buffer {
struct ggml_backend_buffer_i iface;

ggml_backend_t backend;
ggml_backend_buffer_context_t context;

size_t size;
};

GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
struct ggml_backend * backend,
struct ggml_backend_buffer_i iface,
ggml_backend_buffer_context_t context,
size_t size);

//
// Backend
//

typedef void * ggml_backend_context_t;

struct ggml_backend_i {
const char * (*get_name)(ggml_backend_t backend);

void (*free)(ggml_backend_t backend);

// buffer allocation
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);

// get buffer alignment
size_t (*get_alignment)(ggml_backend_t backend);

// tensor data access
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
void (*synchronize) (ggml_backend_t backend);

// (optional) copy tensor between different backends, allow for single-copy transfers
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);

// compute graph with a plan
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

// compute graph without a plan
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

// check if the backend supports an operation
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
};

struct ggml_backend {
struct ggml_backend_i iface;

ggml_backend_context_t context;
};

#ifdef __cplusplus
}
#endif
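
For orientation, here is a minimal sketch (not part of the commit) of how a backend could implement the ggml_backend_buffer_i interface above with a plain malloc-backed buffer. Only the struct layouts and the ggml_backend_buffer_init() signature come from the header shown; the example_* names are hypothetical, and ggml_backend_buffer_t / ggml_backend_t are assumed to be pointer typedefs for the structs above, as declared in ggml-backend.h.

#include <stdlib.h>

#include "ggml.h"
#include "ggml-backend-impl.h"

// free the malloc'd base pointer stored as the buffer context
static void example_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    free(buffer->context);
}

// the context is the base pointer itself
static void * example_buffer_get_base(ggml_backend_buffer_t buffer) {
    return buffer->context;
}

// no padding: each tensor needs exactly its nominal size
static size_t example_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    (void) buffer;
    return ggml_nbytes(tensor);
}

static struct ggml_backend_buffer_i example_buffer_i = {
    /* .free_buffer    = */ example_buffer_free_buffer,
    /* .get_base       = */ example_buffer_get_base,
    /* .get_alloc_size = */ example_buffer_get_alloc_size,
    /* .init_tensor    = */ NULL, // no post-allocation work needed
    /* .free_tensor    = */ NULL, // no pre-free work needed
};

// a possible alloc_buffer callback for ggml_backend_i: malloc the data and wrap it
static ggml_backend_buffer_t example_alloc_buffer(ggml_backend_t backend, size_t size) {
    void * data = malloc(size);
    return ggml_backend_buffer_init(backend, example_buffer_i, data, size);
}

The asynchronous tensor-access callbacks in ggml_backend_i follow the pattern described by the comments above: a synchronous helper simply calls set_tensor_async (or get_tensor_async) and then synchronize on the same backend.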