Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) #1422

Merged
merged 13 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,10 @@ add_library(${TARGET}
ggml.c
ggml-alloc.h
ggml-alloc.c
ggml-backend.h
ggml-backend.c
ggml-quants.h
ggml-quants.c
${GGML_SOURCES_METAL}
${GGML_SOURCES_CUDA}
${GGML_SOURCES_OPENCL}
Expand Down
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,13 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
$(CC) $(CFLAGS) -c $< -o $@

WHISPER_OBJ += ggml-alloc.o
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
$(CC) $(CFLAGS) -c $< -o $@

ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
$(CC) $(CFLAGS) -c $< -o $@

WHISPER_OBJ += ggml-alloc.o ggml-backend.o ggml-quants.o

whisper.o: whisper.cpp whisper.h ggml.h ggml-cuda.h
$(CXX) $(CXXFLAGS) -c $< -o $@
Expand Down
6 changes: 6 additions & 0 deletions bindings/ruby/ext/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")


Expand Down
87 changes: 87 additions & 0 deletions bindings/ruby/ext/ggml-backend-impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#pragma once

// ggml-backend internal header

#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

//
// Backend buffer
//

typedef void * ggml_backend_buffer_context_t;

struct ggml_backend_buffer_i {
void (*free_buffer) (ggml_backend_buffer_t buffer);
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback
};

struct ggml_backend_buffer {
struct ggml_backend_buffer_i iface;

ggml_backend_t backend;
ggml_backend_buffer_context_t context;

size_t size;
};

GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
struct ggml_backend * backend,
struct ggml_backend_buffer_i iface,
ggml_backend_buffer_context_t context,
size_t size);

//
// Backend
//

typedef void * ggml_backend_context_t;

struct ggml_backend_i {
const char * (*get_name)(ggml_backend_t backend);

void (*free)(ggml_backend_t backend);

// buffer allocation
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size);

// get buffer alignment
size_t (*get_alignment)(ggml_backend_t backend);

// tensor data access
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
void (*synchronize) (ggml_backend_t backend);

// (optional) copy tensor between different backends, allow for single-copy tranfers
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);

// compute graph with a plan
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

// compute graph without a plan
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

// check if the backend supports an operation
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
};

struct ggml_backend {
struct ggml_backend_i iface;

ggml_backend_context_t context;
};

#ifdef __cplusplus
}
#endif
Loading
Loading