-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) (#1422)
* sync : ggml (backend v2, k-quants, CUDA opts, Metal opts, etc.) * metal : allow env metal variable to override resource path (#1415) * Allow env variable to override resource path * Update ggml-metal.m --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * sync : restore common / main from `master` * sync : restore whisper from `master` * talk-llama : update to latest llama.cpp * ruby : fix build * ggml : fix 32-bit ARM build * ggml : fix MIN / MAX macro collisions + update ios bindings * ggml : fix ifdefs and MIN / MAX again * exampels : fix Obj-C and Swift examples * ggml : fix 32-bit ARM compatibility * ggml : one more attempt to fix 32-bit ARM compat * whisper : fix support for larger graphs --------- Co-authored-by: Chris Raethke <codesoda@users.noreply.github.com>
- Loading branch information
Showing
38 changed files
with
30,777 additions
and
7,745 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule ios
updated
18 files
+15 −9 | Makefile | |
+15 −9 | Makefile-tmpl | |
+2 −0 | Package.swift | |
+413 −280 | Sources/whisper/ggml-alloc.c | |
+67 −8 | Sources/whisper/ggml-alloc.h | |
+87 −0 | Sources/whisper/ggml-backend-impl.h | |
+950 −0 | Sources/whisper/ggml-backend.c | |
+136 −0 | Sources/whisper/ggml-backend.h | |
+243 −0 | Sources/whisper/ggml-impl.h | |
+21 −0 | Sources/whisper/ggml-metal.h | |
+626 −227 | Sources/whisper/ggml-metal.m | |
+670 −126 | Sources/whisper/ggml-metal.metal | |
+7,277 −0 | Sources/whisper/ggml-quants.c | |
+224 −0 | Sources/whisper/ggml-quants.h | |
+3,494 −4,180 | Sources/whisper/ggml.c | |
+240 −97 | Sources/whisper/ggml.h | |
+10 −0 | Sources/whisper/include/whisper.h | |
+41 −22 | Sources/whisper/whisper.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#pragma once | ||
|
||
// ggml-backend internal header | ||
|
||
#include "ggml-backend.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// | ||
// Backend buffer | ||
// | ||
|
||
// Untyped (void *) context pointer; this is the type of the `context` field of struct ggml_backend_buffer. | ||
typedef void * ggml_backend_buffer_context_t; | ||
|
||
// Table of function pointers for a backend buffer. | ||
// Every callback receives the buffer it acts on as its first argument; the three | ||
// tensor callbacks additionally receive the tensor concerned. | ||
struct ggml_backend_buffer_i { | ||
void (*free_buffer) (ggml_backend_buffer_t buffer); | ||
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer | ||
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback | ||
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback | ||
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback | ||
}; | ||
|
||
// Backend buffer object: its callback table (stored by value), the associated | ||
// backend handle, an untyped (void *) context pointer, and a size. | ||
struct ggml_backend_buffer { | ||
struct ggml_backend_buffer_i iface; | ||
|
||
ggml_backend_t backend; | ||
ggml_backend_buffer_context_t context; | ||
|
||
// NOTE(review): presumably the buffer size in bytes - confirm against the implementation | ||
size_t size; | ||
}; | ||
|
||
// Prototype only; the definition is not part of this header. | ||
// Takes a backend, a buffer callback table (by value), a context pointer and a size, | ||
// i.e. one argument per field of struct ggml_backend_buffer, and returns a buffer handle. | ||
// NOTE(review): presumably creates a new buffer populated from these arguments - confirm in the implementation. | ||
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( | ||
struct ggml_backend * backend, | ||
struct ggml_backend_buffer_i iface, | ||
ggml_backend_buffer_context_t context, | ||
size_t size); | ||
|
||
// | ||
// Backend | ||
// | ||
|
||
// Untyped (void *) context pointer; this is the type of the `context` field of struct ggml_backend. | ||
typedef void * ggml_backend_context_t; | ||
|
||
// Table of function pointers defining a backend. | ||
// Every callback receives the backend handle as its first argument. | ||
struct ggml_backend_i { | ||
const char * (*get_name)(ggml_backend_t backend); | ||
|
||
void (*free)(ggml_backend_t backend); | ||
|
||
// buffer allocation | ||
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size); | ||
|
||
// get buffer alignment | ||
size_t (*get_alignment)(ggml_backend_t backend); | ||
|
||
// tensor data access | ||
// these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize | ||
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); | ||
void (*synchronize) (ggml_backend_t backend); | ||
|
||
// (optional) copy tensor between different backends, allow for single-copy transfers | ||
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); | ||
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); | ||
|
||
// compute graph with a plan | ||
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||
|
||
// compute graph without a plan | ||
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||
|
||
// check if the backend supports an operation | ||
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); | ||
}; | ||
|
||
// Backend object: its callback table (stored by value) plus an untyped (void *) context pointer. | ||
struct ggml_backend { | ||
struct ggml_backend_i iface; | ||
|
||
ggml_backend_context_t context; | ||
}; | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Oops, something went wrong.
f96e1c5
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI: it looks like a few CUDA => HIP mappings are missing, so it doesn't compile with `WHISPER_HIPBLAS=1`. This patch resolves the problem: