-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into gpu-param
- Loading branch information
Showing
48 changed files
with
30,893 additions
and
7,740 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Builds the project inside an NVIDIA CUDA development image and runs /app/main. | ||
# Build arguments: UBUNTU_VERSION, CUDA_VERSION, BASE_CUDA_DEV_CONTAINER, CUDA_DOCKER_ARCH. | ||
ARG UBUNTU_VERSION=22.04 | ||
|
||
# This needs to generally match the container host's environment. | ||
ARG CUDA_VERSION=11.7.1 | ||
|
||
# Target the CUDA build image | ||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} | ||
|
||
# Keyword casing kept consistent (FROM ... AS) to avoid the BuildKit FromAsCasing warning. | ||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build | ||
|
||
# Unless otherwise specified, we make a fat build. | ||
ARG CUDA_DOCKER_ARCH=all | ||
|
||
# Install the toolchain and drop the apt package index in the same layer, | ||
# so the index does not end up in the image. | ||
RUN apt-get update && \ | ||
apt-get install -y build-essential git cmake && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
WORKDIR /app | ||
|
||
# Copy the whole build context into /app. | ||
COPY . . | ||
|
||
# Set nvcc architecture | ||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} | ||
# Enable cuBLAS | ||
ENV WHISPER_CUBLAS=1 | ||
|
||
RUN make | ||
|
||
ENTRYPOINT ["/app/main"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule ios
updated
18 files
+15 −9 | Makefile | |
+15 −9 | Makefile-tmpl | |
+2 −0 | Package.swift | |
+413 −280 | Sources/whisper/ggml-alloc.c | |
+67 −8 | Sources/whisper/ggml-alloc.h | |
+87 −0 | Sources/whisper/ggml-backend-impl.h | |
+950 −0 | Sources/whisper/ggml-backend.c | |
+136 −0 | Sources/whisper/ggml-backend.h | |
+243 −0 | Sources/whisper/ggml-impl.h | |
+21 −0 | Sources/whisper/ggml-metal.h | |
+626 −227 | Sources/whisper/ggml-metal.m | |
+670 −126 | Sources/whisper/ggml-metal.metal | |
+7,277 −0 | Sources/whisper/ggml-quants.c | |
+224 −0 | Sources/whisper/ggml-quants.h | |
+3,494 −4,180 | Sources/whisper/ggml.c | |
+240 −97 | Sources/whisper/ggml.h | |
+10 −0 | Sources/whisper/include/whisper.h | |
+41 −22 | Sources/whisper/whisper.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#pragma once | ||
|
||
// ggml-backend internal header | ||
|
||
#include "ggml-backend.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// | ||
// Backend buffer | ||
// | ||
|
||
// Opaque pointer type; used as the type of the `context` field of struct ggml_backend_buffer. | ||
typedef void * ggml_backend_buffer_context_t; | ||
|
||
// Table of function pointers (the "interface") embedded in every struct ggml_backend_buffer. | ||
// Each callback receives the buffer it operates on as its first argument. | ||
struct ggml_backend_buffer_i { | ||
// NOTE(review): presumably releases the buffer and whatever backs it - confirm against the implementations | ||
void (*free_buffer) (ggml_backend_buffer_t buffer); | ||
void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer | ||
size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback | ||
void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback | ||
void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback | ||
}; | ||
|
||
// A backend buffer: its callback table together with a backend handle, | ||
// an opaque context pointer and a size. | ||
struct ggml_backend_buffer { | ||
// callbacks for this buffer (see struct ggml_backend_buffer_i) | ||
struct ggml_backend_buffer_i iface; | ||
|
||
// NOTE(review): presumably the backend this buffer belongs to - confirm | ||
ggml_backend_t backend; | ||
// opaque pointer (ggml_backend_buffer_context_t is a void *) | ||
ggml_backend_buffer_context_t context; | ||
|
||
// NOTE(review): presumably the buffer size in bytes - confirm | ||
size_t size; | ||
}; | ||
|
||
// Declaration only; the definition is not part of this header. | ||
// The parameters mirror the fields of struct ggml_backend_buffer (backend, iface, context, size). | ||
// NOTE(review): presumably creates a buffer object populated from these arguments - confirm in the definition. | ||
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( | ||
struct ggml_backend * backend, | ||
struct ggml_backend_buffer_i iface, | ||
ggml_backend_buffer_context_t context, | ||
size_t size); | ||
|
||
// | ||
// Backend | ||
// | ||
|
||
// Opaque pointer type; used as the type of the `context` field of struct ggml_backend. | ||
typedef void * ggml_backend_context_t; | ||
|
||
// Table of function pointers (the "interface") embedded in every struct ggml_backend. | ||
// Each callback receives the backend it operates on as its first argument. | ||
struct ggml_backend_i { | ||
// returns a C string naming the backend | ||
const char * (*get_name)(ggml_backend_t backend); | ||
|
||
// NOTE(review): presumably destroys the backend and its context - confirm against the implementations | ||
void (*free)(ggml_backend_t backend); | ||
|
||
// buffer allocation | ||
ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size); | ||
|
||
// get buffer alignment | ||
size_t (*get_alignment)(ggml_backend_t backend); | ||
|
||
// tensor data access | ||
// these functions can be asynchronous; helper functions that automatically call synchronize are provided for synchronous access | ||
void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); | ||
void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); | ||
void (*synchronize) (ggml_backend_t backend); | ||
|
||
// (optional) copy tensor between different backends, allow for single-copy transfers | ||
void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); | ||
void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); | ||
|
||
// compute graph with a plan | ||
ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||
void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||
void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); | ||
|
||
// compute graph without a plan | ||
void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); | ||
|
||
// check if the backend supports an operation | ||
bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); | ||
}; | ||
|
||
// A backend instance: its callback table plus an opaque context pointer. | ||
struct ggml_backend { | ||
// callbacks for this backend (see struct ggml_backend_i) | ||
struct ggml_backend_i iface; | ||
|
||
// opaque pointer (ggml_backend_context_t is a void *) | ||
ggml_backend_context_t context; | ||
}; | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Oops, something went wrong.