whisper : add description of ggml_mul_mat_pad
ggerganov committed Sep 14, 2023
1 parent f365543 commit b38f8a4
Showing 2 changed files with 15 additions and 2 deletions.
4 changes: 2 additions & 2 deletions coreml/whisper-encoder.mm
@@ -24,8 +24,8 @@

 // select which device to run the Core ML model on
 MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-//config.computeUnits = MLComputeUnitsCPUAndGPU;
-config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
+config.computeUnits = MLComputeUnitsCPUAndGPU;
+//config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
 //config.computeUnits = MLComputeUnitsAll;

 const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);
13 changes: 13 additions & 0 deletions whisper.cpp
@@ -136,6 +136,19 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 ggml_graph_compute(graph, &plan);
 }

+// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
+// the idea is to represent the original matrix multiplication:
+//
+//   Z = X @ Y
+//
+// as the sum of two matrix multiplications:
+//
+//   Z = (X_0 @ Y_0) + (X_1 @ Y_1)
+//
+// here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad",
+// and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
+// general-purpose kernels
+//
 static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
 //#if !defined(GGML_USE_METAL)
 //    return ggml_mul_mat(ctx, x, y);
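For reference, the sketch below shows one way the split described in the new comment could be expressed with the public ggml API (ggml_view_3d, ggml_mul_mat, ggml_add). The body of ggml_mul_mat_pad is not part of this diff, so this is an illustration of the technique under those assumptions, not the committed code, and the function and variable names here are chosen for the example.

// Illustrative sketch only (not the code from this commit): split the shared
// dimension 0 into a part that is a multiple of "pad" and a small remainder,
// then sum the two partial matrix multiplications.
#include "ggml.h"

static struct ggml_tensor * mul_mat_pad_sketch(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
    // dimension 0 already aligned to "pad": a single matrix multiplication suffices
    if (x->ne[0] % pad == 0) {
        return ggml_mul_mat(ctx, x, y);
    }

    // X_0, Y_0: views whose dimension 0 is the largest multiple of "pad"
    struct ggml_tensor * x_0 = ggml_view_3d(ctx, x, (x->ne[0]/pad)*pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], 0);
    struct ggml_tensor * y_0 = ggml_view_3d(ctx, y, (y->ne[0]/pad)*pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], 0);

    // X_1, Y_1: the small remaining views along dimension 0
    struct ggml_tensor * x_1 = ggml_view_3d(ctx, x, x->ne[0]%pad, x->ne[1], x->ne[2], x->nb[1], x->nb[2], x_0->ne[0]*x_0->nb[0]);
    struct ggml_tensor * y_1 = ggml_view_3d(ctx, y, y->ne[0]%pad, y->ne[1], y->ne[2], y->nb[1], y->nb[2], y_0->ne[0]*y_0->nb[0]);

    // Z = (X_0 @ Y_0) + (X_1 @ Y_1)
    return ggml_add(ctx,
            ggml_mul_mat(ctx, x_0, y_0),
            ggml_mul_mat(ctx, x_1, y_1));
}

The aligned views X_0/Y_0 can take the fast path (presumably the Metal kernels, given the GGML_USE_METAL guard visible in the hunk), while the small remainder X_1/Y_1 goes through more general-purpose kernels, matching the description in the comment.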
