kompute : disable LLAMA_SPLIT_LAYER after ggerganov#5321
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
cebtenzzre committed Jul 15, 2024
1 parent 800e8fd commit 7cd9602
Showing 1 changed file with 4 additions and 1 deletion.
src/llama.cpp (4 additions, 1 deletion)

The change wraps the LLAMA_SPLIT_MODE_LAYER branch of llm_load_tensors in #ifndef GGML_USE_KOMPUTE, so Kompute builds compile out the layer-split path and fall through to the single-device/row-split path instead.
```diff
@@ -5903,6 +5903,7 @@ static bool llm_load_tensors(
         model.buft_layer[i] = llama_default_buffer_type_cpu(true);
     }
 
+#ifndef GGML_USE_KOMPUTE
     if (split_mode == LLAMA_SPLIT_MODE_LAYER) {
         // calculate the split points
         int device_count = llama_get_device_count(model);
@@ -5940,7 +5941,9 @@ static bool llm_load_tensors(
         } else {
             model.buft_output = llama_default_buffer_type_cpu(true);
         }
-    } else {
+    } else
+#endif
+    {
         ggml_backend_buffer_type_t split_buft;
         if (split_mode == LLAMA_SPLIT_MODE_ROW) {
             split_buft = llama_default_buffer_type_split(model, main_gpu, tensor_split);
```
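Note how the removed `} else {` is re-added as three lines: the preprocessor guard ends between `else` and the opening brace, so both build configurations share the same block body and closing brace. For context, here is a minimal caller-side sketch (not part of this commit) of how an application might account for this guard when loading a model. It assumes the public llama.h API of this era (llama_model_default_params, llama_load_model_from_file) and that the application is compiled with the same GGML_USE_KOMPUTE define as the library, which is an assumption rather than something this commit establishes:

```cpp
// Hypothetical caller-side sketch: choose a split mode that respects the
// guard added above. Assumes llama.h from around the time of this commit.
#include "llama.h"

int main(void) {
    llama_model_params params = llama_model_default_params();

#ifdef GGML_USE_KOMPUTE
    // After this change, a Kompute build compiles out the layer-split
    // path, so request a single-device layout explicitly.
    params.split_mode = LLAMA_SPLIT_MODE_NONE;
    params.main_gpu   = 0; // hypothetical choice: keep everything on device 0
#endif

    // "model.gguf" is a placeholder path.
    llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        return 1;
    }

    llama_free_model(model);
    return 0;
}
```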
