
llama : do not print "offloading layers" message in CPU-only builds (g…
slaren authored and hodlen committed Apr 1, 2024
Commit 1ea16d0 (1 parent: bd3b296)
Showing 1 changed file (llama.cpp) with 5 additions and 5 deletions.
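Before this change, llm_load_tensors printed the GPU offload summary unconditionally, so even a CPU-only build would log lines like the following (values illustrative, built from the format strings in the diff below):

    llm_load_tensors: offloading 0 repeating layers to GPU
    llm_load_tensors: offloaded 0/33 layers to GPU

After the change, these lines are printed only when llama_supports_gpu_offload() returns true; the per-buffer memory report ("buffer size = ... MiB") is still printed unconditionally for all builds.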
@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
         ctx_bufs.emplace_back(ctx, buf);
     }
 
-    // print memory requirements
-    {
+    if (llama_supports_gpu_offload()) {
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
 
         LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
@@ -4222,10 +4221,11 @@ static bool llm_load_tensors(
         const int max_offloadable_layers = hparams.n_layer + 1;
 
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
+    }
 
-        for (ggml_backend_buffer_t buf : model.bufs) {
-            LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
-        }
+    // print memory requirements
+    for (ggml_backend_buffer_t buf : model.bufs) {
+        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
     }
 
     // populate tensors_by_name
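The guard relies on llama_supports_gpu_offload(), a compile-time capability check exposed by the llama.cpp API. A minimal sketch of how such a check can be implemented, assuming the GGML backend macros of this era (the exact macro list is an assumption and varies between versions):

    // Sketch only: returns true when at least one GPU backend is compiled in.
    // Which GGML_USE_* macros exist depends on the llama.cpp version
    // (e.g. GGML_USE_CUBLAS was later renamed to GGML_USE_CUDA).
    bool llama_supports_gpu_offload(void) {
    #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || \
        defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
        return true;
    #else
        return false;
    #endif
    }

In a CPU-only build none of these macros is defined, the function returns false, and the new if block is skipped, which is what removes the misleading "offloading layers" messages.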
