add fallback for m chip & fix compiler bugs (ggerganov#4)
hodlen authored Dec 14, 2023
1 parent e44f640 commit a456d83
Showing 3 changed files with 29 additions and 16 deletions.
CMakeLists.txt: 2 changes (1 addition, 1 deletion)
@@ -37,7 +37,7 @@ endif()
 #
 
 if (APPLE)
-    set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_METAL_DEFAULT OFF) # Metal is not supported on Apple Silicon yet
 else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()
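Note: LLAMA_METAL_DEFAULT only seeds the LLAMA_METAL option, so Metal can presumably still be opted into explicitly at configure time if a working setup is available:

    cmake -B build -DLLAMA_METAL=ON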
ggml.c: 37 changes (25 additions, 12 deletions)
@@ -146,7 +146,7 @@ void ggml_print_backtrace(void) {
 }
 #endif
 
-#define GGML_PERF
+// #define GGML_PERF
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
@@ -14436,6 +14436,7 @@ static void ggml_compute_forward_mul_mat_axpy_dense(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14448,10 +14449,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < dst->ne[0]; i++) {
-    //     res[i] += tmp[i];
-    // }
-
+#else
+    for (i = 0; i < dst->ne[0]; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_dense_lock);
 
 }
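This hunk (and the three matching ones below) carries the commit's core fix: the hand-written AVX code is wrapped in an architecture guard so the kernels fall back to a scalar loop on Apple Silicon, which has no AVX2. A minimal standalone sketch of the pattern, with hypothetical names:

    #if defined(__AVX2__)
    #include <immintrin.h>
    #endif

    // Hypothetical helper showing the guard pattern used by the axpy kernels:
    // 8-wide AVX2 adds plus a scalar tail when AVX2 is available, and a plain
    // scalar loop otherwise, so the same source compiles on ARM (M-series) chips.
    static void vec_add_f32(float * res, const float * tmp, int n) {
        int i;
    #if defined(__AVX2__)
        int remainder = n % 8;
        for (i = 0; i < n - remainder; i += 8) {
            __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
            __m256 tmp_vec = _mm256_loadu_ps(tmp + i); // load 8 floats from tmp
            _mm256_storeu_ps(res + i, _mm256_add_ps(res_vec, tmp_vec));
        }
        for (i = n - remainder; i < n; i++) { // scalar tail for the leftovers
            res[i] += tmp[i];
        }
    #else
        for (i = 0; i < n; i++) { // portable scalar fallback
            res[i] += tmp[i];
        }
    #endif
    }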
@@ -14586,6 +14588,7 @@ static void ggml_compute_forward_mul_mat_axpy(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14598,8 +14601,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }

@@ -14733,7 +14739,7 @@ static void ggml_compute_forward_mul_mat_axpy_q4_0(

     // compute the number of remaining elements
     int remainder = ne00 % 8;
-
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8)
     {
@@ -14748,6 +14754,11 @@
     {
         res[i] += tmp[i];
     }
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_lock);
 }

@@ -14869,6 +14880,7 @@ static void ggml_compute_forward_mul_mat_axpy_head(
     // compute the number of remaining elements
     int remainder = ne00 % 8;
 
+#if defined(__AVX2__)
     // vectorized computation using AVX instructions
     for (i = 0; i < ne00 - remainder; i += 8) {
         __m256 res_vec = _mm256_loadu_ps(res + i); // load 8 floats from res
@@ -14881,10 +14893,11 @@
     for (i = ne00 - remainder; i < ne00; i++) {
         res[i] += tmp[i];
     }
-    // for (i = 0; i < ne00; i++) {
-    //     res[i] = tmp[i];
-    // }
-
+#else
+    for (i = 0; i < ne00; i++) {
+        res[i] += tmp[i];
+    }
+#endif
     atomic_flag_clear(&g_axpy_head_lock);
 
 }
llama.cpp: 6 changes (3 additions, 3 deletions)
@@ -2737,7 +2737,7 @@ struct llama_mlp_model_loader {
     offset = (offset + 31) & -32;
     file.seek(offset, SEEK_SET);
     // point to the mmapped MLP model file
-    mlp_tensor -> data = mapping -> addr + static_cast<std::streamoff>(offset);
+    mlp_tensor -> data = (void *) (static_cast<char *>(mapping -> addr) + offset);
     file.seek(tensor_data_size, SEEK_CUR);
     return mlp_tensor;
 }
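The rewritten line avoids pointer arithmetic on a void *, which is a GNU extension rather than standard C++ and fails on stricter compilers (plausibly among the "compiler bugs" in the commit title); doing the byte arithmetic on a char * is the portable form. A reduced illustration, with a hypothetical stand-in for the mapping struct:

    #include <cstddef>

    struct mmap_region { void * addr; }; // hypothetical stand-in for llama_mmap

    void * offset_into(const mmap_region & mapping, size_t offset) {
        // Non-portable: void * has no element size, so this only compiles as a
        // GNU extension:
        //     return mapping.addr + offset;
        // Portable: cast to char * (element size 1), add the byte offset, and
        // convert back to void *.
        return (void *) (static_cast<char *>(mapping.addr) + offset);
    }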
@@ -2757,7 +2757,7 @@ struct llama_augmentation_model_loader {
     // const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + 5120*40*4 * ggml_tensor_overhead() + (int64_t)13824*5120*40*4);
     int model_layer = model->layers.size();
     int ffn_dim = model->layers[0].ffn_up->ne[1];
-    const int64_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
+    const size_t ggml_aux_tensor_size = 4 * (100 * 100 + model_layer*ffn_dim*sizeof(float) * ggml_tensor_overhead() );
     printf("augmentation buffer: %ld\n", ggml_aux_tensor_size);
     struct ggml_init_params params = {
         /*.mem_size =*/ ggml_aux_tensor_size,
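The int64_t to size_t switch presumably matters because the value feeds ggml_init_params.mem_size, which ggml.h declares as size_t, so keeping one unsigned size type end to end avoids a signed/unsigned conversion (the %ld in the untouched printf line would then also want %zu). A sketch of the handoff, with illustrative dimensions:

    #include <cstdio>
    #include "ggml.h"

    // Illustrative only: real values come from the loaded model's shape.
    struct ggml_context * init_aux_ctx(size_t model_layer, size_t ffn_dim) {
        const size_t ggml_aux_tensor_size =
            4 * (100 * 100 + model_layer * ffn_dim * sizeof(float) * ggml_tensor_overhead());
        printf("augmentation buffer: %zu\n", ggml_aux_tensor_size); // %zu matches size_t

        struct ggml_init_params params = {
            /*.mem_size   =*/ ggml_aux_tensor_size, // declared size_t in ggml.h
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        return ggml_init(params);
    }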
@@ -2974,7 +2974,7 @@ static void llm_load_tensors(
     auto create_tensor = [&] (const std::string & name, const std::vector<int64_t> & ne, ggml_backend_type backend) -> ggml_tensor * {
         ggml_tensor * created_tensor = ml.create_tensor(ctx, name, ne, backend);
         if (created_tensor == nullptr) {
-            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name);
+            LLAMA_LOG_ERROR("%s: error: failed to create tensor '%s'\n", __func__, name.c_str());
             return nullptr;
         }
         if (created_tensor->backend == GGML_BACKEND_GPU || created_tensor->backend == GGML_BACKEND_GPU_SPLIT) {
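The added .c_str() is required because LLAMA_LOG_ERROR forwards to a printf-style variadic function, and passing a non-trivially-copyable std::string through C varargs is undefined behavior; most compilers diagnose or reject it outright. A reduced example:

    #include <cstdio>
    #include <string>

    void log_missing_tensor(const std::string & name) {
        // Undefined behavior: a std::string object cannot travel through a C
        // variadic argument list, even where it happens to compile:
        //     printf("failed to create tensor '%s'\n", name);
        // Correct: hand printf the NUL-terminated C string it expects.
        printf("failed to create tensor '%s'\n", name.c_str());
    }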
