update examples and tests, fix issues

slaren committed Oct 21, 2023
1 parent 772a124 commit 39e63fa
Showing 19 changed files with 204 additions and 147 deletions.
2 changes: 1 addition & 1 deletion examples/gpt-2/main-alloc.cpp
@@ -398,7 +398,7 @@ struct ggml_cgraph * gpt2_graph(
const int n_head = hparams.n_head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
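For reference, this is roughly how the gpt-2 examples use such a metadata-only context after the change — a sketch, with the helper name build_graph_sketch and the elided model code being placeholders; only the ggml_tensor and ggml_cgraph structs live in this buffer, while the tensor data is placed later by ggml-alloc:

    #include "ggml.h"
    #include <cstdint>
    #include <vector>

    static struct ggml_cgraph * build_graph_sketch() {
        // room for GGML_DEFAULT_GRAPH_SIZE tensor structs plus the cgraph struct itself
        static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
        static std::vector<uint8_t> buf(buf_size);

        struct ggml_init_params params = {
            /*.mem_size   =*/ buf_size,
            /*.mem_buffer =*/ buf.data(),
            /*.no_alloc   =*/ true, // tensor data is allocated later by ggml-alloc
        };

        struct ggml_context * ctx0 = ggml_init(params);

        struct ggml_cgraph * gf = ggml_new_graph(ctx0);

        // ... create the model tensors and ops in ctx0, then:
        // ggml_build_forward_expand(gf, result);

        return gf;
    }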
2 changes: 1 addition & 1 deletion examples/gpt-2/main-backend.cpp
@@ -492,7 +492,7 @@ struct ggml_cgraph * gpt2_graph(
const int n_head = hparams.n_head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
2 changes: 1 addition & 1 deletion examples/gpt-2/main-batched.cpp
@@ -548,7 +548,7 @@ struct ggml_cgraph * gpt2_graph(
const int32_t kv_head = ggml_allocr_is_measure(allocr) ? n_ctx - n_tokens : kv_cache.head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
10 changes: 5 additions & 5 deletions examples/mnist/main-cpu.cpp
@@ -39,10 +39,10 @@ int mnist_eval(
struct ggml_context * ctx_data = NULL;
struct ggml_context * ctx_eval = NULL;

-    struct ggml_cgraph gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
+    struct ggml_cgraph * gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);

// param export/import test
-    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));
+    GGML_ASSERT(ggml_graph_get_tensor(gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));

// allocate work context
// needed during ggml_graph_compute() to allocate a work tensor
@@ -57,12 +57,12 @@ int mnist_eval(

struct ggml_context * ctx_work = ggml_init(params);

-    struct ggml_tensor * input = ggml_graph_get_tensor(&gfi, "input");
+    struct ggml_tensor * input = ggml_graph_get_tensor(gfi, "input");
memcpy(input->data, digit.data(), ggml_nbytes(input));

-    ggml_graph_compute_with_ctx(ctx_work, &gfi, n_threads);
+    ggml_graph_compute_with_ctx(ctx_work, gfi, n_threads);

-    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(&gfi, "probs"));
+    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(gfi, "probs"));

const int prediction = std::max_element(probs_data, probs_data + 10) - probs_data;

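Put together, the mnist-cpu evaluation path now works with a graph pointer throughout — a sketch that assumes the surrounding variables of the example (fname_cgraph, ctx_work, digit, n_threads):

    struct ggml_context * ctx_data = NULL;
    struct ggml_context * ctx_eval = NULL;

    // ggml_graph_import now returns a graph allocated in ctx_eval instead of a struct by value
    struct ggml_cgraph * gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);

    struct ggml_tensor * input = ggml_graph_get_tensor(gfi, "input");
    memcpy(input->data, digit.data(), ggml_nbytes(input));

    ggml_graph_compute_with_ctx(ctx_work, gfi, n_threads);

    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(gfi, "probs"));
    const int prediction = std::max_element(probs_data, probs_data + 10) - probs_data;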
4 changes: 2 additions & 2 deletions examples/sam/main.cpp
@@ -2109,7 +2109,7 @@ int main(int argc, char ** argv) {

static const size_t tensor_alignment = 32;
{
-        state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+        state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
state.allocr = ggml_allocr_new_measure(tensor_alignment);
struct ggml_cgraph * gf_measure = sam_encode_image(model, state, img1);
if (!gf_measure) {
@@ -2144,7 +2144,7 @@ int main(int argc, char ** argv) {
state.work_buffer.clear();
}
{
-        state.buf_compute_fast.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+        state.buf_compute_fast.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
state.allocr = ggml_allocr_new_measure(tensor_alignment);

// TODO: user input
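The buffer resized here only holds graph metadata because sam.cpp determines the real compute buffer size with ggml-alloc's measure pass first. A sketch of that two-pass flow — state.buf_compute_img_enc, state.allocr, tensor_alignment and sam_encode_image appear above, while buf_alloc is a hypothetical stand-in for the example's own allocation buffer:

    // pass 1: metadata-only context plus a measure allocator to find the required size
    state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
    state.allocr = ggml_allocr_new_measure(tensor_alignment);

    struct ggml_cgraph * gf_measure = sam_encode_image(model, state, img1);
    const size_t alloc_size = ggml_allocr_alloc_graph(state.allocr, gf_measure) + tensor_alignment;
    ggml_allocr_free(state.allocr);

    // pass 2: allocate a real buffer of that size and rebuild the graph against it
    std::vector<uint8_t> buf_alloc(alloc_size);
    state.allocr = ggml_allocr_new(buf_alloc.data(), buf_alloc.size(), tensor_alignment);

    struct ggml_cgraph * gf = sam_encode_image(model, state, img1);
    ggml_allocr_alloc_graph(state.allocr, gf);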
12 changes: 7 additions & 5 deletions examples/whisper/whisper.cpp
@@ -655,7 +655,7 @@ static void whisper_allocr_graph_init(struct whisper_allocr & allocr, std::funct
auto & meta = allocr.meta;
auto & data = allocr.data;

-    meta.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+    meta.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());

alloc = ggml_allocr_new_measure(tensor_alignment);

@@ -5413,7 +5413,7 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
// b: N*N*sizeof(float)
// c: N*N*sizeof(float)
// when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
-    std::vector<uint8_t> buf(3llu*N_max*N_max*sizeof(float) + 3*ggml_tensor_overhead());
+    std::vector<uint8_t> buf(3llu*N_max*N_max*sizeof(float) + 3*ggml_tensor_overhead() + ggml_graph_overhead());
std::vector<uint8_t> work;

// put a bunch of random data in the buffer
@@ -5464,17 +5464,19 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {

struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);

-    struct ggml_cgraph gf = ggml_build_forward(c);
+    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+
+    ggml_build_forward_expand(gf, c);

double tsum = 0.0;

// heat-up
-    ggml_graph_compute_helper(work, &gf, n_threads);
+    ggml_graph_compute_helper(work, gf, n_threads);

for (int i = 0; i < n_max; ++i) {
const int64_t t0 = ggml_time_us();

-        ggml_graph_compute_helper(work, &gf, n_threads);
+        ggml_graph_compute_helper(work, gf, n_threads);

const int64_t t1 = ggml_time_us();

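Both whisper changes in this hunk come from the same shift: the graph object is now allocated inside the ggml context, so the scratch buffer gains ggml_graph_overhead() and the graph is built with ggml_new_graph / ggml_build_forward_expand. A condensed sketch of the bench setup under those assumptions (the helper mirrors whisper.cpp's own ggml_graph_compute_helper; N is a placeholder and the timing loop is omitted):

    #include "ggml.h"
    #include <cstdint>
    #include <vector>

    // plan the graph, grow the work buffer if needed, then compute
    static void graph_compute_helper(std::vector<uint8_t> & work, struct ggml_cgraph * graph, int n_threads) {
        struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
        if (plan.work_size > 0) {
            work.resize(plan.work_size);
            plan.work_data = work.data();
        }
        ggml_graph_compute(graph, &plan);
    }

    static void bench_mul_mat_sketch(int n_threads) {
        const int N = 512; // placeholder size

        // a, b, c each take N*N floats; the context now also holds the cgraph struct,
        // hence the extra ggml_graph_overhead()
        std::vector<uint8_t> buf(3llu*N*N*sizeof(float) + 3*ggml_tensor_overhead() + ggml_graph_overhead());
        std::vector<uint8_t> work;

        struct ggml_init_params params = {
            /*.mem_size   =*/ buf.size(),
            /*.mem_buffer =*/ buf.data(),
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx0 = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
        struct ggml_tensor * b = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
        struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);

        // graphs are no longer stack values returned by ggml_build_forward
        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
        ggml_build_forward_expand(gf, c);

        graph_compute_helper(work, gf, n_threads);

        ggml_free(ctx0);
    }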
3 changes: 2 additions & 1 deletion include/ggml/ggml.h
@@ -58,7 +58,8 @@
// {
// ...
//
-//   struct ggml_cgraph gf = ggml_build_forward(f);
+//   struct ggml_cgraph * gf = ggml_new_graph(ctx);
+//   ggml_build_forward_expand(gf, f);
//
// // set the input variable and parameter values
// ggml_set_f32(x, 2.0f);
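Read in full, the header's usage example now looks roughly like this — a self-contained sketch where the 16 MB context size and the n_threads value are placeholders and the rest follows the surrounding comment:

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        const int n_threads = 4; // placeholder

        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };

        // memory allocation happens here
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);

        ggml_set_param(ctx, x); // x is an input variable

        struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
        struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);

        // new API: the graph object is allocated inside the context
        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, f);

        // set the input variable and parameter values
        ggml_set_f32(x, 2.0f);
        ggml_set_f32(a, 3.0f);
        ggml_set_f32(b, 4.0f);

        ggml_graph_compute_with_ctx(ctx, gf, n_threads);

        printf("f = %f\n", ggml_get_f32_1d(f, 0)); // f(2) = 3*2*2 + 4 = 16

        ggml_free(ctx);
        return 0;
    }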
2 changes: 1 addition & 1 deletion src/ggml-metal.m
@@ -24,7 +24,7 @@

#define UNUSED(x) (void)(x)

-#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
+#define GGML_MAX_CONCUR (2*GGML_DEFAULT_GRAPH_SIZE)

struct ggml_metal_buffer {
const char * name;
29 changes: 17 additions & 12 deletions src/ggml.c
@@ -17376,7 +17376,7 @@ void ggml_build_backward_gradient_checkpointing(
ggml_build_backward_expand(ctx, gf, gb_tmp, true);

if (n_checkpoints <= 0) {
-        *gb = *gb_tmp;
+        ggml_graph_cpy(gb_tmp, gb);
return;
}

@@ -17391,7 +17391,7 @@
replacements->vals[k] = checkpoints[i];
}

-    *gb = *gf;
+    ggml_graph_cpy(gf, gb);
// rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
// replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
// by recomputing them from checkpoints
@@ -18402,12 +18402,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;

-    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));

// check that we allocated the correct amount of memory
assert(obj_size == (size_t) (
(grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));

+    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));

*cgraph = (struct ggml_cgraph) {
/*.size =*/ size,
/*.n_nodes =*/ 0,
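For orientation, the single allocation whose size this assert verifies is laid out as the cgraph struct followed by the pointer arrays — a hypothetical helper sketching the computation, not the exact ggml code:

    #include <stddef.h>

    // layout: [cgraph struct][nodes: size][leafs: size][hash keys: hash_size][grads: size, optional]
    static size_t cgraph_obj_size_sketch(size_t cgraph_struct_size, size_t size, size_t hash_size, int grads) {
        size_t nbytes = cgraph_struct_size;
        nbytes += size * sizeof(void *);      // nodes_ptr
        nbytes += size * sizeof(void *);      // leafs_ptr
        nbytes += hash_size * sizeof(void *); // hash_keys_ptr, zeroed by the memset
        if (grads) {
            nbytes += size * sizeof(void *);  // grads_ptr
        }
        return nbytes;
    }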
@@ -18489,6 +18489,8 @@ struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgrap
}

void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+    GGML_ASSERT(cgraph->grads != NULL);

for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * grad = cgraph->grads[i];

@@ -18689,14 +18691,16 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_ELU:
case GGML_UNARY_OP_RELU:
-                    n_tasks = 1;
-                    break;
+                    {
+                        n_tasks = 1;
+                    } break;

case GGML_UNARY_OP_GELU:
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_SILU:
-                    n_tasks = n_threads;
-                    break;
+                    {
+                        n_tasks = n_threads;
+                    } break;
}
break;
case GGML_OP_SILU_BACK:
Expand Down Expand Up @@ -18921,7 +18925,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

if (node_n != -1) {
/* FINALIZE */
-            struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
+            struct ggml_tensor * node = cgraph->nodes[node_n];
if (GGML_OP_HAS_FINALIZE[node->op]) {
params.nth = ggml_get_n_tasks(node, n_threads);
ggml_compute_forward(&params, node);
@@ -19242,6 +19246,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
default:
break;
}

work_size = MAX(work_size, cur);
}

@@ -20910,10 +20915,10 @@ enum ggml_opt_result ggml_opt_resume(
struct ggml_tensor * f) {

// build forward + backward compute graphs
-    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, false);
-    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, opt->params.graph_size, true);

+    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true);
    ggml_build_forward_expand(gf, f);

+    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
ggml_build_backward_expand(ctx, gf, gb, true);

return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
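A sketch of how ggml_opt_resume now builds the forward and backward graphs, assuming f is a scalar loss built from tensors marked with ggml_set_param and that ctx has spare memory for the compute work buffer; the helper name, graph size and thread count are placeholders:

    static void build_and_run_opt_graphs(struct ggml_context * ctx, struct ggml_tensor * f, int n_threads) {
        // the forward graph is created with gradient slots (grads = true)
        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
        ggml_build_forward_expand(gf, f);

        // the backward graph starts as a copy of the forward graph ...
        struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
        // ... and is extended with the gradient nodes
        ggml_build_backward_expand(ctx, gf, gb, true);

        // forward pass, zero the gradients, seed df/df = 1, backward pass
        ggml_graph_compute_with_ctx(ctx, gf, n_threads);
        ggml_graph_reset(gf); // now asserts that the graph was built with gradients
        ggml_set_f32(f->grad, 1.0f);
        ggml_graph_compute_with_ctx(ctx, gb, n_threads);
    }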
10 changes: 6 additions & 4 deletions tests/test-blas0.c
@@ -132,15 +132,17 @@ int main(int argc, const char ** argv) {
{
dst2 = ggml_mul_mat(ctx0, s0_f32, s1_f32);

-        struct ggml_cgraph gf = ggml_build_forward(dst2);
-        ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+        ggml_build_forward_expand(gf, dst2);
+        ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
}

{
dst3 = ggml_mul_mat(ctx0, s0_f16, s1_f32);

-        struct ggml_cgraph gf = ggml_build_forward(dst3);
-        ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+        ggml_build_forward_expand(gf, dst3);
+        ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
}

bool ok_blas = true;
35 changes: 23 additions & 12 deletions tests/test-conv-transpose.c
@@ -52,6 +52,9 @@ void check_tensor(struct ggml_tensor * t, float * expected_t_d, int ne0, int ne1
for (int i0 = 0; i0 < ne0; ++i0) {
float expected = *(expected_t_d + i2 * ne1 * ne0 + i1 * ne0 + i0);
float actual = ggml_get_data_f32(t)[i2 * ne1 * ne0 + i1 * ne0 + i0];
+                if (expected != actual) {
+                    printf("expected %.1f, got %.1f\n", expected, actual);
+                }
GGML_ASSERT(expected == actual);
}
}
@@ -100,13 +103,17 @@ void test_conv_transpose_1d(void) {
struct ggml_tensor * out_2 = ggml_conv_transpose_1d(ctx, k, t, 2 /* s0 */, 0 /* p0 */, 1 /* d0 */);
struct ggml_tensor * out_3 = ggml_conv_transpose_1d(ctx, k, t, 3 /* s0 */, 0 /* p0 */, 1 /* d0 */);

-    struct ggml_cgraph gf_1 = ggml_build_forward(out_1);
-    struct ggml_cgraph gf_2 = ggml_build_forward(out_2);
-    struct ggml_cgraph gf_3 = ggml_build_forward(out_3);
+    struct ggml_cgraph * gf_1 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_2 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_3 = ggml_new_graph(ctx);

+    ggml_build_forward_expand(gf_1, out_1);
+    ggml_build_forward_expand(gf_2, out_2);
+    ggml_build_forward_expand(gf_3, out_3);

-    ggml_graph_compute_with_ctx(ctx, &gf_1, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_2, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_3, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_1, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_2, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_3, 1);

check_tensor(out_1, (float*)expected_out_1, 4, 3, 1);
check_tensor(out_2, (float*)expected_out_2, 6, 3, 1);
@@ -203,13 +210,17 @@ void test_conv_transpose_2d(void) {
struct ggml_tensor * out_2 = ggml_conv_transpose_2d_p0(ctx, k, t, 2);
struct ggml_tensor * out_3 = ggml_conv_transpose_2d_p0(ctx, k, t, 3);

-    struct ggml_cgraph gf_1 = ggml_build_forward(out_1);
-    struct ggml_cgraph gf_2 = ggml_build_forward(out_2);
-    struct ggml_cgraph gf_3 = ggml_build_forward(out_3);
+    struct ggml_cgraph * gf_1 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_2 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_3 = ggml_new_graph(ctx);

+    ggml_build_forward_expand(gf_1, out_1);
+    ggml_build_forward_expand(gf_2, out_2);
+    ggml_build_forward_expand(gf_3, out_3);

-    ggml_graph_compute_with_ctx(ctx, &gf_1, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_2, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_3, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_1, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_2, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_3, 1);

// printf("in\n");
// printf_tensor(t);
15 changes: 9 additions & 6 deletions tests/test-customop.c
@@ -150,9 +150,10 @@ int main(int argc, const char** argv) {

struct ggml_tensor * m1 = ggml_map_custom1(ctx, t, custom1, 2, NULL);

-        struct ggml_cgraph graph = ggml_build_forward(m1);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m1);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m1);

@@ -175,9 +176,10 @@

struct ggml_tensor * m2 = ggml_map_custom2(ctx, t1, t2, custom2, GGML_N_TASKS_MAX, g_userdata);

-        struct ggml_cgraph graph = ggml_build_forward(m2);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m2);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m2);

@@ -203,9 +205,10 @@

struct ggml_tensor * m3 = ggml_map_custom3(ctx, t1, t2, t3, custom3, 1, g_userdata);

-        struct ggml_cgraph graph = ggml_build_forward(m3);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m3);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m3);

7 changes: 4 additions & 3 deletions tests/test-grad0.cpp
@@ -231,9 +231,10 @@ static bool check_gradient(
printf("GGML_N_THREADS = %d\n", n_threads);
}

-    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx0, f);
-    struct ggml_cgraph * gb = ggml_new_graph(ctx0);
-    *gb = *gf;
+    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
+    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
+    ggml_build_forward_expand(gf, f);
+    ggml_graph_cpy(gf, gb);
ggml_build_backward_expand(ctx0, gf, gb, false);

ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
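The same change in usage form, as a sketch of the updated check_gradient setup (ctx0, f and n_threads come from the surrounding test; the finite-difference loop is unchanged and omitted). Plain struct assignment (*gb = *gf) would now only copy the pointers to the node arrays, so ggml_graph_cpy is used to copy the forward graph into gb before the backward nodes are appended:

    // both graphs are created with gradient slots (grads = true)
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);

    ggml_build_forward_expand(gf, f);
    ggml_graph_cpy(gf, gb);                          // copy gf into gb ...
    ggml_build_backward_expand(ctx0, gf, gb, false); // ... then append the backward nodes

    ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

    ggml_graph_reset(gf);
    ggml_set_f32(f->grad, 1.0f);
    ggml_graph_compute_with_ctx(ctx0, gb, n_threads);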
(diffs for the remaining changed files are not shown)