update examples and tests, fix issues

slaren committed Oct 21, 2023
1 parent 772a124 commit 39e63fa
Showing 19 changed files with 204 additions and 147 deletions.
2 changes: 1 addition & 1 deletion examples/gpt-2/main-alloc.cpp
@@ -398,7 +398,7 @@ struct ggml_cgraph * gpt2_graph(
const int n_head = hparams.n_head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
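For reference, this is roughly how the gpt-2 examples use such a metadata-only context after the change — a sketch, with the helper name build_graph_sketch and the elided model code being placeholders; only the ggml_tensor and ggml_cgraph structs live in this buffer, while the tensor data is placed later by ggml-alloc:

    #include "ggml.h"
    #include <cstdint>
    #include <vector>

    static struct ggml_cgraph * build_graph_sketch() {
        // room for GGML_DEFAULT_GRAPH_SIZE tensor structs plus the cgraph struct itself
        static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
        static std::vector<uint8_t> buf(buf_size);

        struct ggml_init_params params = {
            /*.mem_size   =*/ buf_size,
            /*.mem_buffer =*/ buf.data(),
            /*.no_alloc   =*/ true, // tensor data is allocated later by ggml-alloc
        };

        struct ggml_context * ctx0 = ggml_init(params);

        struct ggml_cgraph * gf = ggml_new_graph(ctx0);

        // ... create the model tensors and ops in ctx0, then:
        // ggml_build_forward_expand(gf, result);

        return gf;
    }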
2 changes: 1 addition & 1 deletion examples/gpt-2/main-backend.cpp
@@ -492,7 +492,7 @@ struct ggml_cgraph * gpt2_graph(
const int n_head = hparams.n_head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
2 changes: 1 addition & 1 deletion examples/gpt-2/main-batched.cpp
@@ -548,7 +548,7 @@ struct ggml_cgraph * gpt2_graph(
const int32_t kv_head = ggml_allocr_is_measure(allocr) ? n_ctx - n_tokens : kv_cache.head;

// since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data
-    static size_t buf_size = ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead();
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
static std::vector<uint8_t> buf(buf_size);

struct ggml_init_params params = {
10 changes: 5 additions & 5 deletions examples/mnist/main-cpu.cpp
@@ -39,10 +39,10 @@ int mnist_eval(
struct ggml_context * ctx_data = NULL;
struct ggml_context * ctx_eval = NULL;

-    struct ggml_cgraph gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
+    struct ggml_cgraph * gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);

// param export/import test
-    GGML_ASSERT(ggml_graph_get_tensor(&gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));
+    GGML_ASSERT(ggml_graph_get_tensor(gfi, "fc1_bias")->op_params[0] == int(0xdeadbeef));

// allocate work context
// needed during ggml_graph_compute() to allocate a work tensor
@@ -57,12 +57,12 @@ int mnist_eval(

struct ggml_context * ctx_work = ggml_init(params);

-    struct ggml_tensor * input = ggml_graph_get_tensor(&gfi, "input");
+    struct ggml_tensor * input = ggml_graph_get_tensor(gfi, "input");
memcpy(input->data, digit.data(), ggml_nbytes(input));

-    ggml_graph_compute_with_ctx(ctx_work, &gfi, n_threads);
+    ggml_graph_compute_with_ctx(ctx_work, gfi, n_threads);

-    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(&gfi, "probs"));
+    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(gfi, "probs"));

const int prediction = std::max_element(probs_data, probs_data + 10) - probs_data;

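Put together, the mnist-cpu evaluation path now works with a graph pointer throughout — a sketch that assumes the surrounding variables of the example (fname_cgraph, ctx_work, digit, n_threads):

    struct ggml_context * ctx_data = NULL;
    struct ggml_context * ctx_eval = NULL;

    // ggml_graph_import now returns a graph allocated in ctx_eval instead of a struct by value
    struct ggml_cgraph * gfi = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);

    struct ggml_tensor * input = ggml_graph_get_tensor(gfi, "input");
    memcpy(input->data, digit.data(), ggml_nbytes(input));

    ggml_graph_compute_with_ctx(ctx_work, gfi, n_threads);

    const float * probs_data = ggml_get_data_f32(ggml_graph_get_tensor(gfi, "probs"));
    const int prediction = std::max_element(probs_data, probs_data + 10) - probs_data;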
4 changes: 2 additions & 2 deletions examples/sam/main.cpp
@@ -2109,7 +2109,7 @@ int main(int argc, char ** argv) {

static const size_t tensor_alignment = 32;
{
-        state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+        state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
state.allocr = ggml_allocr_new_measure(tensor_alignment);
struct ggml_cgraph * gf_measure = sam_encode_image(model, state, img1);
if (!gf_measure) {
@@ -2144,7 +2144,7 @@ int main(int argc, char ** argv) {
state.work_buffer.clear();
}
{
-        state.buf_compute_fast.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+        state.buf_compute_fast.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
state.allocr = ggml_allocr_new_measure(tensor_alignment);

// TODO: user input
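The buffer resized here only holds graph metadata because sam.cpp determines the real compute buffer size with ggml-alloc's measure pass first. A sketch of that two-pass flow — state.buf_compute_img_enc, state.allocr, tensor_alignment and sam_encode_image appear above, while buf_alloc is a hypothetical stand-in for the example's own allocation buffer:

    // pass 1: metadata-only context plus a measure allocator to find the required size
    state.buf_compute_img_enc.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());
    state.allocr = ggml_allocr_new_measure(tensor_alignment);

    struct ggml_cgraph * gf_measure = sam_encode_image(model, state, img1);
    const size_t alloc_size = ggml_allocr_alloc_graph(state.allocr, gf_measure) + tensor_alignment;
    ggml_allocr_free(state.allocr);

    // pass 2: allocate a real buffer of that size and rebuild the graph against it
    std::vector<uint8_t> buf_alloc(alloc_size);
    state.allocr = ggml_allocr_new(buf_alloc.data(), buf_alloc.size(), tensor_alignment);

    struct ggml_cgraph * gf = sam_encode_image(model, state, img1);
    ggml_allocr_alloc_graph(state.allocr, gf);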
12 changes: 7 additions & 5 deletions examples/whisper/whisper.cpp
@@ -655,7 +655,7 @@ static void whisper_allocr_graph_init(struct whisper_allocr & allocr, std::funct
auto & meta = allocr.meta;
auto & data = allocr.data;

-    meta.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
+    meta.resize(ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead());

alloc = ggml_allocr_new_measure(tensor_alignment);

@@ -5413,7 +5413,7 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
// b: N*N*sizeof(float)
// c: N*N*sizeof(float)
// when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
-    std::vector<uint8_t> buf(3llu*N_max*N_max*sizeof(float) + 3*ggml_tensor_overhead());
+    std::vector<uint8_t> buf(3llu*N_max*N_max*sizeof(float) + 3*ggml_tensor_overhead() + ggml_graph_overhead());
std::vector<uint8_t> work;

// put a bunch of random data in the buffer
@@ -5464,17 +5464,19 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {

struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);

-    struct ggml_cgraph gf = ggml_build_forward(c);
+    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+
+    ggml_build_forward_expand(gf, c);

double tsum = 0.0;

// heat-up
-    ggml_graph_compute_helper(work, &gf, n_threads);
+    ggml_graph_compute_helper(work, gf, n_threads);

for (int i = 0; i < n_max; ++i) {
const int64_t t0 = ggml_time_us();

-        ggml_graph_compute_helper(work, &gf, n_threads);
+        ggml_graph_compute_helper(work, gf, n_threads);

const int64_t t1 = ggml_time_us();

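Both whisper changes in this hunk come from the same shift: the graph object is now allocated inside the ggml context, so the scratch buffer gains ggml_graph_overhead() and the graph is built with ggml_new_graph / ggml_build_forward_expand. A condensed sketch of the bench setup under those assumptions (the helper mirrors whisper.cpp's own ggml_graph_compute_helper; N is a placeholder and the timing loop is omitted):

    #include "ggml.h"
    #include <cstdint>
    #include <vector>

    // plan the graph, grow the work buffer if needed, then compute
    static void graph_compute_helper(std::vector<uint8_t> & work, struct ggml_cgraph * graph, int n_threads) {
        struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
        if (plan.work_size > 0) {
            work.resize(plan.work_size);
            plan.work_data = work.data();
        }
        ggml_graph_compute(graph, &plan);
    }

    static void bench_mul_mat_sketch(int n_threads) {
        const int N = 512; // placeholder size

        // a, b, c each take N*N floats; the context now also holds the cgraph struct,
        // hence the extra ggml_graph_overhead()
        std::vector<uint8_t> buf(3llu*N*N*sizeof(float) + 3*ggml_tensor_overhead() + ggml_graph_overhead());
        std::vector<uint8_t> work;

        struct ggml_init_params params = {
            /*.mem_size   =*/ buf.size(),
            /*.mem_buffer =*/ buf.data(),
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx0 = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
        struct ggml_tensor * b = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, N, N);
        struct ggml_tensor * c = ggml_mul_mat(ctx0, a, b);

        // graphs are no longer stack values returned by ggml_build_forward
        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
        ggml_build_forward_expand(gf, c);

        graph_compute_helper(work, gf, n_threads);

        ggml_free(ctx0);
    }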
3 changes: 2 additions & 1 deletion include/ggml/ggml.h
@@ -58,7 +58,8 @@
// {
// ...
//
-//   struct ggml_cgraph gf = ggml_build_forward(f);
+//   struct ggml_cgraph * gf = ggml_new_graph(ctx);
+//   ggml_build_forward_expand(gf, f);
//
// // set the input variable and parameter values
// ggml_set_f32(x, 2.0f);
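Read in full, the header's usage example now looks roughly like this — a self-contained sketch where the 16 MB context size and the n_threads value are placeholders and the rest follows the surrounding comment:

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        const int n_threads = 4; // placeholder

        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };

        // memory allocation happens here
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);

        ggml_set_param(ctx, x); // x is an input variable

        struct ggml_tensor * a  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * b  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
        struct ggml_tensor * x2 = ggml_mul(ctx, x, x);
        struct ggml_tensor * f  = ggml_add(ctx, ggml_mul(ctx, a, x2), b);

        // new API: the graph object is allocated inside the context
        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, f);

        // set the input variable and parameter values
        ggml_set_f32(x, 2.0f);
        ggml_set_f32(a, 3.0f);
        ggml_set_f32(b, 4.0f);

        ggml_graph_compute_with_ctx(ctx, gf, n_threads);

        printf("f = %f\n", ggml_get_f32_1d(f, 0)); // f(2) = 3*2*2 + 4 = 16

        ggml_free(ctx);
        return 0;
    }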
2 changes: 1 addition & 1 deletion src/ggml-metal.m
@@ -24,7 +24,7 @@

#define UNUSED(x) (void)(x)

-#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
+#define GGML_MAX_CONCUR (2*GGML_DEFAULT_GRAPH_SIZE)

struct ggml_metal_buffer {
const char * name;
29 changes: 17 additions & 12 deletions src/ggml.c
@@ -17376,7 +17376,7 @@ void ggml_build_backward_gradient_checkpointing(
ggml_build_backward_expand(ctx, gf, gb_tmp, true);

if (n_checkpoints <= 0) {
-        *gb = *gb_tmp;
+        ggml_graph_cpy(gb_tmp, gb);
return;
}

@@ -17391,7 +17391,7 @@
replacements->vals[k] = checkpoints[i];
}

-    *gb = *gf;
+    ggml_graph_cpy(gf, gb);
// rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
// replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
// by recomputing them from checkpoints
@@ -18402,12 +18402,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;

-    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));

// check that we allocated the correct amount of memory
assert(obj_size == (size_t) (
(grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));

+    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));

*cgraph = (struct ggml_cgraph) {
/*.size =*/ size,
/*.n_nodes =*/ 0,
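For orientation, the single allocation whose size this assert verifies is laid out as the cgraph struct followed by the pointer arrays — a hypothetical helper sketching the computation, not the exact ggml code:

    #include <stddef.h>

    // layout: [cgraph struct][nodes: size][leafs: size][hash keys: hash_size][grads: size, optional]
    static size_t cgraph_obj_size_sketch(size_t cgraph_struct_size, size_t size, size_t hash_size, int grads) {
        size_t nbytes = cgraph_struct_size;
        nbytes += size * sizeof(void *);      // nodes_ptr
        nbytes += size * sizeof(void *);      // leafs_ptr
        nbytes += hash_size * sizeof(void *); // hash_keys_ptr, zeroed by the memset
        if (grads) {
            nbytes += size * sizeof(void *);  // grads_ptr
        }
        return nbytes;
    }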
@@ -18489,6 +18489,8 @@ struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgrap
}

void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+    GGML_ASSERT(cgraph->grads != NULL);

for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * grad = cgraph->grads[i];

@@ -18689,14 +18691,16 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_ELU:
case GGML_UNARY_OP_RELU:
-                    n_tasks = 1;
-                    break;
+                    {
+                        n_tasks = 1;
+                    } break;

case GGML_UNARY_OP_GELU:
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_SILU:
-                    n_tasks = n_threads;
-                    break;
+                    {
+                        n_tasks = n_threads;
+                    } break;
}
break;
case GGML_OP_SILU_BACK:
Expand Down Expand Up @@ -18921,7 +18925,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

if (node_n != -1) {
/* FINALIZE */
-            struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
+            struct ggml_tensor * node = cgraph->nodes[node_n];
if (GGML_OP_HAS_FINALIZE[node->op]) {
params.nth = ggml_get_n_tasks(node, n_threads);
ggml_compute_forward(&params, node);
@@ -19242,6 +19246,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
default:
break;
}

work_size = MAX(work_size, cur);
}

@@ -20910,10 +20915,10 @@ enum ggml_opt_result ggml_opt_resume(
struct ggml_tensor * f) {

// build forward + backward compute graphs
-    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, false);
-    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, opt->params.graph_size, true);

+    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true);
    ggml_build_forward_expand(gf, f);

+    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
ggml_build_backward_expand(ctx, gf, gb, true);

return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
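A sketch of how ggml_opt_resume now builds the forward and backward graphs, assuming f is a scalar loss built from tensors marked with ggml_set_param and that ctx has spare memory for the compute work buffer; the helper name, graph size and thread count are placeholders:

    static void build_and_run_opt_graphs(struct ggml_context * ctx, struct ggml_tensor * f, int n_threads) {
        // the forward graph is created with gradient slots (grads = true)
        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
        ggml_build_forward_expand(gf, f);

        // the backward graph starts as a copy of the forward graph ...
        struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
        // ... and is extended with the gradient nodes
        ggml_build_backward_expand(ctx, gf, gb, true);

        // forward pass, zero the gradients, seed df/df = 1, backward pass
        ggml_graph_compute_with_ctx(ctx, gf, n_threads);
        ggml_graph_reset(gf); // now asserts that the graph was built with gradients
        ggml_set_f32(f->grad, 1.0f);
        ggml_graph_compute_with_ctx(ctx, gb, n_threads);
    }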
10 changes: 6 additions & 4 deletions tests/test-blas0.c
@@ -132,15 +132,17 @@ int main(int argc, const char ** argv) {
{
dst2 = ggml_mul_mat(ctx0, s0_f32, s1_f32);

-        struct ggml_cgraph gf = ggml_build_forward(dst2);
-        ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+        ggml_build_forward_expand(gf, dst2);
+        ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
}

{
dst3 = ggml_mul_mat(ctx0, s0_f16, s1_f32);

-        struct ggml_cgraph gf = ggml_build_forward(dst3);
-        ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+        struct ggml_cgraph * gf = ggml_new_graph(ctx0);
+        ggml_build_forward_expand(gf, dst3);
+        ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
}

bool ok_blas = true;
35 changes: 23 additions & 12 deletions tests/test-conv-transpose.c
@@ -52,6 +52,9 @@ void check_tensor(struct ggml_tensor * t, float * expected_t_d, int ne0, int ne1
for (int i0 = 0; i0 < ne0; ++i0) {
float expected = *(expected_t_d + i2 * ne1 * ne0 + i1 * ne0 + i0);
float actual = ggml_get_data_f32(t)[i2 * ne1 * ne0 + i1 * ne0 + i0];
+                if (expected != actual) {
+                    printf("expected %.1f, got %.1f\n", expected, actual);
+                }
GGML_ASSERT(expected == actual);
}
}
@@ -100,13 +103,17 @@ void test_conv_transpose_1d(void) {
struct ggml_tensor * out_2 = ggml_conv_transpose_1d(ctx, k, t, 2 /* s0 */, 0 /* p0 */, 1 /* d0 */);
struct ggml_tensor * out_3 = ggml_conv_transpose_1d(ctx, k, t, 3 /* s0 */, 0 /* p0 */, 1 /* d0 */);

-    struct ggml_cgraph gf_1 = ggml_build_forward(out_1);
-    struct ggml_cgraph gf_2 = ggml_build_forward(out_2);
-    struct ggml_cgraph gf_3 = ggml_build_forward(out_3);
+    struct ggml_cgraph * gf_1 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_2 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_3 = ggml_new_graph(ctx);

+    ggml_build_forward_expand(gf_1, out_1);
+    ggml_build_forward_expand(gf_2, out_2);
+    ggml_build_forward_expand(gf_3, out_3);

-    ggml_graph_compute_with_ctx(ctx, &gf_1, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_2, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_3, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_1, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_2, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_3, 1);

check_tensor(out_1, (float*)expected_out_1, 4, 3, 1);
check_tensor(out_2, (float*)expected_out_2, 6, 3, 1);
@@ -203,13 +210,17 @@ void test_conv_transpose_2d(void) {
struct ggml_tensor * out_2 = ggml_conv_transpose_2d_p0(ctx, k, t, 2);
struct ggml_tensor * out_3 = ggml_conv_transpose_2d_p0(ctx, k, t, 3);

-    struct ggml_cgraph gf_1 = ggml_build_forward(out_1);
-    struct ggml_cgraph gf_2 = ggml_build_forward(out_2);
-    struct ggml_cgraph gf_3 = ggml_build_forward(out_3);
+    struct ggml_cgraph * gf_1 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_2 = ggml_new_graph(ctx);
+    struct ggml_cgraph * gf_3 = ggml_new_graph(ctx);

+    ggml_build_forward_expand(gf_1, out_1);
+    ggml_build_forward_expand(gf_2, out_2);
+    ggml_build_forward_expand(gf_3, out_3);

-    ggml_graph_compute_with_ctx(ctx, &gf_1, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_2, 1);
-    ggml_graph_compute_with_ctx(ctx, &gf_3, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_1, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_2, 1);
+    ggml_graph_compute_with_ctx(ctx, gf_3, 1);

// printf("in\n");
// printf_tensor(t);
15 changes: 9 additions & 6 deletions tests/test-customop.c
@@ -150,9 +150,10 @@ int main(int argc, const char** argv) {

struct ggml_tensor * m1 = ggml_map_custom1(ctx, t, custom1, 2, NULL);

-        struct ggml_cgraph graph = ggml_build_forward(m1);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m1);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m1);

@@ -175,9 +176,10 @@

struct ggml_tensor * m2 = ggml_map_custom2(ctx, t1, t2, custom2, GGML_N_TASKS_MAX, g_userdata);

-        struct ggml_cgraph graph = ggml_build_forward(m2);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m2);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m2);

@@ -203,9 +205,10 @@

struct ggml_tensor * m3 = ggml_map_custom3(ctx, t1, t2, t3, custom3, 1, g_userdata);

-        struct ggml_cgraph graph = ggml_build_forward(m3);
+        struct ggml_cgraph * graph = ggml_new_graph(ctx);
+        ggml_build_forward_expand(graph, m3);

-        ggml_graph_compute_with_ctx(ctx, &graph, 4);
+        ggml_graph_compute_with_ctx(ctx, graph, 4);

const float * output = ggml_get_data_f32(m3);

7 changes: 4 additions & 3 deletions tests/test-grad0.cpp
@@ -231,9 +231,10 @@ static bool check_gradient(
printf("GGML_N_THREADS = %d\n", n_threads);
}

-    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx0, f);
-    struct ggml_cgraph * gb = ggml_new_graph(ctx0);
-    *gb = *gf;
+    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
+    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
+    ggml_build_forward_expand(gf, f);
+    ggml_graph_cpy(gf, gb);
ggml_build_backward_expand(ctx0, gf, gb, false);

ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
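The same change in usage form, as a sketch of the updated check_gradient setup (ctx0, f and n_threads come from the surrounding test; the finite-difference loop is unchanged and omitted). Plain struct assignment (*gb = *gf) would now only copy the pointers to the node arrays, so ggml_graph_cpy is used to copy the forward graph into gb before the backward nodes are appended:

    // both graphs are created with gradient slots (grads = true)
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);

    ggml_build_forward_expand(gf, f);
    ggml_graph_cpy(gf, gb);                          // copy gf into gb ...
    ggml_build_backward_expand(ctx0, gf, gb, false); // ... then append the backward nodes

    ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

    ggml_graph_reset(gf);
    ggml_set_f32(f->grad, 1.0f);
    ggml_graph_compute_with_ctx(ctx0, gb, n_threads);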
(diffs for the remaining changed files are not shown)