update more examples
slaren committed Oct 21, 2023
1 parent 39e63fa commit 96e56b4
Showing 11 changed files with 57 additions and 57 deletions.
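All 11 files get the same mechanical update: the compute graph moves from a stack-allocated struct ggml_cgraph value to a graph allocated inside the evaluation's ggml_context via ggml_new_graph, and call sites pass the resulting pointer instead of &gf. A minimal before/after sketch of the pattern, condensed from the hunks below (inpL stands in for each example's final output tensor):

    // before: the graph is a value on the stack, passed by address
    struct ggml_cgraph gf = {};
    ggml_build_forward_expand(&gf, inpL);
    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

    // after: the graph is allocated in the ggml context and passed as a pointer
    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, inpL);
    ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

The mnist examples expand the graph from probs rather than inpL, and ggml_graph_dump_dot and ggml_graph_export likewise take the pointer directly; the pattern is otherwise identical.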
10 changes: 5 additions & 5 deletions examples/dolly-v2/main.cpp
@@ -497,7 +497,7 @@ bool dollyv2_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = { };
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -555,8 +555,8 @@ bool dollyv2_eval(
( n_ctx)*ggml_element_size(model.memory_v),
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -666,8 +666,8 @@ bool dollyv2_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
16 changes: 8 additions & 8 deletions examples/gpt-2/main-ctx.cpp
@@ -429,7 +429,7 @@ bool gpt2_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -491,8 +491,8 @@ bool gpt2_eval(
struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -673,8 +673,8 @@ bool gpt2_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
@@ -767,7 +767,7 @@ int main(int argc, char ** argv) {
size_t mem_per_token = 0;
gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);

-for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
+for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
// predict
if (embd.size() > 0) {
const int64_t t_start_us = ggml_time_us();
@@ -805,9 +805,9 @@ int main(int argc, char ** argv) {
embd.push_back(id);
} else {
// if here, it means we are still processing the input prompt
-for (int k = i; k < embd_inp.size(); k++) {
+for (size_t k = i; k < embd_inp.size(); k++) {
embd.push_back(embd_inp[k]);
-if (embd.size() >= params.n_batch) {
+if (int32_t(embd.size()) >= params.n_batch) {
break;
}
}
10 changes: 5 additions & 5 deletions examples/gpt-j/main.cpp
@@ -425,7 +425,7 @@ bool gptj_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -471,8 +471,8 @@ bool gptj_eval(
( n_ctx)*ggml_element_size(model.memory_v),
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -590,8 +590,8 @@ bool gptj_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
10 changes: 5 additions & 5 deletions examples/gpt-neox/main.cpp
@@ -477,7 +477,7 @@ bool gpt_neox_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -537,8 +537,8 @@ bool gpt_neox_eval(
( n_ctx)*ggml_element_size(model.memory_v),
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -653,8 +653,8 @@ bool gpt_neox_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
10 changes: 5 additions & 5 deletions examples/mnist/main-cnn.cpp
@@ -61,7 +61,7 @@ int mnist_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * input = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, 28, 28, 1, 1);
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -86,16 +86,16 @@ int mnist_eval(
ggml_tensor * probs = ggml_soft_max(ctx0, cur);
ggml_set_name(probs, "probs");

-ggml_build_forward_expand(&gf, probs);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, probs);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//ggml_graph_print(&gf);
-ggml_graph_dump_dot(&gf, NULL, "mnist-cnn.dot");
+ggml_graph_dump_dot(gf, NULL, "mnist-cnn.dot");

if (fname_cgraph) {
// export the compute graph for later use
// see the "mnist-cpu" example
-ggml_graph_export(&gf, fname_cgraph);
+ggml_graph_export(gf, fname_cgraph);

fprintf(stderr, "%s: exported compute graph to '%s'\n", __func__, fname_cgraph);
}
8 changes: 4 additions & 4 deletions examples/mnist/main-mtl.cpp
@@ -35,7 +35,7 @@ int mnist_eval(
struct ggml_context * ctx_data = NULL;
struct ggml_context * ctx_eval = NULL;

-struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
+struct ggml_cgraph * gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);

// allocate work context
static size_t buf_size = 128ull*1024*1024; // TODO
@@ -50,12 +50,12 @@ int mnist_eval(
struct ggml_context * ctx_work = ggml_init(params);

// this allocates all Metal resources and memory buffers
-auto ctx_mtl = mnist_mtl_init(ctx_data, ctx_eval, ctx_work, &gf);
+auto ctx_mtl = mnist_mtl_init(ctx_data, ctx_eval, ctx_work, gf);

int prediction = -1;

for (int i = 0; i < 1; ++i) {
-struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "input");
+struct ggml_tensor * input = ggml_graph_get_tensor(gf, "input");

if (i % 2 == 0) {
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -64,7 +64,7 @@ int mnist_eval(
}

// the actual inference happens here
-prediction = mnist_mtl_eval(ctx_mtl, &gf);
+prediction = mnist_mtl_eval(ctx_mtl, gf);
}

mnist_mtl_free(ctx_mtl);
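Note that mnist/main-mtl.cpp differs from the other files: it does not build a graph in a fresh context but loads a previously exported compute graph, and ggml_graph_import now returns a struct ggml_cgraph * directly, so the &gf arguments to mnist_mtl_init, ggml_graph_get_tensor, and mnist_mtl_eval become plain gf.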
10 changes: 5 additions & 5 deletions examples/mnist/main.cpp
@@ -188,7 +188,7 @@ int mnist_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * input = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hparams.n_input);
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -203,16 +203,16 @@ int mnist_eval(
ggml_set_name(probs, "probs");

// build / export / run the computation graph
-ggml_build_forward_expand(&gf, probs);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, probs);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//ggml_graph_print (&gf);
-ggml_graph_dump_dot(&gf, NULL, "mnist.dot");
+ggml_graph_dump_dot(gf, NULL, "mnist.dot");

if (fname_cgraph) {
// export the compute graph for later use
// see the "mnist-cpu" example
-ggml_graph_export(&gf, "mnist.ggml");
+ggml_graph_export(gf, "mnist.ggml");

fprintf(stderr, "%s: exported compute graph to '%s'\n", __func__, fname_cgraph);
}
10 changes: 5 additions & 5 deletions examples/mpt/main.cpp
@@ -499,7 +499,7 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd));
@@ -544,8 +544,8 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
ggml_view_1d(ctx0, model.memory_v, N * n_embd,
(ggml_element_size(model.memory_v) * n_embd) * (il * n_ctx + n_past));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0,
@@ -650,8 +650,8 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
// inpL = ggml_soft_max(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

// std::cout << "Qcur" << std::endl;
// print_tensor(Qcur);
10 changes: 5 additions & 5 deletions examples/replit/main.cpp
@@ -476,7 +476,7 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd));
@@ -515,8 +515,8 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
ggml_view_1d(ctx0, model.memory_v, N * n_embd,
(ggml_element_size(model.memory_v) * n_embd) * (il * n_ctx + n_past));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0,
@@ -614,8 +614,8 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
// inpL = ggml_soft_max(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

// std::cout << "Qcur" << std::endl;
// print_tensor(Qcur);
10 changes: 5 additions & 5 deletions examples/starcoder/main.cpp
@@ -464,7 +464,7 @@ bool starcoder_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -528,8 +528,8 @@ bool starcoder_eval(
struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -716,8 +716,8 @@ bool starcoder_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
10 changes: 5 additions & 5 deletions examples/starcoder/starcoder-mmap.cpp
@@ -673,7 +673,7 @@ bool starcoder_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = {};
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);

@@ -739,8 +739,8 @@ bool starcoder_eval(
struct ggml_tensor * k = ggml_view_1d(ctx0, cache.k, N*n_embd, (ggml_element_size(cache.k)*n_embd)*(il*n_ctx + n_past));
struct ggml_tensor * v = ggml_view_1d(ctx0, cache.v, N*n_embd, (ggml_element_size(cache.v)*n_embd)*(il*n_ctx + n_past));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -927,8 +927,8 @@ bool starcoder_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
