call llama.cpp directly from go
mxyng committed Jul 11, 2023
1 parent a3ec1ec commit fd4792e
Showing 16 changed files with 458 additions and 1,287 deletions.
13 changes: 13 additions & 0 deletions .gitignore
@@ -8,3 +8,16 @@ dist
__pycache__
ollama
ggml-metal.metal

# cmake gitignore
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
43 changes: 43 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,43 @@
cmake_minimum_required(VERSION 3.12)
project(ollama)

include(FetchContent)

FetchContent_Declare(
  "llama.cpp"
  GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
  GIT_TAG 55dbb91
)

FetchContent_MakeAvailable(llama.cpp)

add_custom_target(
  ollama
  ALL
  DEPENDS
  ${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal
  COMMAND
  ${CMAKE_COMMAND} -E
  env
  CGO_CPPFLAGS='-I${llama.cpp_SOURCE_DIR}'
  CGO_LDFLAGS='-L${llama.cpp_BINARY_DIR} -lllama -lggml_static -lm -lstdc++'
  CGO_CXXFLAGS='-std=c++11'
  --
  go build .
  WORKING_DIRECTORY
  ${CMAKE_CURRENT_SOURCE_DIR}
)

add_custom_command(
  OUTPUT
  ${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal
  COMMAND
  ${CMAKE_COMMAND} -E
  copy_if_different
  ${llama.cpp_SOURCE_DIR}/ggml-metal.metal
  ${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal
  WORKING_DIRECTORY
  ${CMAKE_CURRENT_SOURCE_DIR}
)

add_dependencies(ollama llama ggml_static)
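
The custom target above runs go build with CGO_CPPFLAGS, CGO_LDFLAGS and CGO_CXXFLAGS pointing at the llama.cpp sources and static libraries fetched and built by FetchContent, so a Go package can bind to llama.cpp with a plain cgo include and no #cgo directives of its own. A minimal sketch of what such a binding could look like (the package name and the SystemInfo wrapper are illustrative, not the binding introduced by this commit):

package llama

// Include and library paths come from the CGO_* environment variables
// exported by the CMake target above, so no #cgo directives are needed here.

// #include <llama.h>
import "C"

// SystemInfo returns llama.cpp's compile-time feature summary
// (AVX, BLAS, Metal, ...) as a Go string.
func SystemInfo() string {
	return C.GoString(C.llama_print_system_info())
}

Configuring and building the CMake project then produces the ollama binary with llama.cpp and ggml linked in statically.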
19 changes: 0 additions & 19 deletions Makefile

This file was deleted.

147 changes: 66 additions & 81 deletions api/types.go
@@ -1,5 +1,7 @@
 package api
 
+import "runtime"
+
 type PullRequest struct {
 	Model string `json:"model"`
 }
@@ -14,93 +16,76 @@ type GenerateRequest struct {
 	Model string `json:"model"`
 	Prompt string `json:"prompt"`
 
-	ModelOptions *ModelOptions `json:"model_opts,omitempty"`
-	PredictOptions *PredictOptions `json:"predict_opts,omitempty"`
+	Options `json:"options"`
 }
 
-type ModelOptions struct {
-	ContextSize int `json:"context_size,omitempty"`
-	Seed int `json:"seed,omitempty"`
-	NBatch int `json:"n_batch,omitempty"`
-	F16Memory bool `json:"memory_f16,omitempty"`
-	MLock bool `json:"mlock,omitempty"`
-	MMap bool `json:"mmap,omitempty"`
-	VocabOnly bool `json:"vocab_only,omitempty"`
-	LowVRAM bool `json:"low_vram,omitempty"`
-	Embeddings bool `json:"embeddings,omitempty"`
-	NUMA bool `json:"numa,omitempty"`
-	NGPULayers int `json:"gpu_layers,omitempty"`
-	MainGPU string `json:"main_gpu,omitempty"`
-	TensorSplit string `json:"tensor_split,omitempty"`
+type GenerateResponse struct {
+	Response string `json:"response"`
 }
 
-type PredictOptions struct {
-	Seed int `json:"seed,omitempty"`
-	Threads int `json:"threads,omitempty"`
-	Tokens int `json:"tokens,omitempty"`
-	TopK int `json:"top_k,omitempty"`
-	Repeat int `json:"repeat,omitempty"`
-	Batch int `json:"batch,omitempty"`
-	NKeep int `json:"nkeep,omitempty"`
-	TopP float64 `json:"top_p,omitempty"`
-	Temperature float64 `json:"temp,omitempty"`
-	Penalty float64 `json:"penalty,omitempty"`
-	F16KV bool
-	DebugMode bool
-	StopPrompts []string
-	IgnoreEOS bool `json:"ignore_eos,omitempty"`
-
-	TailFreeSamplingZ float64 `json:"tfs_z,omitempty"`
-	TypicalP float64 `json:"typical_p,omitempty"`
-	FrequencyPenalty float64 `json:"freq_penalty,omitempty"`
-	PresencePenalty float64 `json:"pres_penalty,omitempty"`
-	Mirostat int `json:"mirostat,omitempty"`
-	MirostatETA float64 `json:"mirostat_lr,omitempty"`
-	MirostatTAU float64 `json:"mirostat_ent,omitempty"`
-	PenalizeNL bool `json:"penalize_nl,omitempty"`
-	LogitBias string `json:"logit_bias,omitempty"`
-
-	PathPromptCache string
-	MLock bool `json:"mlock,omitempty"`
-	MMap bool `json:"mmap,omitempty"`
-	PromptCacheAll bool
-	PromptCacheRO bool
-	MainGPU string
-	TensorSplit string
-}
+type Options struct {
+	Seed int `json:"seed,omitempty"`
 
-var DefaultModelOptions ModelOptions = ModelOptions{
-	ContextSize: 512,
-	Seed: 0,
-	F16Memory: true,
-	MLock: false,
-	Embeddings: true,
-	MMap: true,
-	LowVRAM: false,
-}
+	// Backend options
+	UseNUMA bool `json:"numa,omitempty"`
 
-var DefaultPredictOptions PredictOptions = PredictOptions{
-	Seed: -1,
-	Threads: -1,
-	Tokens: 512,
-	Penalty: 1.1,
-	Repeat: 64,
-	Batch: 512,
-	NKeep: 64,
-	TopK: 90,
-	TopP: 0.86,
-	TailFreeSamplingZ: 1.0,
-	TypicalP: 1.0,
-	Temperature: 0.8,
-	FrequencyPenalty: 0.0,
-	PresencePenalty: 0.0,
-	Mirostat: 0,
-	MirostatTAU: 5.0,
-	MirostatETA: 0.1,
-	MMap: true,
-	StopPrompts: []string{"llama"},
+	// Model options
+	NumCtx int `json:"num_ctx,omitempty"`
+	NumBatch int `json:"num_batch,omitempty"`
+	NumGPU int `json:"num_gpu,omitempty"`
+	MainGPU int `json:"main_gpu,omitempty"`
+	LowVRAM bool `json:"low_vram,omitempty"`
+	F16KV bool `json:"f16_kv,omitempty"`
+	LogitsAll bool `json:"logits_all,omitempty"`
+	VocabOnly bool `json:"vocab_only,omitempty"`
+	UseMMap bool `json:"use_mmap,omitempty"`
+	UseMLock bool `json:"use_mlock,omitempty"`
+	EmbeddingOnly bool `json:"embedding_only,omitempty"`
+
+	// Predict options
+	RepeatLastN int `json:"repeat_last_n,omitempty"`
+	RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
+	FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
+	PresencePenalty float32 `json:"presence_penalty,omitempty"`
+	Temperature float32 `json:"temperature,omitempty"`
+	TopK int `json:"top_k,omitempty"`
+	TopP float32 `json:"top_p,omitempty"`
+	TFSZ float32 `json:"tfs_z,omitempty"`
+	TypicalP float32 `json:"typical_p,omitempty"`
+	Mirostat int `json:"mirostat,omitempty"`
+	MirostatTau float32 `json:"mirostat_tau,omitempty"`
+	MirostatEta float32 `json:"mirostat_eta,omitempty"`
+
+	NumThread int `json:"num_thread,omitempty"`
 }
 
-type GenerateResponse struct {
-	Response string `json:"response"`
+func DefaultOptions() Options {
+	return Options{
+		Seed: -1,
+
+		UseNUMA: false,
+
+		NumCtx: 512,
+		NumBatch: 512,
+		NumGPU: 1,
+		LowVRAM: false,
+		F16KV: true,
+		UseMMap: true,
+		UseMLock: false,
+
+		RepeatLastN: 512,
+		RepeatPenalty: 1.1,
+		FrequencyPenalty: 0.0,
+		PresencePenalty: 0.0,
+		Temperature: 0.8,
+		TopK: 40,
+		TopP: 0.9,
+		TFSZ: 1.0,
+		TypicalP: 1.0,
+		Mirostat: 0,
+		MirostatTau: 5.0,
+		MirostatEta: 0.1,
+
+		NumThread: runtime.NumCPU(),
+	}
 }
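
With this change the separate ModelOptions and PredictOptions structs collapse into one flat Options struct that is embedded in GenerateRequest, and DefaultOptions() supplies the defaults. A hypothetical client-side sketch (the import path and model name are placeholders, not part of this diff):

package main

import (
	"fmt"

	"github.com/jmorganca/ollama/api"
)

func main() {
	// Start from the defaults and override only what needs to change.
	opts := api.DefaultOptions()
	opts.Temperature = 0.2
	opts.NumGPU = 0 // e.g. CPU-only inference

	req := api.GenerateRequest{
		Model:   "some-model", // placeholder
		Prompt:  "Why is the sky blue?",
		Options: opts, // embedded struct, serialized under "options"
	}

	fmt.Printf("%+v\n", req)
}

Because Options is embedded, its fields are also promoted onto GenerateRequest, so req.Temperature reads the same value.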
1 change: 1 addition & 0 deletions go.mod
@@ -39,6 +39,7 @@ require (
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.10.0 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/sync v0.3.0
golang.org/x/sys v0.10.0 // indirect
golang.org/x/term v0.10.0
golang.org/x/text v0.10.0 // indirect
2 changes: 2 additions & 0 deletions go.sum
@@ -99,6 +99,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
23 changes: 0 additions & 23 deletions llama/CMakeLists.txt

This file was deleted.

(Diffs for the remaining changed files are not shown.)
