Skip to content

Commit

Permalink
llm: normalise KV cache type (kvct) parameter handling by lower-casing the user-supplied value (ollama#7926)
Browse files Browse the repository at this point in the history
  • Loading branch information
sammcj authored Dec 4, 2024
1 parent 1bdab9f commit 539be43
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion llm/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,

var kvct string
if fa {
requested := envconfig.KvCacheType()
requested := strings.ToLower(envconfig.KvCacheType())
if requested != "" && ggml.SupportsKVCacheType(requested) {
kvct = requested
}
Expand Down
2 changes: 1 addition & 1 deletion llm/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
fa = false
}

kvct := envconfig.KvCacheType()
kvct := strings.ToLower(envconfig.KvCacheType())

if fa {
slog.Info("enabling flash attention")
Expand Down

0 comments on commit 539be43

Please sign in to comment.