feat: Add Ollama keep_alive param to control how long models stay loaded #319

Merged 1 commit on Feb 2, 2024
@@ -231,6 +231,7 @@ class ChatOllama extends BaseChatModel<ChatOllamaOptions> {
model: options?.model ?? defaultOptions.model ?? throwNullModelError(),
messages: messages.toMessages(),
format: options?.format?.toResponseFormat(),
+ keepAlive: options?.keepAlive,
stream: stream,
options: RequestOptions(
numKeep: options?.numKeep ?? defaultOptions.numKeep,
@@ -10,6 +10,7 @@ class ChatOllamaOptions extends ChatModelOptions {
const ChatOllamaOptions({
this.model = 'llama2',
this.format,
+ this.keepAlive,
this.numKeep,
this.seed,
this.numPredict,
@@ -58,6 +59,14 @@ class ChatOllamaOptions extends ChatModelOptions {
/// Otherwise, the model may generate large amounts of whitespace.
final OllamaResponseFormat? format;

+ /// How long (in minutes) to keep the model loaded in memory.
+ ///
+ /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+ /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+ /// - If set to 0, the model will be unloaded immediately once finished.
+ /// - If not set, the model will stay loaded for 5 minutes by default.
+ final int? keepAlive;

/// Number of tokens to keep from the prompt.
/// (Default: 0)
final int? numKeep;
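For context, a minimal sketch of the new option in use with the chat model. It assumes the `langchain` and `langchain_ollama` packages are imported and a local Ollama server is running; the model name and prompt are illustrative only:

```dart
import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  final chatModel = ChatOllama(
    defaultOptions: const ChatOllamaOptions(
      model: 'llama2',
      // Keep the model loaded for 30 minutes after each request.
      keepAlive: 30,
    ),
  );

  final res = await chatModel.invoke(
    PromptValue.string('Why is the sky blue?'),
  );
  print(res);
}
```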
9 changes: 9 additions & 0 deletions packages/langchain_ollama/lib/src/llms/models/models.dart
@@ -12,6 +12,7 @@ class OllamaOptions extends LLMOptions {
this.context,
this.format,
this.raw,
+ this.keepAlive,
this.numKeep,
this.seed,
this.numPredict,
@@ -80,6 +81,14 @@ class OllamaOptions extends LLMOptions {
/// yourself.
final bool? raw;

+ /// How long (in minutes) to keep the model loaded in memory.
+ ///
+ /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+ /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+ /// - If set to 0, the model will be unloaded immediately once finished.
+ /// - If not set, the model will stay loaded for 5 minutes by default.
+ final int? keepAlive;

/// Number of tokens to keep from the prompt.
/// (Default: 0)
final int? numKeep;
1 change: 1 addition & 0 deletions packages/langchain_ollama/lib/src/llms/ollama.dart
@@ -223,6 +223,7 @@ class Ollama extends BaseLLM<OllamaOptions> {
context: options?.context,
format: options?.format?.toResponseFormat(),
raw: options?.raw,
+ keepAlive: options?.keepAlive,
stream: stream,
options: RequestOptions(
numKeep: options?.numKeep ?? defaultOptions.numKeep,
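The completion-style `Ollama` LLM gets the same option, and it can also be overridden per call. A sketch, assuming the same packages and that `invoke` accepts an `options` override as elsewhere in this API:

```dart
import 'package:langchain/langchain.dart';
import 'package:langchain_ollama/langchain_ollama.dart';

Future<void> main() async {
  final llm = Ollama(
    defaultOptions: const OllamaOptions(model: 'llama2'),
  );

  // Override keepAlive for a single call: -1 keeps the model loaded
  // indefinitely, 0 unloads it as soon as the response is generated.
  final res = await llm.invoke(
    PromptValue.string('Why is the sky blue?'),
    options: const OllamaOptions(keepAlive: -1),
  );
  print(res);
}
```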
@@ -39,6 +39,14 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {

/// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
@Default(false) bool stream,

+ /// How long (in minutes) to keep the model loaded in memory.
+ ///
+ /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+ /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+ /// - If set to 0, the model will be unloaded immediately once finished.
+ /// - If not set, the model will stay loaded for 5 minutes by default.
+ @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
}) = _GenerateChatCompletionRequest;

/// Object construction from a JSON representation
@@ -51,7 +59,8 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
'messages',
'format',
'options',
- 'stream'
+ 'stream',
+ 'keep_alive'
];

/// Perform validations on the schema property values
@@ -67,6 +76,7 @@ class GenerateChatCompletionRequest with _$GenerateChatCompletionRequest {
'format': format,
'options': options,
'stream': stream,
+ 'keep_alive': keepAlive,
};
}
}
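At the `ollama_dart` client level, the new field maps onto the `keep_alive` key of the request body, and `includeIfNull: false` means the key is omitted entirely when the option is not set. A sketch of the chat endpoint; the client and `Message` names here are assumptions about the `ollama_dart` API rather than verbatim from this diff:

```dart
import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();

  final res = await client.generateChatCompletion(
    request: const GenerateChatCompletionRequest(
      model: 'llama2',
      messages: [
        Message(role: MessageRole.user, content: 'Why is the sky blue?'),
      ],
      // Serialized as `"keep_alive": -1`: keep the model loaded indefinitely.
      keepAlive: -1,
    ),
  );
  print(res.message?.content);
  client.endSession();
}
```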
@@ -56,6 +56,14 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {

/// If `false` the response will be returned as a single response object, otherwise the response will be streamed as a series of objects.
@Default(false) bool stream,

+ /// How long (in minutes) to keep the model loaded in memory.
+ ///
+ /// - If set to a positive duration (e.g. 20), the model will stay loaded for the provided duration.
+ /// - If set to a negative duration (e.g. -1), the model will stay loaded indefinitely.
+ /// - If set to 0, the model will be unloaded immediately once finished.
+ /// - If not set, the model will stay loaded for 5 minutes by default.
+ @JsonKey(name: 'keep_alive', includeIfNull: false) int? keepAlive,
}) = _GenerateCompletionRequest;

/// Object construction from a JSON representation
@@ -73,7 +81,8 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
'options',
'format',
'raw',
- 'stream'
+ 'stream',
+ 'keep_alive'
];

/// Perform validations on the schema property values
@@ -94,6 +103,7 @@ class GenerateCompletionRequest with _$GenerateCompletionRequest {
'format': format,
'raw': raw,
'stream': stream,
+ 'keep_alive': keepAlive,
};
}
}
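And the equivalent for the plain completion endpoint, here using `keepAlive: 0` to free memory as soon as the call finishes (same caveat: a sketch against the `ollama_dart` API as of this PR):

```dart
import 'package:ollama_dart/ollama_dart.dart';

Future<void> main() async {
  final client = OllamaClient();

  final res = await client.generateCompletion(
    request: const GenerateCompletionRequest(
      model: 'llama2',
      prompt: 'Why is the sky blue?',
      // Unload the model from memory as soon as the response is produced.
      keepAlive: 0,
    ),
  );
  print(res.response);
  client.endSession();
}
```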