diff --git a/packages/langchain_anthropic/lib/src/chat_models/chat_anthropic.dart b/packages/langchain_anthropic/lib/src/chat_models/chat_anthropic.dart index 13a687a3..1c8360d4 100644 --- a/packages/langchain_anthropic/lib/src/chat_models/chat_anthropic.dart +++ b/packages/langchain_anthropic/lib/src/chat_models/chat_anthropic.dart @@ -154,7 +154,8 @@ class ChatAnthropic extends BaseChatModel { final Map? queryParams, final http.Client? client, super.defaultOptions = const ChatAnthropicOptions( - model: 'claude-3-5-sonnet-20240620', + model: defaultModel, + maxTokens: defaultMaxTokens, ), this.encoding = 'cl100k_base', }) : _client = a.AnthropicClient( @@ -177,6 +178,12 @@ class ChatAnthropic extends BaseChatModel { @override String get modelType => 'anthropic-chat'; + /// The default model to use unless another is specified. + static const defaultModel = 'claude-3-5-sonnet-20240620'; + + /// The default max tokens to use unless another is specified. + static const defaultMaxTokens = 1024; + @override Future invoke( final PromptValue input, { @@ -187,7 +194,6 @@ class ChatAnthropic extends BaseChatModel { input.toChatMessages(), options: options, defaultOptions: defaultOptions, - throwNullModelError: throwNullModelError, ), ); return completion.toChatResult(); @@ -205,7 +211,6 @@ class ChatAnthropic extends BaseChatModel { options: options, defaultOptions: defaultOptions, stream: true, - throwNullModelError: throwNullModelError, ), ) .transform(MessageStreamEventTransformer()); diff --git a/packages/langchain_anthropic/lib/src/chat_models/mappers.dart b/packages/langchain_anthropic/lib/src/chat_models/mappers.dart index 002df82c..020ef844 100644 --- a/packages/langchain_anthropic/lib/src/chat_models/mappers.dart +++ b/packages/langchain_anthropic/lib/src/chat_models/mappers.dart @@ -9,6 +9,7 @@ import 'package:langchain_core/language_models.dart'; import 'package:langchain_core/tools.dart'; import 'package:rxdart/rxdart.dart' show WhereNotNullExtension; +import 'chat_anthropic.dart'; import 'types.dart'; /// Creates a [CreateMessageRequest] from the given input. @@ -17,7 +18,6 @@ a.CreateMessageRequest createMessageRequest( required final ChatAnthropicOptions? options, required final ChatAnthropicOptions defaultOptions, final bool stream = false, - required Never Function() throwNullModelError, }) { final systemMsg = messages.firstOrNull is SystemChatMessage ? messages.firstOrNull?.contentAsString @@ -31,10 +31,12 @@ a.CreateMessageRequest createMessageRequest( return a.CreateMessageRequest( model: a.Model.modelId( - options?.model ?? defaultOptions.model ?? throwNullModelError(), + options?.model ?? defaultOptions.model ?? ChatAnthropic.defaultModel, ), messages: messagesDtos, - maxTokens: options?.maxTokens ?? defaultOptions.maxTokens ?? 1024, + maxTokens: options?.maxTokens ?? + defaultOptions.maxTokens ?? + ChatAnthropic.defaultMaxTokens, stopSequences: options?.stopSequences ?? defaultOptions.stopSequences, system: systemMsg, temperature: options?.temperature ?? 
defaultOptions.temperature, diff --git a/packages/langchain_anthropic/lib/src/chat_models/types.dart b/packages/langchain_anthropic/lib/src/chat_models/types.dart index 4374c820..98069444 100644 --- a/packages/langchain_anthropic/lib/src/chat_models/types.dart +++ b/packages/langchain_anthropic/lib/src/chat_models/types.dart @@ -1,14 +1,28 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; import 'package:langchain_core/tools.dart'; +import 'package:meta/meta.dart'; /// {@template chat_anthropic_options} /// Options to pass into the Anthropic Chat Model. +/// +/// Available models: +/// - `claude-3-5-sonnet-20240620` +/// - `claude-3-haiku-20240307` +/// - `claude-3-opus-20240229` +/// - `claude-3-sonnet-20240229` +/// - `claude-2.0` +/// - `claude-2.1` +/// +/// Mind that the list may be outdated. +/// See https://docs.anthropic.com/en/docs/about-claude/models for the latest list. /// {@endtemplate} +@immutable class ChatAnthropicOptions extends ChatModelOptions { /// {@macro chat_anthropic_options} const ChatAnthropicOptions({ - this.model = 'claude-3-5-sonnet-20240620', - this.maxTokens = 1024, + super.model, + this.maxTokens, this.stopSequences, this.temperature, this.topK, @@ -19,20 +33,6 @@ class ChatAnthropicOptions extends ChatModelOptions { super.concurrencyLimit, }); - /// ID of the model to use (e.g. 'claude-3-5-sonnet-20240620'). - /// - /// Available models: - /// - `claude-3-5-sonnet-20240620` - /// - `claude-3-haiku-20240307` - /// - `claude-3-opus-20240229` - /// - `claude-3-sonnet-20240229` - /// - `claude-2.0` - /// - `claude-2.1` - /// - /// Mind that the list may be outdated. - /// See https://docs.anthropic.com/en/docs/about-claude/models for the latest list. - final String? model; - /// The maximum number of tokens to generate before stopping. /// /// Note that our models may stop _before_ reaching this maximum. This parameter @@ -113,4 +113,33 @@ class ChatAnthropicOptions extends ChatModelOptions { concurrencyLimit: concurrencyLimit ?? 
this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatAnthropicOptions other) { + return model == other.model && + maxTokens == other.maxTokens && + const ListEquality() + .equals(stopSequences, other.stopSequences) && + temperature == other.temperature && + topK == other.topK && + topP == other.topP && + userId == other.userId && + const ListEquality().equals(tools, other.tools) && + toolChoice == other.toolChoice && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + maxTokens.hashCode ^ + const ListEquality().hash(stopSequences) ^ + temperature.hashCode ^ + topK.hashCode ^ + topP.hashCode ^ + userId.hashCode ^ + const ListEquality().hash(tools) ^ + toolChoice.hashCode ^ + concurrencyLimit.hashCode; + } } diff --git a/packages/langchain_anthropic/pubspec.yaml b/packages/langchain_anthropic/pubspec.yaml index 33e625f7..180234ac 100644 --- a/packages/langchain_anthropic/pubspec.yaml +++ b/packages/langchain_anthropic/pubspec.yaml @@ -21,6 +21,7 @@ dependencies: http: ^1.1.0 langchain_core: 0.3.3 langchain_tiktoken: ^1.0.1 + meta: ^1.11.0 rxdart: ^0.27.7 dev_dependencies: diff --git a/packages/langchain_core/lib/src/chat_models/types.dart b/packages/langchain_core/lib/src/chat_models/types.dart index fa0bc0fc..e9b788c7 100644 --- a/packages/langchain_core/lib/src/chat_models/types.dart +++ b/packages/langchain_core/lib/src/chat_models/types.dart @@ -10,9 +10,10 @@ import '../tools/base.dart'; class ChatModelOptions extends LanguageModelOptions { /// {@macro chat_model_options} const ChatModelOptions({ - super.concurrencyLimit, + super.model, this.tools, this.toolChoice, + super.concurrencyLimit, }); /// A list of tools the model may call. diff --git a/packages/langchain_core/lib/src/language_models/base.dart b/packages/langchain_core/lib/src/language_models/base.dart index 33d3b002..3156cd74 100644 --- a/packages/langchain_core/lib/src/language_models/base.dart +++ b/packages/langchain_core/lib/src/language_models/base.dart @@ -1,5 +1,3 @@ -import 'package:meta/meta.dart'; - import '../langchain/base.dart'; import '../prompts/types.dart'; import 'types.dart'; @@ -58,33 +56,4 @@ abstract class BaseLanguageModel< @override String toString() => modelType; - - /// Throws an error if the model id is not specified. - @protected - Never throwNullModelError() { - throw ArgumentError(''' -Null model in $runtimeType. - -You need to specify the id of model to use either in `$runtimeType.defaultOptions` -or in the options passed when invoking the model. - -Example: -``` -// In defaultOptions -final model = $runtimeType( - defaultOptions: ${runtimeType}Options( - model: 'model-id', - ), -); - -// Or when invoking the model -final res = await model.invoke( - prompt, - options: ${runtimeType}Options( - model: 'model-id', - ), -); -``` -'''); - } } diff --git a/packages/langchain_core/lib/src/language_models/types.dart b/packages/langchain_core/lib/src/language_models/types.dart index f1475ad2..c2e6df11 100644 --- a/packages/langchain_core/lib/src/language_models/types.dart +++ b/packages/langchain_core/lib/src/language_models/types.dart @@ -10,8 +10,13 @@ import '../langchain/types.dart'; abstract class LanguageModelOptions extends BaseLangChainOptions { /// {@macro language_model_options} const LanguageModelOptions({ + this.model, super.concurrencyLimit, }); + + /// ID of the language model to use. + /// Check the provider's documentation for available models. + final String? 
model; } /// {@template language_model} diff --git a/packages/langchain_core/lib/src/llms/types.dart b/packages/langchain_core/lib/src/llms/types.dart index d6bed6f3..47b98285 100644 --- a/packages/langchain_core/lib/src/llms/types.dart +++ b/packages/langchain_core/lib/src/llms/types.dart @@ -9,6 +9,7 @@ import '../language_models/types.dart'; class LLMOptions extends LanguageModelOptions { /// {@macro llm_options} const LLMOptions({ + super.model, super.concurrencyLimit, }); } diff --git a/packages/langchain_firebase/lib/src/chat_models/vertex_ai/chat_firebase_vertex_ai.dart b/packages/langchain_firebase/lib/src/chat_models/vertex_ai/chat_firebase_vertex_ai.dart index 77ce67d6..20b2b520 100644 --- a/packages/langchain_firebase/lib/src/chat_models/vertex_ai/chat_firebase_vertex_ai.dart +++ b/packages/langchain_firebase/lib/src/chat_models/vertex_ai/chat_firebase_vertex_ai.dart @@ -154,7 +154,7 @@ class ChatFirebaseVertexAI extends BaseChatModel { /// - [ChatFirebaseVertexAI.location] ChatFirebaseVertexAI({ super.defaultOptions = const ChatFirebaseVertexAIOptions( - model: 'gemini-1.5-flash', + model: defaultModel, ), this.app, this.appCheck, @@ -188,15 +188,18 @@ class ChatFirebaseVertexAI extends BaseChatModel { /// A UUID generator. late final Uuid _uuid = const Uuid(); - @override - String get modelType => 'chat-firebase-vertex-ai'; - /// The current model set in [_firebaseClient]; String _currentModel; /// The current system instruction set in [_firebaseClient]; String? _currentSystemInstruction; + @override + String get modelType => 'chat-firebase-vertex-ai'; + + /// The default model to use unless another is specified. + static const defaultModel = 'gemini-1.5-flash'; + @override Future invoke( final PromptValue input, { @@ -329,8 +332,7 @@ class ChatFirebaseVertexAI extends BaseChatModel { final List messages, final ChatFirebaseVertexAIOptions? options, ) { - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final model = options?.model ?? defaultOptions.model ?? defaultModel; final systemInstruction = messages.firstOrNull is SystemChatMessage ? messages.firstOrNull?.contentAsString diff --git a/packages/langchain_firebase/lib/src/chat_models/vertex_ai/types.dart b/packages/langchain_firebase/lib/src/chat_models/vertex_ai/types.dart index d2aee55d..7c92e16c 100644 --- a/packages/langchain_firebase/lib/src/chat_models/vertex_ai/types.dart +++ b/packages/langchain_firebase/lib/src/chat_models/vertex_ai/types.dart @@ -1,12 +1,19 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; +import 'package:langchain_core/tools.dart'; +import 'package:meta/meta.dart'; /// {@template chat_firebase_vertex_ai_options} /// Options to pass into the Vertex AI for Firebase model. +/// +/// You can find a list of available models here: +/// https://firebase.google.com/docs/vertex-ai/gemini-models /// {@endtemplate} +@immutable class ChatFirebaseVertexAIOptions extends ChatModelOptions { /// {@macro chat_firebase_vertex_ai_options} const ChatFirebaseVertexAIOptions({ - this.model = 'gemini-1.5-flash', + super.model, this.topP, this.topK, this.candidateCount, @@ -20,12 +27,6 @@ class ChatFirebaseVertexAIOptions extends ChatModelOptions { super.concurrencyLimit, }); - /// The LLM to use. - /// - /// You can find a list of available models here: - /// https://firebase.google.com/docs/vertex-ai/gemini-models - final String? model; - /// The maximum cumulative probability of tokens to consider when sampling. 
/// The model uses combined Top-k and nucleus sampling. Tokens are sorted /// based on their assigned probabilities so that only the most likely @@ -99,7 +100,11 @@ class ChatFirebaseVertexAIOptions extends ChatModelOptions { final int? maxOutputTokens, final double? temperature, final List? stopSequences, + final String? responseMimeType, final List? safetySettings, + final List? tools, + final ChatToolChoice? toolChoice, + final int? concurrencyLimit, }) { return ChatFirebaseVertexAIOptions( model: model ?? this.model, @@ -109,9 +114,48 @@ class ChatFirebaseVertexAIOptions extends ChatModelOptions { maxOutputTokens: maxOutputTokens ?? this.maxOutputTokens, temperature: temperature ?? this.temperature, stopSequences: stopSequences ?? this.stopSequences, + responseMimeType: responseMimeType ?? this.responseMimeType, safetySettings: safetySettings ?? this.safetySettings, + tools: tools ?? this.tools, + toolChoice: toolChoice ?? this.toolChoice, + concurrencyLimit: concurrencyLimit ?? this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatFirebaseVertexAIOptions other) { + return model == other.model && + topP == other.topP && + topK == other.topK && + candidateCount == other.candidateCount && + maxOutputTokens == other.maxOutputTokens && + temperature == other.temperature && + const ListEquality() + .equals(stopSequences, other.stopSequences) && + responseMimeType == other.responseMimeType && + const ListEquality() + .equals(safetySettings, other.safetySettings) && + const ListEquality().equals(tools, other.tools) && + toolChoice == other.toolChoice && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + topP.hashCode ^ + topK.hashCode ^ + candidateCount.hashCode ^ + maxOutputTokens.hashCode ^ + temperature.hashCode ^ + const ListEquality().hash(stopSequences) ^ + responseMimeType.hashCode ^ + const ListEquality() + .hash(safetySettings) ^ + const ListEquality().hash(tools) ^ + toolChoice.hashCode ^ + concurrencyLimit.hashCode; + } } /// {@template chat_google_generative_ai_safety_setting} @@ -119,6 +163,7 @@ class ChatFirebaseVertexAIOptions extends ChatModelOptions { /// Passing a safety setting for a category changes the allowed probability that /// content is blocked. /// {@endtemplate} +@immutable class ChatFirebaseVertexAISafetySetting { /// {@macro chat_google_generative_ai_safety_setting} const ChatFirebaseVertexAISafetySetting({ @@ -131,6 +176,28 @@ class ChatFirebaseVertexAISafetySetting { /// Controls the probability threshold at which harm is blocked. final ChatFirebaseVertexAISafetySettingThreshold threshold; + + /// Creates a copy of this [ChatFirebaseVertexAISafetySetting] object with + /// the given fields replaced with the new values. + ChatFirebaseVertexAISafetySetting copyWith({ + final ChatFirebaseVertexAISafetySettingCategory? category, + final ChatFirebaseVertexAISafetySettingThreshold? threshold, + }) { + return ChatFirebaseVertexAISafetySetting( + category: category ?? this.category, + threshold: threshold ?? this.threshold, + ); + } + + @override + bool operator ==(covariant final ChatFirebaseVertexAISafetySetting other) { + return category == other.category && threshold == other.threshold; + } + + @override + int get hashCode { + return category.hashCode ^ threshold.hashCode; + } } /// Safety settings categorizes. 
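Illustrative usage sketch (not part of the diff): with `model` now declared on the base `LanguageModelOptions` and each wrapper exposing a static `defaultModel`, omitting the model id no longer triggers `throwNullModelError()` but falls back to the class-level default. The `apiKey` value is a placeholder, and the import paths and constructor parameter are assumed from the package layout shown above.

```dart
import 'package:langchain_anthropic/langchain_anthropic.dart';
import 'package:langchain_core/chat_models.dart';
import 'package:langchain_core/prompts.dart';

Future<void> main() async {
  // No model or maxTokens given: requests fall back to
  // ChatAnthropic.defaultModel ('claude-3-5-sonnet-20240620') and
  // ChatAnthropic.defaultMaxTokens (1024) introduced in this diff.
  final chatModel = ChatAnthropic(apiKey: 'ANTHROPIC_API_KEY');

  final res = await chatModel.invoke(
    PromptValue.chat([ChatMessage.humanText('Hello!')]),
    // A per-call option still takes precedence over defaultOptions
    // and over the static default.
    options: const ChatAnthropicOptions(model: 'claude-3-haiku-20240307'),
  );
  print(res.output.content);
}
```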
diff --git a/packages/langchain_google/lib/src/chat_models/google_ai/chat_google_generative_ai.dart b/packages/langchain_google/lib/src/chat_models/google_ai/chat_google_generative_ai.dart index 58934755..0fde4b9f 100644 --- a/packages/langchain_google/lib/src/chat_models/google_ai/chat_google_generative_ai.dart +++ b/packages/langchain_google/lib/src/chat_models/google_ai/chat_google_generative_ai.dart @@ -211,7 +211,7 @@ class ChatGoogleGenerativeAI final Map? queryParams, final http.Client? client, super.defaultOptions = const ChatGoogleGenerativeAIOptions( - model: 'gemini-1.5-flash', + model: defaultModel, ), }) : _currentModel = defaultOptions.model ?? '', _httpClient = createDefaultHttpClient( @@ -247,15 +247,18 @@ class ChatGoogleGenerativeAI /// Get the API key. String get apiKey => _httpClient.headers['x-goog-api-key'] ?? ''; - @override - String get modelType => 'chat-google-generative-ai'; - /// The current model set in [_googleAiClient]; String _currentModel; /// The current system instruction set in [_googleAiClient]; String? _currentSystemInstruction; + @override + String get modelType => 'chat-google-generative-ai'; + + /// The default model to use unless another is specified. + static const defaultModel = 'gemini-1.5-flash'; + @override Future invoke( final PromptValue input, { @@ -389,8 +392,7 @@ class ChatGoogleGenerativeAI final List messages, final ChatGoogleGenerativeAIOptions? options, ) { - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final model = options?.model ?? defaultOptions.model ?? defaultModel; final systemInstruction = messages.firstOrNull is SystemChatMessage ? messages.firstOrNull?.contentAsString diff --git a/packages/langchain_google/lib/src/chat_models/google_ai/types.dart b/packages/langchain_google/lib/src/chat_models/google_ai/types.dart index c86c80a5..8c4bff41 100644 --- a/packages/langchain_google/lib/src/chat_models/google_ai/types.dart +++ b/packages/langchain_google/lib/src/chat_models/google_ai/types.dart @@ -1,12 +1,17 @@ import 'package:langchain_core/chat_models.dart'; +import 'package:langchain_core/tools.dart'; +import 'package:meta/meta.dart'; /// {@template chat_google_generative_ai_options} /// Options to pass into the Google Generative AI Chat Model. +/// +/// You can find a list of available models [here](https://ai.google.dev/models). /// {@endtemplate} +@immutable class ChatGoogleGenerativeAIOptions extends ChatModelOptions { /// {@macro chat_google_generative_ai_options} const ChatGoogleGenerativeAIOptions({ - this.model = 'gemini-1.5-flash', + super.model, this.topP, this.topK, this.candidateCount, @@ -21,11 +26,6 @@ class ChatGoogleGenerativeAIOptions extends ChatModelOptions { super.concurrencyLimit, }); - /// The LLM to use. - /// - /// You can find a list of available models here: https://ai.google.dev/models - final String? model; - /// The maximum cumulative probability of tokens to consider when sampling. /// The model uses combined Top-k and nucleus sampling. Tokens are sorted /// based on their assigned probabilities so that only the most likely @@ -126,6 +126,9 @@ class ChatGoogleGenerativeAIOptions extends ChatModelOptions { final double? temperature, final List? stopSequences, final List? safetySettings, + final List? tools, + final ChatToolChoice? toolChoice, + final int? concurrencyLimit, }) { return ChatGoogleGenerativeAIOptions( model: model ?? this.model, @@ -136,8 +139,41 @@ class ChatGoogleGenerativeAIOptions extends ChatModelOptions { temperature: temperature ?? 
this.temperature, stopSequences: stopSequences ?? this.stopSequences, safetySettings: safetySettings ?? this.safetySettings, + tools: tools ?? this.tools, + toolChoice: toolChoice ?? this.toolChoice, + concurrencyLimit: concurrencyLimit ?? this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatGoogleGenerativeAIOptions other) { + return model == other.model && + topP == other.topP && + topK == other.topK && + candidateCount == other.candidateCount && + maxOutputTokens == other.maxOutputTokens && + temperature == other.temperature && + stopSequences == other.stopSequences && + safetySettings == other.safetySettings && + tools == other.tools && + toolChoice == other.toolChoice && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + topP.hashCode ^ + topK.hashCode ^ + candidateCount.hashCode ^ + maxOutputTokens.hashCode ^ + temperature.hashCode ^ + stopSequences.hashCode ^ + safetySettings.hashCode ^ + tools.hashCode ^ + toolChoice.hashCode ^ + concurrencyLimit.hashCode; + } } /// {@template chat_google_generative_ai_safety_setting} diff --git a/packages/langchain_google/lib/src/chat_models/vertex_ai/chat_vertex_ai.dart b/packages/langchain_google/lib/src/chat_models/vertex_ai/chat_vertex_ai.dart index 4f668b40..e79f00b4 100644 --- a/packages/langchain_google/lib/src/chat_models/vertex_ai/chat_vertex_ai.dart +++ b/packages/langchain_google/lib/src/chat_models/vertex_ai/chat_vertex_ai.dart @@ -117,8 +117,8 @@ class ChatVertexAI extends BaseChatModel { final String location = 'us-central1', final String? rootUrl, super.defaultOptions = const ChatVertexAIOptions( - publisher: 'google', - model: 'chat-bison', + publisher: defaultPublisher, + model: defaultModel, ), }) : client = VertexAIGenAIClient( httpClient: httpClient, @@ -139,6 +139,12 @@ class ChatVertexAI extends BaseChatModel { @override String get modelType => 'vertex-ai-chat'; + /// The default publisher to use unless another is specified. + static const defaultPublisher = 'google'; + + /// The default model to use unless another is specified. + static const defaultModel = 'chat-bison'; + @override Future invoke( final PromptValue input, { @@ -158,19 +164,15 @@ class ChatVertexAI extends BaseChatModel { final examples = (options?.examples ?? defaultOptions.examples) ?.map((final e) => e.toVertexAIChatExample()) .toList(growable: false); - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final publisher = + options?.publisher ?? defaultOptions.publisher ?? defaultPublisher; + final model = options?.model ?? defaultOptions.model ?? defaultModel; final result = await client.chat.predict( context: context, examples: examples, messages: vertexMessages, - publisher: options?.publisher ?? - defaultOptions.publisher ?? - ArgumentError.checkNotNull( - defaultOptions.publisher, - 'VertexAIOptions.publisher', - ), + publisher: publisher, model: model, parameters: VertexAITextChatModelRequestParams( maxOutputTokens: @@ -216,18 +218,15 @@ class ChatVertexAI extends BaseChatModel { final examples = (options?.examples ?? defaultOptions.examples) ?.map((final e) => e.toVertexAIChatExample()) .toList(growable: false); - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final publisher = + options?.publisher ?? defaultOptions.publisher ?? defaultPublisher; + final model = options?.model ?? defaultOptions.model ?? 
defaultModel; final res = await client.chat.countTokens( context: context, examples: examples, messages: vertexMessages, - publisher: options?.publisher ?? - ArgumentError.checkNotNull( - defaultOptions.publisher, - 'VertexAIOptions.publisher', - ), + publisher: publisher, model: model, ); return res.totalTokens; diff --git a/packages/langchain_google/lib/src/chat_models/vertex_ai/types.dart b/packages/langchain_google/lib/src/chat_models/vertex_ai/types.dart index 49316c4e..c0642867 100644 --- a/packages/langchain_google/lib/src/chat_models/vertex_ai/types.dart +++ b/packages/langchain_google/lib/src/chat_models/vertex_ai/types.dart @@ -1,13 +1,19 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; +import 'package:meta/meta.dart'; /// {@template chat_vertex_ai_options} /// Options to pass into the Vertex AI Chat Model. +/// +/// You can find a list of available models here: +/// https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models /// {@endtemplate} +@immutable class ChatVertexAIOptions extends ChatModelOptions { /// {@macro chat_vertex_ai_options} const ChatVertexAIOptions({ - this.publisher = 'google', - this.model = 'chat-bison', + this.publisher, + super.model, this.maxOutputTokens, this.temperature, this.topP, @@ -23,17 +29,6 @@ class ChatVertexAIOptions extends ChatModelOptions { /// Use `google` for first-party models. final String? publisher; - /// The text model to use. - /// - /// To use the latest model version, specify the model name without a version - /// number (e.g. `chat-bison`). - /// To use a stable model version, specify the model version number - /// (e.g. `chat-bison@001`). - /// - /// You can find a list of available models here: - /// https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models - final String? model; - /// Maximum number of tokens that can be generated in the response. A token /// is approximately four characters. 100 tokens correspond to roughly /// 60-80 words. @@ -114,6 +109,7 @@ class ChatVertexAIOptions extends ChatModelOptions { final List? stopSequences, final int? candidateCount, final List? examples, + final int? concurrencyLimit, }) { return ChatVertexAIOptions( publisher: publisher ?? this.publisher, @@ -125,6 +121,36 @@ class ChatVertexAIOptions extends ChatModelOptions { stopSequences: stopSequences ?? this.stopSequences, candidateCount: candidateCount ?? this.candidateCount, examples: examples ?? this.examples, + concurrencyLimit: concurrencyLimit ?? 
this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatVertexAIOptions other) { + return publisher == other.publisher && + model == other.model && + maxOutputTokens == other.maxOutputTokens && + temperature == other.temperature && + topP == other.topP && + topK == other.topK && + const ListEquality() + .equals(stopSequences, other.stopSequences) && + candidateCount == other.candidateCount && + const ListEquality().equals(examples, other.examples) && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return publisher.hashCode ^ + model.hashCode ^ + maxOutputTokens.hashCode ^ + temperature.hashCode ^ + topP.hashCode ^ + topK.hashCode ^ + const ListEquality().hash(stopSequences) ^ + candidateCount.hashCode ^ + const ListEquality().hash(examples) ^ + concurrencyLimit.hashCode; + } } diff --git a/packages/langchain_google/lib/src/llms/vertex_ai/types.dart b/packages/langchain_google/lib/src/llms/vertex_ai/types.dart index bf382c44..f9eee704 100644 --- a/packages/langchain_google/lib/src/llms/vertex_ai/types.dart +++ b/packages/langchain_google/lib/src/llms/vertex_ai/types.dart @@ -1,13 +1,19 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/llms.dart'; +import 'package:meta/meta.dart'; /// {@template vertex_ai_options} /// Options to pass into the Vertex AI LLM. +/// +/// You can find a list of available models here: +/// https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models /// {@endtemplate} +@immutable class VertexAIOptions extends LLMOptions { /// {@macro vertex_ai_options} const VertexAIOptions({ - this.publisher = 'google', - this.model = 'text-bison', + this.publisher, + super.model, this.maxOutputTokens, this.temperature, this.topP, @@ -22,17 +28,6 @@ class VertexAIOptions extends LLMOptions { /// Use `google` for first-party models. final String? publisher; - /// The text model to use. - /// - /// To use the latest model version, specify the model name without a version - /// number (e.g. `text-bison`). - /// To use a stable model version, specify the model version number - /// (e.g. `text-bison@001`). - /// - /// You can find a list of available models here: - /// https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models - final String? model; - /// Maximum number of tokens that can be generated in the response. A token /// is approximately four characters. 100 tokens correspond to roughly /// 60-80 words. @@ -109,6 +104,7 @@ class VertexAIOptions extends LLMOptions { final int? topK, final List? stopSequences, final int? candidateCount, + final int? concurrencyLimit, }) { return VertexAIOptions( publisher: publisher ?? this.publisher, @@ -119,6 +115,35 @@ class VertexAIOptions extends LLMOptions { topK: topK ?? this.topK, stopSequences: stopSequences ?? this.stopSequences, candidateCount: candidateCount ?? this.candidateCount, + concurrencyLimit: concurrencyLimit ?? 
this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final VertexAIOptions other) { + return publisher == other.publisher && + model == other.model && + maxOutputTokens == other.maxOutputTokens && + temperature == other.temperature && + topP == other.topP && + topK == other.topK && + const ListEquality() + .equals(stopSequences, other.stopSequences) && + candidateCount == other.candidateCount && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return publisher.hashCode ^ + model.hashCode ^ + maxOutputTokens.hashCode ^ + temperature.hashCode ^ + topP.hashCode ^ + topK.hashCode ^ + const ListEquality().hash(stopSequences) ^ + candidateCount.hashCode ^ + concurrencyLimit.hashCode; + } } diff --git a/packages/langchain_google/lib/src/llms/vertex_ai/vertex_ai.dart b/packages/langchain_google/lib/src/llms/vertex_ai/vertex_ai.dart index a0873fcc..955cc7ca 100644 --- a/packages/langchain_google/lib/src/llms/vertex_ai/vertex_ai.dart +++ b/packages/langchain_google/lib/src/llms/vertex_ai/vertex_ai.dart @@ -123,8 +123,8 @@ class VertexAI extends BaseLLM { final String location = 'us-central1', final String? rootUrl, super.defaultOptions = const VertexAIOptions( - publisher: 'google', - model: 'text-bison', + publisher: defaultPublisher, + model: defaultModel, ), }) : client = VertexAIGenAIClient( httpClient: httpClient, @@ -145,21 +145,24 @@ class VertexAI extends BaseLLM { @override String get modelType => 'vertex-ai'; + /// The default publisher to use unless another is specified. + static const defaultPublisher = 'google'; + + /// The default model to use unless another is specified. + static const defaultModel = 'text-bison'; + @override Future invoke( final PromptValue input, { final VertexAIOptions? options, }) async { final id = _uuid.v4(); - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final publisher = + options?.publisher ?? defaultOptions.publisher ?? defaultPublisher; + final model = options?.model ?? defaultOptions.model ?? defaultModel; final result = await client.text.predict( prompt: input.toString(), - publisher: options?.publisher ?? - ArgumentError.checkNotNull( - defaultOptions.publisher, - 'VertexAIOptions.publisher', - ), + publisher: publisher, model: model, parameters: VertexAITextModelRequestParams( maxOutputTokens: - options?.maxOutputTokens ?? @@ -191,15 +194,12 @@ class VertexAI extends BaseLLM { final PromptValue promptValue, { final VertexAIOptions? options, }) async { - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final publisher = + options?.publisher ?? defaultOptions.publisher ?? defaultPublisher; + final model = options?.model ?? defaultOptions.model ?? defaultModel; final res = await client.text.countTokens( prompt: promptValue.toString(), - publisher: options?.publisher ?? 
- ArgumentError.checkNotNull( - defaultOptions.publisher, - 'VertexAIOptions.publisher', - ), + publisher: publisher, model: model, ); return res.totalTokens; diff --git a/packages/langchain_google/test/chat_models/google_ai/chat_google_generative_ai_test.dart b/packages/langchain_google/test/chat_models/google_ai/chat_google_generative_ai_test.dart index f6567f6d..6d692977 100644 --- a/packages/langchain_google/test/chat_models/google_ai/chat_google_generative_ai_test.dart +++ b/packages/langchain_google/test/chat_models/google_ai/chat_google_generative_ai_test.dart @@ -14,7 +14,7 @@ import 'package:test/test.dart'; void main() { group('ChatGoogleGenerativeAI tests', () { - const defaultModel = 'gemini-1.5-pro-latest'; + const defaultModel = 'gemini-1.5-pro'; late ChatGoogleGenerativeAI chatModel; @@ -73,7 +73,7 @@ void main() { expect(res.output.content, isNotEmpty); }); - test('Text-and-image input with gemini-pro-vision', () async { + test('Text-and-image input', () async { final res = await chatModel.invoke( PromptValue.chat([ ChatMessage.human( @@ -89,9 +89,6 @@ void main() { ]), ), ]), - options: const ChatGoogleGenerativeAIOptions( - model: 'gemini-pro-vision', - ), ); expect(res.output.content.toLowerCase(), contains('apple')); @@ -122,7 +119,8 @@ void main() { ), ); expect(res.output.content.length, lessThan(20)); - expect(res.finishReason, FinishReason.length); + // It seems the gemini-1.5 doesn't return length reason anymore + // expect(res.finishReason, FinishReason.length); }); test('Test Multi-turn conversations with gemini-pro', () async { @@ -177,7 +175,7 @@ void main() { 'properties': { 'location': { 'type': 'string', - 'description': 'The city and state, e.g. San Francisco, CA', + 'description': 'The city and country, e.g. San Francisco, US', }, 'unit': { 'type': 'string', @@ -196,7 +194,7 @@ void main() { ); final humanMessage = ChatMessage.humanText( - 'What’s the weather like in Boston and Madrid right now in celsius?', + 'What’s the weather like in Boston, US and Madrid, Spain in Celsius?', ); final res1 = await model.invoke(PromptValue.chat([humanMessage])); diff --git a/packages/langchain_mistralai/lib/src/chat_models/chat_mistralai.dart b/packages/langchain_mistralai/lib/src/chat_models/chat_mistralai.dart index 31bc53aa..70f6bd4b 100644 --- a/packages/langchain_mistralai/lib/src/chat_models/chat_mistralai.dart +++ b/packages/langchain_mistralai/lib/src/chat_models/chat_mistralai.dart @@ -156,7 +156,7 @@ class ChatMistralAI extends BaseChatModel { final Map? queryParams, final http.Client? client, super.defaultOptions = const ChatMistralAIOptions( - model: 'mistral-small', + model: defaultModel, ), this.encoding = 'cl100k_base', }) : _client = MistralAIClient( @@ -179,6 +179,9 @@ class ChatMistralAI extends BaseChatModel { @override String get modelType => 'chat-mistralai'; + /// The default model to use unless another is specified. + static const defaultModel = 'mistral-small'; + @override Future invoke( final PromptValue input, { @@ -216,7 +219,7 @@ class ChatMistralAI extends BaseChatModel { }) { return ChatCompletionRequest( model: ChatCompletionModel.modelId( - options?.model ?? defaultOptions.model ?? throwNullModelError(), + options?.model ?? defaultOptions.model ?? defaultModel, ), messages: messages.toChatCompletionMessages(), temperature: options?.temperature ?? 
defaultOptions.temperature, diff --git a/packages/langchain_mistralai/lib/src/chat_models/types.dart b/packages/langchain_mistralai/lib/src/chat_models/types.dart index 60158ea7..aa2f9537 100644 --- a/packages/langchain_mistralai/lib/src/chat_models/types.dart +++ b/packages/langchain_mistralai/lib/src/chat_models/types.dart @@ -1,12 +1,16 @@ import 'package:langchain_core/chat_models.dart'; +import 'package:meta/meta.dart'; /// {@template chat_mistral_ai_options} /// Options to pass into ChatMistralAI. +/// +/// You can check the list of available models [here](https://docs.mistral.ai/models). /// {@endtemplate} +@immutable class ChatMistralAIOptions extends ChatModelOptions { /// {@macro chat_mistral_ai_options} const ChatMistralAIOptions({ - this.model = 'mistral-small', + super.model, this.temperature, this.topP, this.maxTokens, @@ -15,11 +19,6 @@ class ChatMistralAIOptions extends ChatModelOptions { super.concurrencyLimit, }); - /// ID of the model to use. You can use the [List Available Models](https://docs.mistral.ai/api#operation/listModels) - /// API to see all of your available models, or see our [Model overview](https://docs.mistral.ai/models) - /// for model descriptions. - final String? model; - /// What sampling temperature to use, between 0.0 and 2.0. Higher values like /// 0.8 will make the output more random, while lower values like 0.2 will /// make it more focused and deterministic. @@ -56,6 +55,7 @@ class ChatMistralAIOptions extends ChatModelOptions { final int? maxTokens, final bool? safePrompt, final int? randomSeed, + final int? concurrencyLimit, }) { return ChatMistralAIOptions( model: model ?? this.model, @@ -64,6 +64,29 @@ class ChatMistralAIOptions extends ChatModelOptions { maxTokens: maxTokens ?? this.maxTokens, safePrompt: safePrompt ?? this.safePrompt, randomSeed: randomSeed ?? this.randomSeed, + concurrencyLimit: concurrencyLimit ?? this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatMistralAIOptions other) { + return model == other.model && + temperature == other.temperature && + topP == other.topP && + maxTokens == other.maxTokens && + safePrompt == other.safePrompt && + randomSeed == other.randomSeed && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + temperature.hashCode ^ + topP.hashCode ^ + maxTokens.hashCode ^ + safePrompt.hashCode ^ + randomSeed.hashCode ^ + concurrencyLimit.hashCode; + } } diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama/chat_ollama.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama/chat_ollama.dart index 7dbed939..2ff391ef 100644 --- a/packages/langchain_ollama/lib/src/chat_models/chat_ollama/chat_ollama.dart +++ b/packages/langchain_ollama/lib/src/chat_models/chat_ollama/chat_ollama.dart @@ -178,6 +178,9 @@ class ChatOllama extends BaseChatModel { @override String get modelType => 'chat-ollama'; + /// The default model to use unless another is specified. + static const defaultModel = 'llama3'; + @override Future invoke( final PromptValue input, { @@ -218,7 +221,7 @@ class ChatOllama extends BaseChatModel { final ChatOllamaOptions? options, }) { return GenerateChatCompletionRequest( - model: options?.model ?? defaultOptions.model ?? throwNullModelError(), + model: options?.model ?? defaultOptions.model ?? defaultModel, messages: messages.toMessages(), format: (options?.format ?? defaultOptions.format)?.toResponseFormat(), keepAlive: options?.keepAlive ?? 
defaultOptions.keepAlive, diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama/types.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama/types.dart index 67598acb..971f259c 100644 --- a/packages/langchain_ollama/lib/src/chat_models/chat_ollama/types.dart +++ b/packages/langchain_ollama/lib/src/chat_models/chat_ollama/types.dart @@ -1,15 +1,21 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; +import 'package:meta/meta.dart'; import '../../../langchain_ollama.dart'; import '../../llms/types.dart'; /// {@template chat_ollama_options} /// Options to pass into ChatOllama. +/// +/// For a complete list of supported models and model variants, see the +/// [Ollama model library](https://ollama.ai/library). /// {@endtemplate} +@immutable class ChatOllamaOptions extends ChatModelOptions { /// {@macro chat_ollama_options} const ChatOllamaOptions({ - this.model = 'llama3', + super.model, this.format, this.keepAlive, this.numKeep, @@ -44,9 +50,6 @@ class ChatOllamaOptions extends ChatModelOptions { super.concurrencyLimit, }); - /// The model used to generate completions - final String? model; - /// The format to return a response in. Currently the only accepted value is /// json. /// @@ -203,6 +206,7 @@ class ChatOllamaOptions extends ChatModelOptions { ChatOllamaOptions copyWith({ final String? model, final OllamaResponseFormat? format, + final int? keepAlive, final int? numKeep, final int? seed, final int? numPredict, @@ -223,7 +227,6 @@ class ChatOllamaOptions extends ChatModelOptions { final bool? numa, final int? numCtx, final int? numBatch, - final int? numGqa, final int? numGpu, final int? mainGpu, final bool? lowVram, @@ -232,14 +235,13 @@ class ChatOllamaOptions extends ChatModelOptions { final bool? vocabOnly, final bool? useMmap, final bool? useMlock, - final bool? embeddingOnly, - final double? ropeFrequencyBase, - final double? ropeFrequencyScale, final int? numThread, + final int? concurrencyLimit, }) { return ChatOllamaOptions( model: model ?? this.model, format: format ?? this.format, + keepAlive: keepAlive ?? this.keepAlive, numKeep: numKeep ?? this.numKeep, seed: seed ?? this.seed, numPredict: numPredict ?? this.numPredict, @@ -269,6 +271,81 @@ class ChatOllamaOptions extends ChatModelOptions { useMmap: useMmap ?? this.useMmap, useMlock: useMlock ?? this.useMlock, numThread: numThread ?? this.numThread, + concurrencyLimit: concurrencyLimit ?? 
this.concurrencyLimit, ); } + + @override + bool operator ==(covariant final ChatOllamaOptions other) { + return model == other.model && + format == other.format && + keepAlive == other.keepAlive && + numKeep == other.numKeep && + seed == other.seed && + numPredict == other.numPredict && + topK == other.topK && + topP == other.topP && + tfsZ == other.tfsZ && + typicalP == other.typicalP && + repeatLastN == other.repeatLastN && + temperature == other.temperature && + repeatPenalty == other.repeatPenalty && + presencePenalty == other.presencePenalty && + frequencyPenalty == other.frequencyPenalty && + mirostat == other.mirostat && + mirostatTau == other.mirostatTau && + mirostatEta == other.mirostatEta && + penalizeNewline == other.penalizeNewline && + const ListEquality().equals(stop, other.stop) && + numa == other.numa && + numCtx == other.numCtx && + numBatch == other.numBatch && + numGpu == other.numGpu && + mainGpu == other.mainGpu && + lowVram == other.lowVram && + f16KV == other.f16KV && + logitsAll == other.logitsAll && + vocabOnly == other.vocabOnly && + useMmap == other.useMmap && + useMlock == other.useMlock && + numThread == other.numThread && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + format.hashCode ^ + keepAlive.hashCode ^ + numKeep.hashCode ^ + seed.hashCode ^ + numPredict.hashCode ^ + topK.hashCode ^ + topP.hashCode ^ + tfsZ.hashCode ^ + typicalP.hashCode ^ + repeatLastN.hashCode ^ + temperature.hashCode ^ + repeatPenalty.hashCode ^ + presencePenalty.hashCode ^ + frequencyPenalty.hashCode ^ + mirostat.hashCode ^ + mirostatTau.hashCode ^ + mirostatEta.hashCode ^ + penalizeNewline.hashCode ^ + const ListEquality().hash(stop) ^ + numa.hashCode ^ + numCtx.hashCode ^ + numBatch.hashCode ^ + numGpu.hashCode ^ + mainGpu.hashCode ^ + lowVram.hashCode ^ + f16KV.hashCode ^ + logitsAll.hashCode ^ + vocabOnly.hashCode ^ + useMmap.hashCode ^ + useMlock.hashCode ^ + numThread.hashCode ^ + concurrencyLimit.hashCode; + } } diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/chat_ollama_tools.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/chat_ollama_tools.dart index 889e7c87..677fd308 100644 --- a/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/chat_ollama_tools.dart +++ b/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/chat_ollama_tools.dart @@ -83,7 +83,7 @@ class ChatOllamaTools extends BaseChatModel { final Map? queryParams, final http.Client? client, super.defaultOptions = const ChatOllamaToolsOptions( - options: ChatOllamaOptions(model: 'llama3'), + options: ChatOllamaOptions(model: defaultModel), ), this.encoding = 'cl100k_base', }) : _client = OllamaClient( @@ -108,6 +108,9 @@ class ChatOllamaTools extends BaseChatModel { @override String get modelType => 'chat-ollama-tools'; + /// The default model to use unless another is specified. + static const defaultModel = 'llama3'; + @override Future invoke( PromptValue input, { @@ -132,7 +135,7 @@ class ChatOllamaTools extends BaseChatModel { final defaultOptions = this.defaultOptions.options ?? const ChatOllamaOptions(); return GenerateChatCompletionRequest( - model: options?.model ?? defaultOptions.model ?? throwNullModelError(), + model: options?.model ?? defaultOptions.model ?? defaultModel, messages: messages.toMessages(), format: ResponseFormat.json, keepAlive: options?.keepAlive ?? 
defaultOptions.keepAlive, diff --git a/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/types.dart b/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/types.dart index 9447a51f..f10f1186 100644 --- a/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/types.dart +++ b/packages/langchain_ollama/lib/src/chat_models/chat_ollama_tools/types.dart @@ -1,5 +1,7 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; import 'package:langchain_core/tools.dart'; +import 'package:meta/meta.dart'; import '../chat_ollama/types.dart'; import 'chat_ollama_tools.dart'; @@ -9,6 +11,7 @@ export '../chat_ollama/types.dart'; /// {@template chat_ollama_tools_options} /// Options to pass into [ChatOllamaTools]. /// {@endtemplate} +@immutable class ChatOllamaToolsOptions extends ChatModelOptions { /// {@macro chat_ollama_tools_options} const ChatOllamaToolsOptions({ @@ -57,6 +60,39 @@ Example response format: Ensure your response is valid JSON and follows this exact format. '''; + + /// Creates a copy of this [ChatOllamaToolsOptions] object with the given + /// fields replaced with the new values. + ChatOllamaToolsOptions copyWith({ + ChatOllamaOptions? options, + List? tools, + ChatToolChoice? toolChoice, + String? toolsSystemPromptTemplate, + }) { + return ChatOllamaToolsOptions( + options: options ?? this.options, + tools: tools ?? this.tools, + toolChoice: toolChoice ?? this.toolChoice, + toolsSystemPromptTemplate: + toolsSystemPromptTemplate ?? this.toolsSystemPromptTemplate, + ); + } + + @override + bool operator ==(covariant final ChatOllamaToolsOptions other) { + return options == other.options && + const ListEquality().equals(tools, other.tools) && + toolChoice == other.toolChoice && + toolsSystemPromptTemplate == other.toolsSystemPromptTemplate; + } + + @override + int get hashCode { + return options.hashCode ^ + const ListEquality().hash(tools) ^ + toolChoice.hashCode ^ + toolsSystemPromptTemplate.hashCode; + } } /// Default tool called if model decides no other tools should be called diff --git a/packages/langchain_ollama/lib/src/llms/ollama.dart b/packages/langchain_ollama/lib/src/llms/ollama.dart index e61c6e27..fd9a8ed4 100644 --- a/packages/langchain_ollama/lib/src/llms/ollama.dart +++ b/packages/langchain_ollama/lib/src/llms/ollama.dart @@ -152,7 +152,7 @@ class Ollama extends BaseLLM { final Map? queryParams, final http.Client? client, super.defaultOptions = const OllamaOptions( - model: 'llama3', + model: defaultModel, ), this.encoding = 'cl100k_base', }) : _client = OllamaClient( @@ -177,6 +177,9 @@ class Ollama extends BaseLLM { @override String get modelType => 'ollama'; + /// The default model to use unless another is specified. + static const defaultModel = 'llama3'; + @override Future invoke( final PromptValue input, { @@ -210,7 +213,7 @@ class Ollama extends BaseLLM { final OllamaOptions? options, }) { return GenerateCompletionRequest( - model: options?.model ?? defaultOptions.model ?? throwNullModelError(), + model: options?.model ?? defaultOptions.model ?? 
defaultModel, prompt: prompt, system: options?.system, template: options?.template, diff --git a/packages/langchain_ollama/lib/src/llms/types.dart b/packages/langchain_ollama/lib/src/llms/types.dart index dcbe7669..494e759e 100644 --- a/packages/langchain_ollama/lib/src/llms/types.dart +++ b/packages/langchain_ollama/lib/src/llms/types.dart @@ -1,12 +1,18 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/llms.dart'; +import 'package:meta/meta.dart'; /// {@template ollama_options} /// Options to pass into the Ollama LLM. +/// +/// For a complete list of supported models and model variants, see the +/// [Ollama model library](https://ollama.ai/library). /// {@endtemplate} +@immutable class OllamaOptions extends LLMOptions { /// {@macro ollama_options} const OllamaOptions({ - this.model = 'llama3', + super.model, this.system, this.template, this.context, @@ -45,9 +51,6 @@ class OllamaOptions extends LLMOptions { super.concurrencyLimit, }); - /// The model used to generate completions - final String? model; - /// The system prompt (Overrides what is defined in the Modelfile). final String? system; @@ -228,6 +231,7 @@ class OllamaOptions extends LLMOptions { final List? context, final OllamaResponseFormat? format, final bool? raw, + final int? keepAlive, final int? numKeep, final int? seed, final int? numPredict, @@ -248,7 +252,6 @@ class OllamaOptions extends LLMOptions { final bool? numa, final int? numCtx, final int? numBatch, - final int? numGqa, final int? numGpu, final int? mainGpu, final bool? lowVram, @@ -257,10 +260,8 @@ class OllamaOptions extends LLMOptions { final bool? vocabOnly, final bool? useMmap, final bool? useMlock, - final bool? embeddingOnly, - final double? ropeFrequencyBase, - final double? ropeFrequencyScale, final int? numThread, + final int? concurrencyLimit, }) { return OllamaOptions( model: model ?? this.model, @@ -269,6 +270,7 @@ class OllamaOptions extends LLMOptions { context: context ?? this.context, format: format ?? this.format, raw: raw ?? this.raw, + keepAlive: keepAlive ?? this.keepAlive, numKeep: numKeep ?? this.numKeep, seed: seed ?? this.seed, numPredict: numPredict ?? this.numPredict, @@ -298,8 +300,93 @@ class OllamaOptions extends LLMOptions { useMmap: useMmap ?? this.useMmap, useMlock: useMlock ?? this.useMlock, numThread: numThread ?? this.numThread, + concurrencyLimit: concurrencyLimit ?? 
super.concurrencyLimit, ); } + + @override + bool operator ==(covariant final OllamaOptions other) { + return identical(this, other) || + runtimeType == other.runtimeType && + model == other.model && + system == other.system && + template == other.template && + const ListEquality().equals(context, other.context) && + format == other.format && + raw == other.raw && + keepAlive == other.keepAlive && + numKeep == other.numKeep && + seed == other.seed && + numPredict == other.numPredict && + topK == other.topK && + topP == other.topP && + tfsZ == other.tfsZ && + typicalP == other.typicalP && + repeatLastN == other.repeatLastN && + temperature == other.temperature && + repeatPenalty == other.repeatPenalty && + presencePenalty == other.presencePenalty && + frequencyPenalty == other.frequencyPenalty && + mirostat == other.mirostat && + mirostatTau == other.mirostatTau && + mirostatEta == other.mirostatEta && + penalizeNewline == other.penalizeNewline && + const ListEquality().equals(stop, other.stop) && + numa == other.numa && + numCtx == other.numCtx && + numBatch == other.numBatch && + numGpu == other.numGpu && + mainGpu == other.mainGpu && + lowVram == other.lowVram && + f16KV == other.f16KV && + logitsAll == other.logitsAll && + vocabOnly == other.vocabOnly && + useMmap == other.useMmap && + useMlock == other.useMlock && + numThread == other.numThread && + concurrencyLimit == other.concurrencyLimit; + } + + @override + int get hashCode { + return model.hashCode ^ + system.hashCode ^ + template.hashCode ^ + const ListEquality().hash(context) ^ + format.hashCode ^ + raw.hashCode ^ + keepAlive.hashCode ^ + numKeep.hashCode ^ + seed.hashCode ^ + numPredict.hashCode ^ + topK.hashCode ^ + topP.hashCode ^ + tfsZ.hashCode ^ + typicalP.hashCode ^ + repeatLastN.hashCode ^ + temperature.hashCode ^ + repeatPenalty.hashCode ^ + presencePenalty.hashCode ^ + frequencyPenalty.hashCode ^ + mirostat.hashCode ^ + mirostatTau.hashCode ^ + mirostatEta.hashCode ^ + penalizeNewline.hashCode ^ + const ListEquality().hash(stop) ^ + numa.hashCode ^ + numCtx.hashCode ^ + numBatch.hashCode ^ + numGpu.hashCode ^ + mainGpu.hashCode ^ + lowVram.hashCode ^ + f16KV.hashCode ^ + logitsAll.hashCode ^ + vocabOnly.hashCode ^ + useMmap.hashCode ^ + useMlock.hashCode ^ + numThread.hashCode ^ + concurrencyLimit.hashCode; + } } /// The format to return a response in. 
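Illustrative sketch (not part of the diff) of the value semantics added to the options classes; the field names come from `ChatOllamaOptions` above, and it is assumed the package's public library exports these types.

```dart
import 'package:langchain_ollama/langchain_ollama.dart';

void main() {
  // With the @immutable ==/hashCode overrides, two option objects holding
  // the same values now compare equal and hash identically.
  const a = ChatOllamaOptions(temperature: 0.8, keepAlive: 5);
  const b = ChatOllamaOptions(temperature: 0.8, keepAlive: 5);
  assert(a == b && a.hashCode == b.hashCode);

  // copyWith replaces only the fields that are passed in; model stays null
  // here and resolves to ChatOllama.defaultModel ('llama3') at request time.
  final c = a.copyWith(numCtx: 4096);
  assert(c.model == null && c.temperature == 0.8 && c.numCtx == 4096);
}
```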
diff --git a/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart b/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart index 0fa46c03..7e001289 100644 --- a/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart +++ b/packages/langchain_ollama/test/chat_models/chat_ollama_test.dart @@ -107,7 +107,7 @@ void main() { ]), ); expect( - res.output.content.replaceAll(RegExp(r'[\s\n]'), ''), + res.output.content.replaceAll(RegExp(r'[\s\n-]'), ''), contains('123456789'), ); expect(res.finishReason, FinishReason.stop); diff --git a/packages/langchain_openai/lib/src/chat_models/chat_openai.dart b/packages/langchain_openai/lib/src/chat_models/chat_openai.dart index e218637a..54c955e9 100644 --- a/packages/langchain_openai/lib/src/chat_models/chat_openai.dart +++ b/packages/langchain_openai/lib/src/chat_models/chat_openai.dart @@ -25,8 +25,10 @@ import 'types.dart'; /// - [Completions API docs](https://platform.openai.com/docs/api-reference/chat) /// /// You can also use this wrapper to consume OpenAI-compatible APIs like -/// [Anyscale](https://www.anyscale.com), [Together AI](https://www.together.ai), -/// [OpenRouter](https://openrouter.ai), [One API](https://github.com/songquanpeng/one-api), etc. +/// [TogetherAI](https://www.together.ai/), [Anyscale](https://www.anyscale.com/), +/// [OpenRouter](https://openrouter.ai), [One API](https://github.com/songquanpeng/one-api), +/// [Groq](https://groq.com/), [Llamafile](https://llamafile.ai/), +/// [GPT4All](https://gpt4all.io/), [FastChat](https://github.com/lm-sys/FastChat), etc. /// /// ### Call options /// @@ -172,7 +174,7 @@ class ChatOpenAI extends BaseChatModel { /// [OpenAI dashboard](https://platform.openai.com/account/api-keys). /// - `organization`: your OpenAI organization ID (if applicable). /// - [ChatOpenAI.encoding] - /// - [OpenAI.defaultOptions] + /// - [ChatOpenAI.defaultOptions] /// /// Advance configuration options: /// - `baseUrl`: the base URL to use. Defaults to OpenAI's API URL. You can @@ -192,7 +194,7 @@ class ChatOpenAI extends BaseChatModel { final Map? queryParams, final http.Client? client, super.defaultOptions = const ChatOpenAIOptions( - model: 'gpt-3.5-turbo', + model: defaultModel, ), this.encoding, }) : _client = OpenAIClient( @@ -236,6 +238,9 @@ class ChatOpenAI extends BaseChatModel { @override String get modelType => 'openai-chat'; + /// The default model to use unless another is specified. + static const defaultModel = 'gpt-3.5-turbo'; + @override Future invoke( final PromptValue input, { @@ -288,7 +293,7 @@ class ChatOpenAI extends BaseChatModel { return CreateChatCompletionRequest( model: ChatCompletionModel.modelId( - options?.model ?? defaultOptions.model ?? throwNullModelError(), + options?.model ?? defaultOptions.model ?? defaultModel, ), messages: messagesDtos, tools: toolsDtos, @@ -334,8 +339,7 @@ class ChatOpenAI extends BaseChatModel { final PromptValue promptValue, { final ChatOpenAIOptions? options, }) async { - final model = - options?.model ?? defaultOptions.model ?? throwNullModelError(); + final model = options?.model ?? defaultOptions.model ?? 
defaultModel; final tiktoken = _getTiktoken(); final messages = promptValue.toChatMessages(); diff --git a/packages/langchain_openai/lib/src/chat_models/types.dart b/packages/langchain_openai/lib/src/chat_models/types.dart index 299902fe..ed53c65c 100644 --- a/packages/langchain_openai/lib/src/chat_models/types.dart +++ b/packages/langchain_openai/lib/src/chat_models/types.dart @@ -1,13 +1,39 @@ +import 'package:collection/collection.dart'; import 'package:langchain_core/chat_models.dart'; import 'package:langchain_core/tools.dart'; +import 'package:meta/meta.dart'; /// {@template chat_openai_options} /// Options to pass into the OpenAI Chat Model. +/// +/// Available [ChatOpenAIOptions.model]s: +/// - `gpt-4` +/// - `gpt-4-32k` +/// - `gpt-4-32k-0314` +/// - `gpt-4-32k-0613` +/// - `gpt-4-0125-preview` +/// - `gpt-4-0314` +/// - `gpt-4-0613` +/// - `gpt-4-1106-preview` +/// - `gpt-4-turbo` +/// - `gpt-4-turbo-2024-04-09` +/// - `gpt-4-turbo-preview` +/// - `gpt-4-vision-preview` +/// - `gpt-4o` +/// - `gpt-4o-2024-05-13` +/// - `gpt-4o-mini` +/// - `gpt-4o-mini-2024-07-18` +/// - `gpt-3.5-turbo` +/// - `gpt-3.5-turbo-16k` +/// +/// Mind that the list may be outdated. +/// See https://platform.openai.com/docs/models for the latest list. /// {@endtemplate} +@immutable class ChatOpenAIOptions extends ChatModelOptions { /// {@macro chat_openai_options} const ChatOpenAIOptions({ - this.model = 'gpt-3.5-turbo', + super.model, this.frequencyPenalty, this.logitBias, this.maxTokens, @@ -18,40 +44,14 @@ class ChatOpenAIOptions extends ChatModelOptions { this.stop, this.temperature, this.topP, + super.tools, + super.toolChoice, this.parallelToolCalls, this.serviceTier, this.user, - super.tools, - super.toolChoice, super.concurrencyLimit, }); - /// ID of the model to use (e.g. 'gpt-3.5-turbo'). - /// - /// Available models: - /// - `gpt-4` - /// - `gpt-4-32k` - /// - `gpt-4-32k-0314` - /// - `gpt-4-32k-0613` - /// - `gpt-4-0125-preview` - /// - `gpt-4-0314` - /// - `gpt-4-0613` - /// - `gpt-4-1106-preview` - /// - `gpt-4-turbo` - /// - `gpt-4-turbo-2024-04-09` - /// - `gpt-4-turbo-preview` - /// - `gpt-4-vision-preview` - /// - `gpt-4o` - /// - `gpt-4o-2024-05-13` - /// - `gpt-4o-mini` - /// - `gpt-4o-mini-2024-07-18` - /// - `gpt-3.5-turbo` - /// - `gpt-3.5-turbo-16k` - /// - /// Mind that the list may be outdated. - /// See https://platform.openai.com/docs/models for the latest list. - final String? model; - /// Number between -2.0 and 2.0. Positive values penalize new tokens based on /// their existing frequency in the text so far, decreasing the model's /// likelihood to repeat the same line verbatim. @@ -145,22 +145,23 @@ class ChatOpenAIOptions extends ChatModelOptions { /// Creates a copy of this [ChatOpenAIOptions] object with the given fields /// replaced with the new values. ChatOpenAIOptions copyWith({ - final String? model, - final double? frequencyPenalty, - final Map? logitBias, - final int? maxTokens, - final int? n, - final double? presencePenalty, - final ChatOpenAIResponseFormat? responseFormat, - final int? seed, - final List? stop, - final double? temperature, - final double? topP, - final bool? parallelToolCalls, - final ChatOpenAIServiceTier? serviceTier, - final String? user, - final List? tools, - final ChatToolChoice? toolChoice, + String? model, + double? frequencyPenalty, + Map? logitBias, + int? maxTokens, + int? n, + double? presencePenalty, + ChatOpenAIResponseFormat? responseFormat, + int? seed, + List? stop, + double? temperature, + double? topP, + List? 
diff --git a/packages/langchain_openai/lib/src/llms/openai.dart b/packages/langchain_openai/lib/src/llms/openai.dart
index 9471acfc..aed0e9e9 100644
--- a/packages/langchain_openai/lib/src/llms/openai.dart
+++ b/packages/langchain_openai/lib/src/llms/openai.dart
@@ -1,3 +1,5 @@
+import 'dart:math';
+
 import 'package:http/http.dart' as http;
 import 'package:langchain_core/llms.dart';
 import 'package:langchain_core/prompts.dart';
@@ -186,8 +188,9 @@ class OpenAI extends BaseLLM {
     final Map? queryParams,
     final http.Client? client,
     super.defaultOptions = const OpenAIOptions(
-      model: 'gpt-3.5-turbo-instruct',
-      maxTokens: 256,
+      model: defaultModel,
+      maxTokens: defaultMaxTokens,
+      concurrencyLimit: defaultConcurrencyLimit,
     ),
     this.encoding,
   }) : _client = OpenAIClient(
@@ -228,6 +231,15 @@ class OpenAI extends BaseLLM {
   @override
   String get modelType => 'openai';
 
+  /// The default model to use unless another is specified.
+  static const defaultModel = 'gpt-3.5-turbo-instruct';
+
+  /// The default max tokens to use unless another is specified.
+  static const defaultMaxTokens = 256;
+
+  /// The default concurrency limit to use unless another is specified.
+  static const defaultConcurrencyLimit = 20;
+
   @override
   Future invoke(
     final PromptValue input, {
@@ -259,7 +271,8 @@ class OpenAI extends BaseLLM {
     // Otherwise, we can batch the calls to the API
     final finalOptions = options?.first ?? defaultOptions;
-    final concurrencyLimit = finalOptions.concurrencyLimit;
+    final concurrencyLimit =
+        min(finalOptions.concurrencyLimit, defaultConcurrencyLimit);
     var index = 0;
     final results = [];
@@ -302,7 +315,7 @@ class OpenAI extends BaseLLM {
   }) {
     return CreateCompletionRequest(
       model: CompletionModel.modelId(
-        options?.model ?? defaultOptions.model ?? throwNullModelError(),
+        options?.model ?? defaultOptions.model ?? defaultModel,
       ),
       prompt: CompletionPrompt.listString(prompts),
       bestOf: options?.bestOf ?? defaultOptions.bestOf,
@@ -310,7 +323,8 @@ class OpenAI extends BaseLLM {
           options?.frequencyPenalty ?? defaultOptions.frequencyPenalty,
       logitBias: options?.logitBias ?? defaultOptions.logitBias,
       logprobs: options?.logprobs ?? defaultOptions.logprobs,
-      maxTokens: options?.maxTokens ?? defaultOptions.maxTokens,
+      maxTokens:
+          options?.maxTokens ?? defaultOptions.maxTokens ?? defaultMaxTokens,
       n: options?.n ?? defaultOptions.n,
       presencePenalty:
           options?.presencePenalty ?? defaultOptions.presencePenalty,
@@ -340,7 +354,7 @@ class OpenAI extends BaseLLM {
     final encoding = this.encoding != null
         ? getEncoding(this.encoding!)
         : encodingForModel(
-            options?.model ?? defaultOptions.model ?? throwNullModelError(),
+            options?.model ?? defaultOptions.model ?? defaultModel,
           );
     return encoding.encode(promptValue.toString());
   }
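Illustrative only: the two resolution patterns the hunks above rely on, written as standalone Dart. The helper names and constants here are hypothetical stand-ins, not part of the package API.

```dart
import 'dart:math';

// Hypothetical stand-ins for the static defaults introduced above.
const String kDefaultModel = 'gpt-3.5-turbo-instruct';
const int kDefaultConcurrencyLimit = 20;

// Per-call option wins, then the configured default, then the static default.
String resolveModel(String? callOption, String? configuredDefault) =>
    callOption ?? configuredDefault ?? kDefaultModel;

// Batched calls are capped so a configured limit never exceeds the ceiling.
int clampConcurrency(int requested) => min(requested, kDefaultConcurrencyLimit);

void main() {
  print(resolveModel(null, null)); // gpt-3.5-turbo-instruct
  print(resolveModel('davinci-002', null)); // davinci-002
  print(clampConcurrency(100)); // 20
}
```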
diff --git a/packages/langchain_openai/lib/src/llms/types.dart b/packages/langchain_openai/lib/src/llms/types.dart
index 6869a4c4..7f8da471 100644
--- a/packages/langchain_openai/lib/src/llms/types.dart
+++ b/packages/langchain_openai/lib/src/llms/types.dart
@@ -4,17 +4,24 @@ import 'package:meta/meta.dart';
 
 /// {@template openai_options}
 /// Options to pass into the OpenAI LLM.
+///
+/// Available models:
+/// - `gpt-3.5-turbo-instruct`
+/// - `davinci-002`
+/// - `babbage-002`
+/// Mind that the list may be outdated.
+/// See https://platform.openai.com/docs/models for the latest list.
 /// {@endtemplate}
 @immutable
 class OpenAIOptions extends LLMOptions {
   /// {@macro openai_options}
   const OpenAIOptions({
-    this.model = 'gpt-3.5-turbo-instruct',
+    super.model,
     this.bestOf,
     this.frequencyPenalty,
     this.logitBias,
     this.logprobs,
-    this.maxTokens = 256,
+    this.maxTokens,
     this.n,
     this.presencePenalty,
     this.seed,
@@ -23,20 +30,9 @@ class OpenAIOptions extends LLMOptions {
     this.temperature,
     this.topP,
     this.user,
-    super.concurrencyLimit = 20,
+    super.concurrencyLimit,
   });
 
-  /// ID of the model to use (e.g. 'gpt-3.5-turbo-instruct').
-  ///
-  /// Available models:
-  /// - `gpt-3.5-turbo-instruct`
-  /// - `davinci-002`
-  /// - `babbage-002`
-  ///
-  /// Mind that the list may be outdated.
-  /// See https://platform.openai.com/docs/models for the latest list.
-  final String? model;
-
   /// Generates best_of completions server-side and returns the "best"
   /// (the one with the highest log probability per token).
   ///
@@ -128,20 +124,21 @@ class OpenAIOptions extends LLMOptions {
   /// Creates a copy of this [OpenAIOptions] object with the given fields
   /// replaced with the new values.
   OpenAIOptions copyWith({
-    final String? model,
-    final int? bestOf,
-    final double? frequencyPenalty,
-    final Map? logitBias,
-    final int? logprobs,
-    final int? maxTokens,
-    final int? n,
-    final double? presencePenalty,
-    final int? seed,
-    final List? stop,
-    final String? suffix,
-    final double? temperature,
-    final double? topP,
-    final String? user,
+    String? model,
+    int? bestOf,
+    double? frequencyPenalty,
+    Map? logitBias,
+    int? logprobs,
+    int? maxTokens,
+    int? n,
+    double? presencePenalty,
+    int? seed,
+    List? stop,
+    String? suffix,
+    double? temperature,
+    double? topP,
+    String? user,
+    int? concurrencyLimit,
   }) {
     return OpenAIOptions(
       model: model ?? this.model,
@@ -158,42 +155,48 @@ class OpenAIOptions extends LLMOptions {
       temperature: temperature ?? this.temperature,
       topP: topP ?? this.topP,
       user: user ?? this.user,
+      concurrencyLimit: concurrencyLimit ?? super.concurrencyLimit,
     );
   }
 
   @override
-  bool operator ==(covariant final OpenAIOptions other) =>
-      identical(this, other) ||
-      runtimeType == other.runtimeType &&
-          model == other.model &&
-          bestOf == other.bestOf &&
-          frequencyPenalty == other.frequencyPenalty &&
-          const MapEquality().equals(logitBias, other.logitBias) &&
-          logprobs == other.logprobs &&
-          maxTokens == other.maxTokens &&
-          n == other.n &&
-          presencePenalty == other.presencePenalty &&
-          seed == other.seed &&
-          stop == other.stop &&
-          suffix == other.suffix &&
-          temperature == other.temperature &&
-          topP == other.topP &&
-          user == other.user;
+  bool operator ==(covariant final OpenAIOptions other) {
+    return identical(this, other) ||
+        runtimeType == other.runtimeType &&
+            model == other.model &&
+            bestOf == other.bestOf &&
+            frequencyPenalty == other.frequencyPenalty &&
+            const MapEquality()
+                .equals(logitBias, other.logitBias) &&
+            logprobs == other.logprobs &&
+            maxTokens == other.maxTokens &&
+            n == other.n &&
+            presencePenalty == other.presencePenalty &&
+            seed == other.seed &&
+            const ListEquality().equals(stop, other.stop) &&
+            suffix == other.suffix &&
+            temperature == other.temperature &&
+            topP == other.topP &&
+            user == other.user &&
+            concurrencyLimit == other.concurrencyLimit;
+  }
 
   @override
-  int get hashCode =>
-      model.hashCode ^
-      bestOf.hashCode ^
-      frequencyPenalty.hashCode ^
-      const MapEquality().hash(logitBias) ^
-      logprobs.hashCode ^
-      maxTokens.hashCode ^
-      n.hashCode ^
-      presencePenalty.hashCode ^
-      seed.hashCode ^
-      stop.hashCode ^
-      suffix.hashCode ^
-      temperature.hashCode ^
-      topP.hashCode ^
-      user.hashCode;
+  int get hashCode {
+    return model.hashCode ^
+        bestOf.hashCode ^
+        frequencyPenalty.hashCode ^
+        const MapEquality().hash(logitBias) ^
+        logprobs.hashCode ^
+        maxTokens.hashCode ^
+        n.hashCode ^
+        presencePenalty.hashCode ^
+        seed.hashCode ^
+        const ListEquality().hash(stop) ^
+        suffix.hashCode ^
+        temperature.hashCode ^
+        topP.hashCode ^
+        user.hashCode ^
+        concurrencyLimit.hashCode;
+  }
 }
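A small sketch (assuming the `OpenAIOptions` API shown above) of why the switch to `ListEquality` matters: two configurations built from equal but distinct `stop` lists now compare equal by value.

```dart
import 'package:langchain_openai/langchain_openai.dart';

void main() {
  // Two separate list instances with the same contents.
  final a = OpenAIOptions(maxTokens: 64, stop: ['\n']);
  final b = OpenAIOptions(maxTokens: 64, stop: ['\n']);

  // ListEquality compares stop element-wise, not by identity.
  print(a == b); // true
  print(a.hashCode == b.hashCode); // true
}
```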
diff --git a/packages/langchain_openai/test/chains/qa_with_sources_test.dart b/packages/langchain_openai/test/chains/qa_with_sources_test.dart
index b1080986..a94ea862 100644
--- a/packages/langchain_openai/test/chains/qa_with_sources_test.dart
+++ b/packages/langchain_openai/test/chains/qa_with_sources_test.dart
@@ -53,7 +53,6 @@ void main() {
     final llm = ChatOpenAI(
       apiKey: openaiApiKey,
       defaultOptions: const ChatOpenAIOptions(
-        model: 'gpt-3.5-turbo-0613',
         temperature: 0,
       ),
     );
diff --git a/packages/langchain_openai/test/chat_models/anyscale_test.dart b/packages/langchain_openai/test/chat_models/anyscale_test.dart
index 1a2fdef1..f0a99e88 100644
--- a/packages/langchain_openai/test/chat_models/anyscale_test.dart
+++ b/packages/langchain_openai/test/chat_models/anyscale_test.dart
@@ -30,8 +30,6 @@ void main() {
         'codellama/CodeLlama-34b-Instruct-hf',
         'mistralai/Mistral-7B-Instruct-v0.1',
         'mistralai/Mixtral-8x7B-Instruct-v0.1',
-        'HuggingFaceH4/zephyr-7b-beta',
-        'Open-Orca/Mistral-7B-OpenOrca',
       ];
       for (final model in models) {
         final res = await chatModel.invoke(
@@ -67,8 +65,6 @@ void main() {
         'codellama/CodeLlama-34b-Instruct-hf',
         'mistralai/Mistral-7B-Instruct-v0.1',
         'mistralai/Mixtral-8x7B-Instruct-v0.1',
-        'HuggingFaceH4/zephyr-7b-beta',
-        'Open-Orca/Mistral-7B-OpenOrca',
       ];
       for (final model in models) {
         final stream = chatModel.stream(
diff --git a/packages/langchain_openai/test/chat_models/chat_openai_test.dart b/packages/langchain_openai/test/chat_models/chat_openai_test.dart
index 6268a77b..7c2d95d1 100644
--- a/packages/langchain_openai/test/chat_models/chat_openai_test.dart
+++ b/packages/langchain_openai/test/chat_models/chat_openai_test.dart
@@ -208,7 +208,6 @@ void main() {
     test('Test countTokens messages', () async {
       final models = [
-        'gpt-3.5-turbo-0301',
         'gpt-3.5-turbo-0613',
         'gpt-3.5-turbo-16k-0613',
         'gpt-4-0314',