feat: use llama.cpp ggml-org/llama.cpp#9639
jpohhhh committed Feb 20, 2025
1 parent f87b2e5 commit 194b3b1
Showing 14 changed files with 491 additions and 137 deletions.
8 changes: 4 additions & 4 deletions example/lib/main.dart
@@ -480,7 +480,7 @@ class _MyAppState extends State<MyApp> {
if (_tool != null)
Tool(
name: _tool!.name,
- jsonSchema: jsonEncode(_tool!.parametersAsString),
+ jsonSchema: _tool!.parametersAsString,
),
],
maxTokens: _maxTokens.round(),
@@ -516,11 +516,11 @@ class _MyAppState extends State<MyApp> {
print(
'Download progress: $downloadProgress, Load progress: $loadProgress');
});
- }, (response, done) {
+ }, (response, responseJson, done) {
setState(() {
_mlcDownloadProgress = null;
_mlcLoadProgress = null;
- latestResult = response;
+ latestResult = responseJson;
if (done) {
_runningRequestId = null;
}
@@ -549,7 +549,7 @@ class _MyAppState extends State<MyApp> {

_inferenceStartTime = DateTime.now();

- int requestId = await fllamaChat(request, (response, done) {
+ int requestId = await fllamaChat(request, (response, responseJson, done) {
setState(() {
latestResult = response;
fllamaTokenize(FllamaTokenizeRequest(
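Note on the two call-site changes above: the tool's JSON Schema now goes through unchanged (no extra jsonEncode), and the inference callback gains a responseJson argument next to the plain-text response. A minimal sketch of the updated call shape in Dart, assuming an OpenAiRequest assembled the same way as elsewhere in this example app, and assuming fllamaChat and OpenAiRequest are exported from package:fllama/fllama.dart as the example's imports suggest:

import 'package:fllama/fllama.dart';

// Sketch only: `request` is assumed to be an OpenAiRequest built as in the
// example app, with any Tool's jsonSchema supplied as a raw JSON string.
Future<int> startChat(OpenAiRequest request) async {
  // The callback now receives the plain text so far, an OpenAI-style JSON
  // string for the same response, and a completion flag.
  final requestId = await fllamaChat(request, (response, responseJson, done) {
    print('text so far: $response');
    print('openai-style json: $responseJson');
    if (done) {
      print('inference finished');
    }
  });
  return requestId;
}
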
2 changes: 1 addition & 1 deletion example/macos/Podfile.lock
@@ -26,7 +26,7 @@ EXTERNAL SOURCES:

SPEC CHECKSUMS:
file_selector_macos: 585232b688707857504f9cb5f985a7c97fe4dd30
- fllama: 70eecc4bce57bde96fc985659224585564a2bfa2
+ fllama: 6136327ecf6807fee16e195977b999e40b7c3a2d
FlutterMacOS: 8f6f14fa908a6fb3fba0cd85dbd81ec4b251fb24
shared_preferences_foundation: 9e1978ff2562383bd5676f64ec4e9aa8fa06a6f7

70 changes: 35 additions & 35 deletions example/pubspec.lock
@@ -5,42 +5,42 @@ packages:
dependency: transitive
description:
name: async
sha256: "947bfcf187f74dbc5e146c9eb9c0f10c9f8b30743e341481c1e2ed3ecc18c20c"
sha256: d2872f9c19731c2e5f10444b14686eb7cc85c76274bd6c16e1816bff9a3bab63
url: "https://pub.dev"
source: hosted
version: "2.11.0"
version: "2.12.0"
boolean_selector:
dependency: transitive
description:
name: boolean_selector
sha256: "6cfb5af12253eaf2b368f07bacc5a80d1301a071c73360d746b7f2e32d762c66"
sha256: "8aab1771e1243a5063b8b0ff68042d67334e3feab9e95b9490f9a6ebf73b42ea"
url: "https://pub.dev"
source: hosted
version: "2.1.1"
version: "2.1.2"
characters:
dependency: transitive
description:
name: characters
sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
sha256: f71061c654a3380576a52b451dd5532377954cf9dbd272a78fc8479606670803
url: "https://pub.dev"
source: hosted
version: "1.3.0"
version: "1.4.0"
clock:
dependency: transitive
description:
name: clock
sha256: cb6d7f03e1de671e34607e909a7213e31d7752be4fb66a86d29fe1eb14bfb5cf
sha256: fddb70d9b5277016c77a80201021d40a2247104d9f4aa7bab7157b7e3f05b84b
url: "https://pub.dev"
source: hosted
version: "1.1.1"
version: "1.1.2"
collection:
dependency: transitive
description:
name: collection
sha256: a1ace0a119f20aabc852d165077c036cd864315bd99b7eaa10a60100341941bf
sha256: "2f5709ae4d3d59dd8f7cd309b4e023046b57d8a6c82130785d2b0e5868084e76"
url: "https://pub.dev"
source: hosted
version: "1.19.0"
version: "1.19.1"
cross_file:
dependency: transitive
description:
@@ -69,10 +69,10 @@ packages:
dependency: transitive
description:
name: fake_async
sha256: "511392330127add0b769b75a987850d136345d9227c6b94c96a04cf4a391bf78"
sha256: "6a95e56b2449df2273fd8c45a662d6947ce1ebb7aafe80e550a3f68297f3cacc"
url: "https://pub.dev"
source: hosted
version: "1.3.1"
version: "1.3.2"
ffi:
dependency: transitive
description:
@@ -244,18 +244,18 @@
dependency: transitive
description:
name: leak_tracker
sha256: "7bb2830ebd849694d1ec25bf1f44582d6ac531a57a365a803a6034ff751d2d06"
sha256: c35baad643ba394b40aac41080300150a4f08fd0fd6a10378f8f7c6bc161acec
url: "https://pub.dev"
source: hosted
version: "10.0.7"
version: "10.0.8"
leak_tracker_flutter_testing:
dependency: transitive
description:
name: leak_tracker_flutter_testing
sha256: "9491a714cca3667b60b5c420da8217e6de0d1ba7a5ec322fab01758f6998f379"
sha256: f8b613e7e6a13ec79cfdc0e97638fddb3ab848452eff057653abd3edba760573
url: "https://pub.dev"
source: hosted
version: "3.0.8"
version: "3.0.9"
leak_tracker_testing:
dependency: transitive
description:
@@ -276,10 +276,10 @@
dependency: transitive
description:
name: matcher
sha256: d2323aa2060500f906aa31a895b4030b6da3ebdcc5619d14ce1aada65cd161cb
sha256: dc58c723c3c24bf8d3e2d3ad3f2f9d7bd9cf43ec6feaa64181775e60190153f2
url: "https://pub.dev"
source: hosted
version: "0.12.16+1"
version: "0.12.17"
material_color_utilities:
dependency: transitive
description:
@@ -300,10 +300,10 @@
dependency: transitive
description:
name: path
sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
sha256: "75cca69d1490965be98c73ceaea117e8a04dd21217b37b292c9ddbec0d955bc5"
url: "https://pub.dev"
source: hosted
version: "1.9.0"
version: "1.9.1"
path_provider_linux:
dependency: transitive
description:
@@ -409,10 +409,10 @@
dependency: transitive
description:
name: source_span
sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
sha256: "254ee5351d6cb365c859e20ee823c3bb479bf4a293c22d17a9f1bf144ce86f7c"
url: "https://pub.dev"
source: hosted
version: "1.10.0"
version: "1.10.1"
sprintf:
dependency: transitive
description:
@@ -425,42 +425,42 @@
dependency: transitive
description:
name: stack_trace
sha256: "9f47fd3630d76be3ab26f0ee06d213679aa425996925ff3feffdec504931c377"
sha256: "8b27215b45d22309b5cddda1aa2b19bdfec9df0e765f2de506401c071d38d1b1"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
version: "1.12.1"
stream_channel:
dependency: transitive
description:
name: stream_channel
sha256: ba2aa5d8cc609d96bbb2899c28934f9e1af5cddbd60a827822ea467161eb54e7
sha256: "969e04c80b8bcdf826f8f16579c7b14d780458bd97f56d107d3950fdbeef059d"
url: "https://pub.dev"
source: hosted
version: "2.1.2"
version: "2.1.4"
string_scanner:
dependency: transitive
description:
name: string_scanner
sha256: "688af5ed3402a4bde5b3a6c15fd768dbf2621a614950b17f04626c431ab3c4c3"
sha256: "921cd31725b72fe181906c6a94d987c78e3b98c2e205b397ea399d4054872b43"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
version: "1.4.1"
term_glyph:
dependency: transitive
description:
name: term_glyph
sha256: a29248a84fbb7c79282b40b8c72a1209db169a2e0542bce341da992fe1bc7e84
sha256: "7f554798625ea768a7518313e58f83891c7f5024f88e46e7182a4558850a4b8e"
url: "https://pub.dev"
source: hosted
version: "1.2.1"
version: "1.2.2"
test_api:
dependency: transitive
description:
name: test_api
sha256: "664d3a9a64782fcdeb83ce9c6b39e78fd2971d4e37827b9b06c3aa1edc5e760c"
sha256: fb31f383e2ee25fbbfe06b40fe21e1e458d14080e3c67e7ba0acfde4df4e0bbd
url: "https://pub.dev"
source: hosted
version: "0.7.3"
version: "0.7.4"
textwrap:
dependency: transitive
description:
@@ -497,10 +497,10 @@
dependency: transitive
description:
name: vm_service
sha256: f6be3ed8bd01289b34d679c2b62226f63c0e69f9fd2e50a6b3c1c729a961041b
sha256: "0968250880a6c5fe7edc067ed0a13d4bae1577fe2771dcf3010d52c4a9d3ca14"
url: "https://pub.dev"
source: hosted
version: "14.3.0"
version: "14.3.1"
web:
dependency: transitive
description:
@@ -526,5 +526,5 @@
source: hosted
version: "1.0.4"
sdks:
dart: ">=3.4.0 <4.0.0"
dart: ">=3.7.0-0 <4.0.0"
flutter: ">=3.19.0"
2 changes: 1 addition & 1 deletion lib/fllama_io.dart
@@ -10,7 +10,7 @@ import 'package:fllama/io/fllama_bindings_generated.dart';
import 'package:fllama/io/fllama_io_helpers.dart';
import 'package:fllama/misc/openai.dart';

- typedef FllamaInferenceCallback = void Function(String response, bool done);
+ typedef FllamaInferenceCallback = void Function(String response, String openaiResponseJsonString, bool done);
typedef FllamaMlcLoadCallback = void Function(
double downloadProgress, double loadProgress);

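The second callback argument introduced here carries an OpenAI-style chat-completion JSON string produced by the native side. A minimal sketch of consuming it, assuming the payload follows the usual choices[0].message shape with an optional tool_calls list; that layout comes from llama.cpp's output format and is an assumption here, not something this diff guarantees:

import 'dart:convert';

// Sketch: defensively pull the assistant text and any tool calls out of the
// callback's openaiResponseJsonString. The JSON layout is assumed.
void handleResponseJson(String openaiResponseJsonString) {
  final decoded = jsonDecode(openaiResponseJsonString);
  if (decoded is! Map) return;
  final choices = decoded['choices'];
  if (choices is! List || choices.isEmpty) return;
  final message = choices.first['message'];
  if (message is! Map) return;
  final content = message['content'];
  if (content != null) {
    print('assistant: $content');
  }
  final toolCalls = message['tool_calls'];
  if (toolCalls is List) {
    for (final call in toolCalls) {
      final function = call['function'];
      print('tool call: ${function?['name']} args: ${function?['arguments']}');
    }
  }
}
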
2 changes: 1 addition & 1 deletion lib/fllama_unimplemented.dart
@@ -5,7 +5,7 @@ import 'package:fllama/fllama.dart';
import 'package:fllama/fllama_io.dart';
import 'package:fllama/io/fllama_io_helpers.dart';

- typedef FllamaInferenceCallback = void Function(String response, bool done);
+ typedef FllamaInferenceCallback = void Function(String response, String openaiResponseJsonString, bool done);
typedef FllamaMlcLoadCallback = void Function(
double downloadProgress, double loadProgress);

55 changes: 19 additions & 36 deletions lib/fllama_universal.dart
@@ -41,6 +41,7 @@ class FllamaInferenceRequest {
int maxTokens;
String modelPath;
String? modelMmprojPath;
+ String? openAiRequestJsonString;
int numGpuLayers;

/// Number of threads to use for inference.
@@ -84,6 +85,7 @@ class FllamaInferenceRequest {
this.modelMmprojPath,
this.numThreads = 2,
this.logger,
+ this.openAiRequestJsonString,
});
}

@@ -129,23 +131,6 @@ Future<int> fllamaChat(
request: request,
);

- final String grammar;
- if (request.tools.isNotEmpty) {
- if (request.tools.length > 1) {
- // ignore: avoid_print
- print(
- '[fllama] WARNING: More than one tool was specified. No grammar will be enforced. (via fllamaChat)');
- grammar = '';
- } else {
- grammar = request.tools.first.grammar;
- // ignore: avoid_print
- print('[fllama] Grammar to be enforced: $grammar');
- }
- } else {
- // ignore: avoid_print
- print('[fllama] No tools were specified. No grammar will be enforced.');
- grammar = '';
- }
final inferenceRequest = FllamaInferenceRequest(
contextSize: request.contextSize,
input: text,
Expand All @@ -157,10 +142,12 @@ Future<int> fllamaChat(
penaltyRepeat: request.presencePenalty,
temperature: request.temperature,
topP: request.topP,
- grammar: grammar,
+ grammar: '', // deprecated, llama.cpp handles tools internally now
logger: request.logger,
eosToken: eosToken,
+ openAiRequestJsonString: request.toJsonString(),
);

return fllamaInference(inferenceRequest, callback);
}

@@ -185,20 +172,20 @@ String fllamaApplyChatTemplate({
}

if (request.tools.isNotEmpty) {
- final tools = request.tools.map((tool) {
- return tool.typescriptDefinition;
- }).join('\n\n');
- jsonMessages.insert(0, {
- 'role': 'system',
- 'content': '''
- You have access to the following functions:
- $tools
+ // final tools = request.tools.map((tool) {
+ // return tool.typescriptDefinition;
+ // }).join('\n\n');
+ // jsonMessages.insert(0, {
+ // 'role': 'system',
+ // 'content': '''
+ // You have access to the following functions:
+ // $tools

- You are a helpful assistant with tool calling capabilities.
- When you receive a tool call response, use the output to format an answer to the orginal use question.
- If you are using tools, respond in the format {"name": function name, "parameters": dictionary of function arguments}. If multiple tools are used, use array format.
- ''',
- });
+ // You are a helpful assistant with tool calling capabilities.
+ // When you receive a tool call response, use the output to format an answer to the orginal use question.
+ // If you are using tools, respond in the format {"name": function name, "parameters": dictionary of function arguments}. If multiple tools are used, use array format.
+ // ''',
+ // });
}

if (jsonMessages.isEmpty) {
@@ -261,22 +248,18 @@ If you are using tools, respond in the format {"name": function name, "parameter
if (chatTemplate != chatMlTemplate) {
final llamaChatTemplate = Llama3ChatTemplate();
// ignore: avoid_print

if (llamaChatTemplate.matches(chatTemplate)) {
// ex. bartowski's llama 3.2 8B cannot be parsed, but it is
// desirable to use. ChatML as a fallback breaks it. First response
// generally works, then it fails.
print(
'[fllama] Using Llama 3 chat template as a fallback because the chat template could not be applied. Exception: $e. Chat template: $chatTemplate. Messages: $jsonMessages.');

return fllamaApplyChatTemplate(
chatTemplate: llamaChatTemplate.template,
request: request,
bosToken: llamaChatTemplate.bosToken,
eosToken: llamaChatTemplate.eosToken,
);
}
print(
'[fllama] Using ChatML because the chat template could not be applied. Exception: $e. Chat template: $chatTemplate. Messages: $jsonMessages.');
return fllamaApplyChatTemplate(
chatTemplate: chatMlTemplate,
request: request,
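Taken together, the changes to this file drop fllama's client-side tool plumbing: no grammar is derived from a single tool any more (the request's grammar field is now always empty), and no tool-listing system prompt is spliced into the chat template. The whole OpenAI-style request, tools included, is serialized with request.toJsonString() and handed to the native layer through openAiRequestJsonString, so llama.cpp's own tool-call support (ggml-org/llama.cpp#9639) applies the schema. For callers that means a tool simply carries its parameters as a raw JSON Schema string, as in this sketch; the schema content is illustrative, and it assumes Tool's required named parameters are just name and jsonSchema, as the example-app call above suggests:

// Sketch: a tool whose parameter schema is passed straight through; fllama
// no longer generates a grammar or a system prompt from it on the Dart side.
final weatherTool = Tool(
  name: 'get_weather',
  jsonSchema: '''
{
  "type": "object",
  "properties": {
    "city": {"type": "string"}
  },
  "required": ["city"]
}
''',
);

One consequence is that the old single-tool limitation (the removed warning above) no longer applies on the Dart side: every tool on the request rides along in the serialized JSON, and how many of them are honored is now up to the native llama.cpp implementation.
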