Skip to content

Commit

Permalink
[release/4.0] Support O3 OpenAI model mapping (#7395)
Browse files (browse the repository at this point in the history)
* Support O3 OpenAI model mapping

* Update test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Tarek Mahmoud Sayed <tarekms@microsoft.com>
Co-authored-by: Tarek Mahmoud Sayed <10833894+tarekgh@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
  • Loading branch information
4 people authored Feb 21, 2025
1 parent 208651a commit 867a30f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
[
// chat
( "o1-", ModelEncoding.O200kBase ), // e.g. o1-mini
( "o3-", ModelEncoding.O200kBase ), // e.g. o3-mini
( "gpt-4o-", ModelEncoding.O200kBase), // e.g., gpt-4o-2024-05-13
( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g., gpt-3.5-turbo-0301, -0401, etc.
Expand All @@ -1038,6 +1039,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
// chat
{ "gpt-4o", ModelEncoding.O200kBase },
{ "o1", ModelEncoding.O200kBase },
{ "o3", ModelEncoding.O200kBase },
{ "gpt-4", ModelEncoding.Cl100kBase },
{ "gpt-3.5-turbo", ModelEncoding.Cl100kBase },
{ "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase },
Expand Down
5 changes: 5 additions & 0 deletions test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,10 @@ public void TestEncodeR50kBase()
[Theory]
[InlineData("o1")]
[InlineData("o1-")]
[InlineData("o1-mini")]
[InlineData("o3")]
[InlineData("o3-")]
[InlineData("o3-mini")]
[InlineData("gpt-4o")]
[InlineData("gpt-4o-")]
[InlineData("gpt-4")]
Expand Down Expand Up @@ -496,6 +500,7 @@ public void TestEncodingNamesNegativeCases()
[InlineData("gpt-4")]
[InlineData("gpt-4o")]
[InlineData("o1")]
[InlineData("o3")]
[InlineData("text-davinci-003")]
[InlineData("text-curie-001")]
[InlineData("text-davinci-edit-001")]
Expand Down

0 comments on commit 867a30f

Please sign in to comment.