forked from dotnet/docs
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
272 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
docs/core/whats-new/snippets/dotnet-9/csharp/LlamaTokenizer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Net.Http; | ||
using System.Threading.Tasks; | ||
using Microsoft.ML.Tokenizers; | ||
|
||
/// <summary>
/// Sample showing the Llama tokenizer from <c>Microsoft.ML.Tokenizers</c>:
/// encoding text to IDs, decoding IDs back to text, counting tokens, and
/// limiting an encoding to a maximum number of tokens.
/// </summary>
internal class LlamaTokenizerExample
{
    /// <summary>
    /// Downloads the tokenizer model, builds a tokenizer from it, and writes
    /// the result of each tokenizer operation on a sample string to the console.
    /// The expected console output is recorded in the comment under each call.
    /// </summary>
    /// <returns>A task that completes once every result has been written.</returns>
    public static async Task RunItAsync()
    {
        // Create the Tokenizer.
        // HttpClient is IDisposable, so it is scoped with a using declaration
        // just like the response stream; both are released when the method exits
        // (the stream first, then the client).
        using HttpClient httpClient = new HttpClient();
        string modelUrl = @"https://huggingface.co/hf-internal-testing/llama-tokenizer/resolve/main/tokenizer.model";
        using Stream remoteStream = await httpClient.GetStreamAsync(modelUrl);
        Tokenizer tokenizer = Tokenizer.CreateLlama(remoteStream);

        string text = "Hello, World!";

        // Encode to IDs.
        IReadOnlyList<int> encodedIds = tokenizer.EncodeToIds(text);
        Console.WriteLine($"encodedIds = {{{string.Join(", ", encodedIds)}}}");
        // encodedIds = {1, 15043, 29892, 2787, 29991}

        // Decode IDs to text.
        string? decodedText = tokenizer.Decode(encodedIds);
        Console.WriteLine($"decodedText = {decodedText}");
        // decodedText = Hello, World!

        // Get token count.
        int idsCount = tokenizer.CountTokens(text);
        Console.WriteLine($"idsCount = {idsCount}");
        // idsCount = 5

        // Full encoding.
        EncodingResult result = tokenizer.Encode(text);
        Console.WriteLine($"result.Tokens = {{'{string.Join("', '", result.Tokens)}'}}");
        // result.Tokens = {'<s>', '▁Hello', ',', '▁World', '!'}
        Console.WriteLine($"result.Offsets = {{{string.Join(", ", result.Offsets)}}}");
        // result.Offsets = {(0, 0), (0, 6), (6, 1), (7, 6), (13, 1)}
        Console.WriteLine($"result.Ids = {{{string.Join(", ", result.Ids)}}}");
        // result.Ids = {1, 15043, 29892, 2787, 29991}

        // Encode up to number of tokens limit.
        int index1 = tokenizer.IndexOfTokenCount(
            text,
            maxTokenCount: 2,
            out string processedText1,
            out int tokenCount1
            ); // Encode up to two tokens.
        Console.WriteLine($"processedText1 = {processedText1}");
        // processedText1 = ▁Hello,▁World!
        Console.WriteLine($"tokenCount1 = {tokenCount1}");
        // tokenCount1 = 2
        Console.WriteLine($"index1 = {index1}");
        // index1 = 6

        int index2 = tokenizer.LastIndexOfTokenCount(
            text,
            maxTokenCount: 1,
            out string processedText2,
            out int tokenCount2
            ); // Encode from end up to one token.
        Console.WriteLine($"processedText2 = {processedText2}");
        // processedText2 = ▁Hello,▁World!
        Console.WriteLine($"tokenCount2 = {tokenCount2}");
        // tokenCount2 = 1
        Console.WriteLine($"index2 = {index2}");
        // index2 = 13
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
// Snippet entry point: only the uncommented example below runs; the other
// calls are kept commented out so they can be enabled one at a time.
Linq.RunIt(); | ||
//Serialization.RunIt(); | ||
//TimeSpanExample.RunIt(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 32 additions & 11 deletions
43
docs/core/whats-new/snippets/dotnet-9/csharp/Reflection.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML.Tokenizers; | ||
|
||
/// <summary>
/// Sample showing the Tiktoken tokenizer from <c>Microsoft.ML.Tokenizers</c>:
/// encoding text to IDs, decoding IDs back to text, counting tokens, and
/// limiting an encoding to a maximum number of tokens.
/// </summary>
internal class TiktokenExample
{
    /// <summary>
    /// Creates a tokenizer for the "gpt-4" model and writes the result of each
    /// tokenizer operation on a sample string to the console. The expected
    /// console output is recorded in the comment under each call.
    /// </summary>
    public static void RunIt()
    {
        // <Tiktoken>
        Tokenizer tokenizer = Tokenizer.CreateTiktokenForModel("gpt-4");
        string text = "Hello, World!";

        // Encode to IDs.
        IReadOnlyList<int> encodedIds = tokenizer.EncodeToIds(text);
        Console.WriteLine($"encodedIds = {{{string.Join(", ", encodedIds)}}}");
        // encodedIds = {9906, 11, 4435, 0}

        // Decode IDs to text.
        // Declared as string? to match the Llama sample, which treats the
        // result of the same Decode call as nullable.
        string? decodedText = tokenizer.Decode(encodedIds);
        Console.WriteLine($"decodedText = {decodedText}");
        // decodedText = Hello, World!

        // Get token count.
        int idsCount = tokenizer.CountTokens(text);
        Console.WriteLine($"idsCount = {idsCount}");
        // idsCount = 4

        // Full encoding.
        EncodingResult result = tokenizer.Encode(text);
        Console.WriteLine($"result.Tokens = {{'{string.Join("', '", result.Tokens)}'}}");
        // result.Tokens = {'Hello', ',', ' World', '!'}
        Console.WriteLine($"result.Offsets = {{{string.Join(", ", result.Offsets)}}}");
        // result.Offsets = {(0, 5), (5, 1), (6, 6), (12, 1)}
        Console.WriteLine($"result.Ids = {{{string.Join(", ", result.Ids)}}}");
        // result.Ids = {9906, 11, 4435, 0}

        // Encode up to number of tokens limit.
        int index1 = tokenizer.IndexOfTokenCount(
            text,
            maxTokenCount: 1,
            out string processedText1,
            out int tokenCount1
            ); // Encode up to one token.
        Console.WriteLine($"processedText1 = {processedText1}");
        // processedText1 = Hello, World!
        Console.WriteLine($"tokenCount1 = {tokenCount1}");
        // tokenCount1 = 1
        Console.WriteLine($"index1 = {index1}");
        // index1 = 5

        int index2 = tokenizer.LastIndexOfTokenCount(
            text,
            maxTokenCount: 1,
            out string processedText2,
            out int tokenCount2
            ); // Encode from end up to one token.
        Console.WriteLine($"processedText2 = {processedText2}");
        // processedText2 = Hello, World!
        Console.WriteLine($"tokenCount2 = {tokenCount2}");
        // tokenCount2 = 1
        Console.WriteLine($"index2 = {index2}");
        // index2 = 12
        // </Tiktoken>
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using System; | ||
|
||
/// <summary>
/// Sample contrasting two ways of building a <see cref="TimeSpan"/> with
/// <c>TimeSpan.FromSeconds</c>: from a single fractional value, and from
/// separate whole seconds and milliseconds arguments.
/// </summary>
internal class TimeSpanExample
{
    /// <summary>
    /// Builds both time spans and writes each one to the console. The expected
    /// console output is recorded in the comment under each call.
    /// </summary>
    public static void RunIt()
    {
        // <TimeSpan.From>
        // Construct both values up front; the two calls are independent.
        TimeSpan timeSpan1 = TimeSpan.FromSeconds(value: 101.832);
        TimeSpan timeSpan2 = TimeSpan.FromSeconds(seconds: 101, milliseconds: 832);

        Console.WriteLine($"timeSpan1 = {timeSpan1}");
        // timeSpan1 = 00:01:41.8319999

        Console.WriteLine($"timeSpan2 = {timeSpan2}");
        // timeSpan2 = 00:01:41.8320000
        // </TimeSpan.From>
    }
}