From ef2319ebb3e8c5e927622028b8a51a6489ae154e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B2=88=E6=98=9F=E7=B9=81?= Date: Sun, 4 Aug 2024 21:15:11 +0800 Subject: [PATCH 1/3] feat: upgrade kernel memory --- .../DashScopeTextEmbeddingGenerator.cs | 6 ++++++ .../DashScopeTextGenerator.cs | 6 ++++++ .../KernelMemory.DashScope.csproj | 4 ++-- src/KernelMemory.DashScope/LengthTokenizer.cs | 6 ++++++ src/KernelMemory.DashScope/QWenTokenizer.cs | 17 ++++++++++++++++- .../SemanticKernel.DashScope.csproj | 6 +++--- .../KernelMemory.DashScope.UnitTests.csproj | 6 +++--- .../SemanticKernel.DashScope.UnitTest.csproj | 4 ++-- 8 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs index d76009a..ca8cd9d 100644 --- a/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs +++ b/src/KernelMemory.DashScope/DashScopeTextEmbeddingGenerator.cs @@ -24,6 +24,12 @@ public int CountTokens(string text) return tokenizer?.CountTokens(text) ?? text.Length; } + /// + public IReadOnlyList GetTokens(string text) + { + return tokenizer?.GetTokens(text) ?? [text]; + } + /// public async Task GenerateEmbeddingAsync( string text, diff --git a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs index cb05242..788bac0 100644 --- a/src/KernelMemory.DashScope/DashScopeTextGenerator.cs +++ b/src/KernelMemory.DashScope/DashScopeTextGenerator.cs @@ -30,6 +30,12 @@ public int CountTokens(string text) return tokenizer?.CountTokens(text) ?? QWenTokenizer.CountTokensStatic(text); } + /// + public IReadOnlyList GetTokens(string text) + { + return tokenizer?.GetTokens(text) ?? QWenTokenizer.GetTokensStatic(text); + } + /// public async IAsyncEnumerable GenerateTextAsync( string prompt, diff --git a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj index ddbf3a2..b0f9cb1 100644 --- a/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj +++ b/src/KernelMemory.DashScope/KernelMemory.DashScope.csproj @@ -19,8 +19,8 @@ - - + + diff --git a/src/KernelMemory.DashScope/LengthTokenizer.cs b/src/KernelMemory.DashScope/LengthTokenizer.cs index 88aafe0..dd48306 100644 --- a/src/KernelMemory.DashScope/LengthTokenizer.cs +++ b/src/KernelMemory.DashScope/LengthTokenizer.cs @@ -12,4 +12,10 @@ public int CountTokens(string text) { return text.Length; } + + /// + public IReadOnlyList GetTokens(string text) + { + return text.Select(x => $"{x}").ToList(); + } } diff --git a/src/KernelMemory.DashScope/QWenTokenizer.cs b/src/KernelMemory.DashScope/QWenTokenizer.cs index 36dd0f8..bf7bc16 100644 --- a/src/KernelMemory.DashScope/QWenTokenizer.cs +++ b/src/KernelMemory.DashScope/QWenTokenizer.cs @@ -13,7 +13,6 @@ public class QWenTokenizer : ITextTokenizer .Concat(Enumerable.Range(0, 205).Select(x => $"<|extra_{x}|>")) .Select((x, i) => new KeyValuePair(x, 151643 + i)) .ToDictionary(); - private static readonly ITokenizer Tokenizer = TokenizerBuilder.CreateTokenizer( DashScopeEmbeddedResource.ReadBpeFile(), SpecialTokens, @@ -45,6 +44,12 @@ public int CountTokens(string text) return Tokenizer.Encode(text).Count; } + /// + public IReadOnlyList GetTokens(string text) + { + return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList(); + } + /// /// Count tokens. /// @@ -54,4 +59,14 @@ public static int CountTokensStatic(string text) { return Tokenizer.Encode(text).Count; } + + /// + /// Get tokens + /// + /// The text to tokenizers. + /// + public static IReadOnlyList GetTokensStatic(string text) + { + return Tokenizer.Encode(text).Select(x => Tokenizer.Decode([x])).ToList(); + } } diff --git a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj index 1410f1d..a203fdf 100644 --- a/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj +++ b/src/SemanticKernel.DashScope/SemanticKernel.DashScope.csproj @@ -19,9 +19,9 @@ - - - + + + diff --git a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj index d773f79..7a7872c 100644 --- a/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj +++ b/test/KernelMemory.DashScope.UnitTests/KernelMemory.DashScope.UnitTests.csproj @@ -14,10 +14,10 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + - - + + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj index f349475..a7372f3 100644 --- a/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj +++ b/test/SemanticKernel.DashScope.UnitTest/SemanticKernel.DashScope.UnitTest.csproj @@ -5,8 +5,8 @@ runtime; build; native; contentfiles; analyzers; buildtransitive - - + + all runtime; build; native; contentfiles; analyzers; buildtransitive From fefb157e150d95ad57c917be84bbd0b94f34907b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B2=88=E6=98=9F=E7=B9=81?= Date: Sun, 4 Aug 2024 21:15:35 +0800 Subject: [PATCH 2/3] chore: code clean up --- test/SemanticKernel.DashScope.UnitTest/Cases.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/SemanticKernel.DashScope.UnitTest/Cases.cs b/test/SemanticKernel.DashScope.UnitTest/Cases.cs index 93eb1be..97ea7e8 100644 --- a/test/SemanticKernel.DashScope.UnitTest/Cases.cs +++ b/test/SemanticKernel.DashScope.UnitTest/Cases.cs @@ -55,7 +55,7 @@ public static class Cases public static KernelFunction NormalFunction(Action method) => KernelFunctionFactory.CreateFromMethod( - (string location) => + (string _) => { method(); return "Weather"; From 05e93015929c5119763ff95638819efafce0ec7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B2=88=E6=98=9F=E7=B9=81?= Date: Sun, 4 Aug 2024 21:20:35 +0800 Subject: [PATCH 3/3] fix: test --- test/SemanticKernel.DashScope.UnitTest/Cases.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/SemanticKernel.DashScope.UnitTest/Cases.cs b/test/SemanticKernel.DashScope.UnitTest/Cases.cs index 97ea7e8..93eb1be 100644 --- a/test/SemanticKernel.DashScope.UnitTest/Cases.cs +++ b/test/SemanticKernel.DashScope.UnitTest/Cases.cs @@ -55,7 +55,7 @@ public static class Cases public static KernelFunction NormalFunction(Action method) => KernelFunctionFactory.CreateFromMethod( - (string _) => + (string location) => { method(); return "Weather";