Skip to content

Commit 296c0e9

Browse files
authored
TokenizersBackend (from transformers v5) (#296)
1 parent fae7768 commit 296c0e9

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

Sources/Tokenizers/Tokenizer.swift

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ enum TokenizerModel {
168168
"LlamaTokenizer": BPETokenizer.self,
169169
"RobertaTokenizer": BPETokenizer.self,
170170
"T5Tokenizer": T5Tokenizer.self,
171+
"TokenizersBackend": BPETokenizer.self,
171172
"PreTrainedTokenizer": BPETokenizer.self,
172173
"Qwen2Tokenizer": BPETokenizer.self,
173174
"WhisperTokenizer": BPETokenizer.self,

Tests/TokenizersTests/TokenizerTests.swift

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,4 +362,13 @@ struct TokenizerTests {
362362
#expect(tokenizer.tokenize(text: "<s>Who are you?</s>") == ["<s>", "Who", "Ġare", "Ġyou", "?", "</s>"])
363363
#expect(tokenizer.encode(text: "<s>Who are you?</s>") == [0, 0, 12375, 32, 47, 116, 2, 2])
364364
}
365+
366+
/// Loads the tokenizer for `mlx-community/Ministral-3-3B-Instruct-2512-4bit`
/// and checks that a short English sentence encodes to the expected token ids.
///
/// NOTE(review): presumably this checkpoint's tokenizer config declares the
/// `TokenizersBackend` class name introduced with transformers v5 — confirm
/// against the model repository.
@Test
func tokenizerBackend() async throws {
    let loaded = try await AutoTokenizer.from(pretrained: "mlx-community/Ministral-3-3B-Instruct-2512-4bit") as? PreTrainedTokenizer

    // `#expect` only records a failure and keeps running, so a force-unwrap
    // afterwards would trap on nil. `#require` throws instead, which fails this
    // test cleanly without crashing the test process.
    let tokenizer = try #require(loaded)

    #expect(tokenizer.encode(text: "She took a train to the West") == [6284, 5244, 1261, 10018, 1317, 1278, 5046])
}
365374
}

0 commit comments

Comments
 (0)