-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathEncoderProviderTest.php
75 lines (59 loc) · 2.29 KB
/
EncoderProviderTest.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
declare(strict_types=1);
namespace Yethee\Tiktoken\Tests;
use org\bovigo\vfs\vfsStream;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
use Yethee\Tiktoken\EncoderProvider;
use function dirname;
use function hash;
/**
 * Tests for {@see EncoderProvider}: resolving an encoder by model name,
 * encoding a sample string, and handling of a pre-existing vocab cache file.
 */
final class EncoderProviderTest extends TestCase
{
    /**
     * The encoder resolved for a model name must report the expected encoding name.
     *
     * @param non-empty-string $modelName
     * @param non-empty-string $encoding
     */
    #[DataProvider('getEncoderForModelProvider')]
    public function testGetEncoderForModel(string $modelName, string $encoding): void
    {
        $encoder = self::createProvider()->getForModel($modelName);

        self::assertSame($encoding, $encoder->getEncoding());
    }

    /**
     * Encoding "hello world" must yield the pinned token ids for each encoding.
     */
    public function testEncode(): void
    {
        // A single provider instance serves both encodings, as in a typical caller.
        $provider = self::createProvider();

        $encoder = $provider->get('p50k_base');
        self::assertSame([31373, 995], $encoder->encode('hello world'));

        $encoder = $provider->get('cl100k_base');
        self::assertSame([15339, 1917], $encoder->encode('hello world'));
    }

    /**
     * Seeds a virtual cache directory with a file named after the SHA-1 of the
     * `p50k_base` vocab entry and filled with junk, then checks that the file no
     * longer holds that junk after the encoder has been requested.
     *
     * NOTE(review): presumably the provider validates the cached file against an
     * expected hash and rewrites it on mismatch — confirm against EncoderProvider.
     */
    public function testUseHashWhenLoadVocab(): void
    {
        $cache = vfsStream::setup('cache');

        $vocabCacheFilename = hash('sha1', EncoderProvider::ENCODINGS['p50k_base']['vocab']);
        $cacheFile = vfsStream::newFile($vocabCacheFilename)
            ->withContent('broken cache')
            ->at($cache);

        $provider = new EncoderProvider();
        /** @psalm-suppress ArgumentTypeCoercion */
        $provider->setVocabCache($cache->url());

        $provider->get('p50k_base');

        // Both operands are strings, so use the strict comparison (no type juggling).
        self::assertNotSame('broken cache', $cacheFile->getContent());
    }

    /**
     * Data sets of [model name, expected encoding name].
     *
     * @return iterable<array{non-empty-string, non-empty-string}>
     *
     * @psalm-api
     */
    public static function getEncoderForModelProvider(): iterable
    {
        yield 'text-davinci-003' => ['text-davinci-003', 'p50k_base'];
        yield 'text-davinci-edit-001' => ['text-davinci-edit-001', 'p50k_edit'];
        yield 'gpt-3.5-turbo-0301' => ['gpt-3.5-turbo-0301', 'cl100k_base'];
        yield 'gpt-4-32k' => ['gpt-4-32k', 'cl100k_base'];
        yield 'gpt-4o-2024-05-13' => ['gpt-4o-2024-05-13', 'o200k_base'];
    }

    /**
     * Builds a provider whose vocab cache points at `.cache/vocab` under the
     * parent directory of this test file's directory.
     */
    private static function createProvider(): EncoderProvider
    {
        $provider = new EncoderProvider();
        $provider->setVocabCache(dirname(__DIR__) . '/.cache/vocab');

        return $provider;
    }
}