import { PreTrainedTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]";

// import 'phonemize' code from Kokoro.js repo
import { phonemize } from "./phonemizer.js";

const { RawTensor, RawSession } = Supabase.ai;
const SAMPLE_RATE = 24000; // 24 kHz

/* …
  > The size of s and c is 256 × 1
*/
const STYLE_DIM = 256;
const MODEL_ID = "onnx-community/Kokoro-82M-ONNX";
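// Voice IDs start with the accent ('a'merican | 'b'ritish); the second letter
// appears to indicate a 'f'emale or 'm'ale voice.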
// https://huggingface.co/onnx-community/Kokoro-82M-ONNX#samples
const ALLOWED_VOICES = [
  "af_bella",
  "af_nicole",
  "af_sarah",
  "af_sky",
  "am_adam",
  "am_michael",
  "bf_emma",
  "bf_isabella",
  "bm_george",
  "bm_lewis",
];

const session = await RawSession.fromHuggingFace(MODEL_ID);
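// HTTP handler: reads the `text` and `voice` query params, phonemizes the text,
// tokenizes the phonemes, loads the matching voice style vector, runs the
// Kokoro ONNX model and returns the generated speech as a WAV response.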
Deno.serve(async (req) => {
  const params = new URL(req.url).searchParams;
  const text = params.get("text") ?? "Hello from Supabase!";
  const voice = params.get("voice") ?? "af_bella";

  if (!ALLOWED_VOICES.includes(voice)) {
    return Response.json({
      error: `invalid voice '${voice}'`,
      must_be_one_of: ALLOWED_VOICES,
    }, { status: 400 });
  }

  const tokenizer = await loadTokenizer();
  const language = voice.at(0); // 'a'merican | 'b'ritish
  const phonemes = await phonemize(text, language);
  const { input_ids } = tokenizer(phonemes, {
    truncation: true,
  });

  // Select voice style based on number of input tokens
  const num_tokens = Math.max(
    input_ids.dims.at(-1) - 2, // without padding
    0,
  );

  const voiceStyle = await loadVoiceStyle(voice, num_tokens);

  const { waveform } = await session.run({
    input_ids,
    style: voiceStyle,
    // use RawTensor from Supabase.ai (a plain `Tensor` is not imported in this module)
    speed: new RawTensor("float32", [1], [1]),
  });

  // Do `wave` encoding from the Rust backend
  const audio = await waveform.tryEncodeAudio(SAMPLE_RATE);

  return new Response(audio, {
    headers: {
      "Content-Type": "audio/wav",
    },
  });
});
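// Example request (assumes this function is deployed under a hypothetical
// `text-to-speech` slug on the standard local Edge Functions URL):
//
//   curl "http://127.0.0.1:54321/functions/v1/text-to-speech?text=Hello%20from%20Supabase&voice=af_bella" \
//     --output hello.wav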
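// Fetches the packed per-voice style embeddings (`<voice>.bin`) from Hugging Face
// and slices out the single STYLE_DIM-sized vector corresponding to the number
// of input tokens.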
async function loadVoiceStyle(voice: string, num_tokens: number) {
  const voice_url =
    `https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/voices/${voice}.bin?download=true`;

  console.log("loading voice:", voice_url);

  const voiceBuffer = await fetch(voice_url).then((res) => res.arrayBuffer());

  const offset = num_tokens * STYLE_DIM;
  const voiceData = new Float32Array(voiceBuffer).slice(
    offset,
    offset + STYLE_DIM,
  );

  // use RawTensor from Supabase.ai (a plain `Tensor` is not imported in this module)
  return new RawTensor("float32", voiceData, [1, STYLE_DIM]);
}
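// Builds a transformers.js PreTrainedTokenizer from manually fetched
// tokenizer.json and tokenizer_config.json assets (see BUG note below).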
async function loadTokenizer() {
  // BUG: `AutoTokenizer.from_pretrained(MODEL_ID)` fails with an "invalid 'h', not JSON"
  // parse error, which is why we fetch the tokenizer assets manually.
  // const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);

  const tokenizerData = await fetch(
    "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer.json?download=true",
  ).then((res) => res.json());

  const tokenizerConfig = await fetch(
    "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer_config.json?download=true",
  ).then((res) => res.json());

  return new PreTrainedTokenizer(tokenizerData, tokenizerConfig);
}