Skip to content

Commit 64be111

Browse files
authored
Merge pull request #6 from chengsokdara/feat/real-time-transcription
Add Real-Time transcription support
2 parents 03e43e0 + b29781f commit 64be111

File tree

6 files changed

+153
-95
lines changed

6 files changed

+153
-95
lines changed

CHANGELOG.md

+17-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10-
## [0.0.13] - 2023-04-01
10+
## [0.2.0] - 2023-04-01
1111

1212
### Added
1313

@@ -18,15 +18,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1818
- able to run custom ffmpeg command
1919
- expose onTranscribing event
2020

21+
## [0.1.0] - 2023-03-11
22+
23+
### Added
24+
25+
- streaming option for real-time transcription
26+
- timeSlice option to control onDataAvailable event
27+
- onDataAvailable option for getting recorded blob in interval based on timeSlice
28+
29+
### Changed
30+
31+
- recording in higher audio quality to help Whisper in transcription
32+
33+
### Removed
34+
35+
- customServer option, deprecated since 0.0.11
36+
2137
## [0.0.12] - 2023-03-09
2238

2339
### Changed
2440

2541
- autoTranscribe default to true
2642
- update examples in README.md
2743

28-
### Added
29-
3044
## [0.0.11] - 2023-03-08
3145

3246
### Added

README.md

+32-28
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# useWhisper()
1+
# useWhisper
22

3-
React Hook for OpenAI Whisper API with speech recorder and silence removal built-in
3+
React Hook for OpenAI Whisper API with speech recorder, real-time transcription and silence removal built-in
44

55
---
66

@@ -22,8 +22,6 @@ _Try OpenAI API price calculator, token counter, and dataset manager (preview)_
2222

2323
- ### Usage
2424

25-
- ###### Provide your own OpenAI API key
26-
2725
```jsx
2826
import { useWhisper } from '@chengsokdara/use-whisper'
2927

@@ -54,9 +52,7 @@ const App = () => {
5452
}
5553
```
5654

57-
_**NOTE:** by providing apiKey, it could be exposed in the browser devtool network tab_
58-
59-
- ###### Custom REST API (if you want to keep your OpenAI API key secure)
55+
- ###### Custom Server (keep OpenAI API token secure)
6056

6157
```jsx
6258
import { useWhisper } from '@chengsokdara/use-whisper'
@@ -105,16 +101,19 @@ const App = () => {
105101

106102
- ### Examples
107103

108-
- ###### Remove silence before sending to Whisper to save cost
104+
- ###### Real-time streaming transcription
109105

110106
```jsx
111107
import { useWhisper } from '@chengsokdara/use-whisper'
112108

113109
const App = () => {
114110
const { transcript } = useWhisper({
115111
apiKey: env.process.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
116-
// use ffmpeg-wasp to remove silence from recorded speech
117-
removeSilence: true,
112+
streaming: true,
113+
timeSlice: 1_000, // 1 second
114+
whisperConfig: {
115+
language: 'en',
116+
},
118117
})
119118

120119
return (
@@ -125,16 +124,16 @@ const App = () => {
125124
}
126125
```
127126

128-
- ###### Auto start recording on component mounted
127+
- ###### Remove silence before sending to Whisper to save cost
129128

130129
```jsx
131130
import { useWhisper } from '@chengsokdara/use-whisper'
132131

133132
const App = () => {
134133
const { transcript } = useWhisper({
135134
apiKey: env.process.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
136-
// will auto start recording speech upon component mounted
137-
autoStart: true,
135+
// use ffmpeg-wasm to remove silence from recorded speech
136+
removeSilence: true,
138137
})
139138

140139
return (
@@ -145,16 +144,16 @@ const App = () => {
145144
}
146145
```
147146

148-
- ###### Keep recording as long as the user is speaking
147+
- ###### Auto start recording on component mounted
149148

150149
```jsx
151150
import { useWhisper } from '@chengsokdara/use-whisper'
152151

153152
const App = () => {
154153
const { transcript } = useWhisper({
155154
apiKey: env.process.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
156-
nonStop: true, // keep recording as long as the user is speaking
157-
stopTimeout: 5000, // auto stop after 5 seconds
155+
// will auto start recording speech upon component mounted
156+
autoStart: true,
158157
})
159158

160159
return (
@@ -165,15 +164,16 @@ const App = () => {
165164
}
166165
```
167166

168-
- ###### Auto transcribe speech when recorder stopped
167+
- ###### Keep recording as long as the user is speaking
169168

170169
```jsx
171170
import { useWhisper } from '@chengsokdara/use-whisper'
172171

173172
const App = () => {
174173
const { transcript } = useWhisper({
175174
apiKey: env.process.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
176-
autoTranscribe: true, // will try to automatically transcribe speech
175+
nonStop: true, // keep recording as long as the user is speaking
176+
stopTimeout: 5000, // auto stop after 5 seconds
177177
})
178178

179179
return (
@@ -192,6 +192,7 @@ import { useWhisper } from '@chengsokdara/use-whisper'
192192
const App = () => {
193193
const { transcript } = useWhisper({
194194
apiKey: env.process.OPENAI_API_TOKEN, // YOUR_OPEN_AI_TOKEN
195+
autoTranscribe: true,
195196
whisperConfig: {
196197
prompt: 'previous conversation', // you can pass previous conversation for context
197198
response_format: 'text', // output text instead of json
@@ -222,16 +223,19 @@ _most of these dependecies are lazy loaded, so it is only imported when it is ne
222223

223224
- ###### Config Object
224225

225-
| Name | Type | Default Value | Description |
226-
| -------------- | -------------------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------- |
227-
| apiKey | string | '' | your OpenAI API token |
228-
| autoStart | boolean | false | auto start speech recording on component mount |
229-
| autoTranscribe | boolean | true | should auto transcribe after stop recording |
230-
| nonStop | boolean | false | if true, record will auto stop after stopTimeout. However if user keep on speaking, the recorder will keep recording |
231-
| removeSilence | boolean | false | remove silence before sending file to OpenAI API |
232-
| stopTimeout | number | 5,000 ms | if nonStop is true, this become required. This control when the recorder auto stop |
233-
| whisperConfig | [WhisperApiConfig](#whisperapiconfig) | undefined | Whisper API transcription config |
234-
| onTranscribe | (blob: Blob) => Promise<[Transcript](#transcript)> | undefined | callback function to handle transcription on your own custom server |
226+
| Name | Type | Default Value | Description |
227+
| --------------- | -------------------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------- |
228+
| apiKey | string | '' | your OpenAI API token |
229+
| autoStart | boolean | false | auto start speech recording on component mount |
230+
| autoTranscribe | boolean | true | should auto transcribe after stop recording |
231+
| nonStop | boolean | false | if true, recording will auto stop after stopTimeout. However, if the user keeps speaking, the recorder will keep recording |
232+
| removeSilence | boolean | false | remove silence before sending file to OpenAI API |
233+
| stopTimeout | number | 5,000 ms | if nonStop is true, this becomes required. This controls when the recorder auto stops |
234+
| streaming | boolean | false | transcribe speech in real-time based on timeSlice |
235+
| timeSlice | number | 2000 ms | interval between each onDataAvailable event |
236+
| whisperConfig | [WhisperApiConfig](#whisperapiconfig) | undefined | Whisper API transcription config |
237+
| onDataAvailable | (blob: Blob) => void | undefined | callback function for getting the recorded blob at each timeSlice interval |
238+
| onTranscribe | (blob: Blob) => Promise<[Transcript](#transcript)> | undefined | callback function to handle transcription on your own custom server |
235239

236240
- ###### WhisperApiConfig
237241

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
"prepublishOnly": "yarn run build-prod"
3333
},
3434
"dependencies": {
35-
"@chengsokdara/react-hooks-async": "^0.0.1",
35+
"@chengsokdara/react-hooks-async": "^0.0.2",
3636
"@ffmpeg/ffmpeg": "^0.11.6",
3737
"axios": "^1.3.4",
3838
"hark": "^1.2.3",

src/types.ts

+3-8
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@ export type UseWhisperConfig = {
22
apiKey?: string
33
autoStart?: boolean
44
autoTranscribe?: boolean
5-
/** @deprecated: use {@link UseWhisperConfig.onTranscribe} instead */
6-
customServer?: string
75
nonStop?: boolean
86
removeSilence?: boolean
97
stopTimeout?: number
8+
streaming?: boolean
9+
timeSlice?: number
1010
whisperConfig?: WhisperApiConfig
11+
onDataAvailable?: (blob: Blob) => void
1112
onTranscribe?: (blob: Blob) => Promise<UseWhisperTranscript>
1213
}
1314

@@ -32,12 +33,6 @@ export type UseWhisperReturn = {
3233

3334
export type UseWhisperHook = (config?: UseWhisperConfig) => UseWhisperReturn
3435

35-
/** @deprecated along with {@link UseWhisperConfig.customServer} */
36-
export type CustomServerRequestBody = {
37-
file: string | ArrayBuffer | null
38-
model: 'whisper-1' | string
39-
}
40-
4136
export type WhisperApiConfig = {
4237
model?: 'whisper-1' | string
4338
prompt?: string

0 commit comments

Comments
 (0)