
Commit 07616a5

add mode option between transcriptions or translations
1 parent ea93123 commit 07616a5

File tree

5 files changed, +35 -19 lines changed


CHANGELOG.md

+11
@@ -18,6 +18,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - able to run custom ffmpeg command
 - expose onTranscribing event
 
+## [0.1.1] - 2023-03-14
+
+### Added
+
+- add mode option for Whisper API, choose either transcriptions or translations
+  (currently only support translation to English)
+
+### Changed
+
+- default timeSlice from 2000ms to 1000ms
+
 ## [0.1.0] - 2023-03-11
 
 ### Added

README.md

+14 -13
@@ -232,19 +232,20 @@ _most of these dependecies are lazy loaded, so it is only imported when it is ne
 
 - ###### Config Object
 
-| Name | Type | Default Value | Description |
-| --------------- | -------------------------------------------------- | ------------- | -------------------------------------------------------------------------------------------------------------------- |
-| apiKey | string | '' | your OpenAI API token |
-| autoStart | boolean | false | auto start speech recording on component mount |
-| autoTranscribe | boolean | true | should auto transcribe after stop recording |
-| nonStop | boolean | false | if true, record will auto stop after stopTimeout. However if user keep on speaking, the recorder will keep recording |
-| removeSilence | boolean | false | remove silence before sending file to OpenAI API |
-| stopTimeout | number | 5,000 ms | if nonStop is true, this become required. This control when the recorder auto stop |
-| streaming | boolean | false | transcribe speech in real-time based on timeSlice |
-| timeSlice | number | 2000 ms | interval between each onDataAvailable event |
-| whisperConfig | [WhisperApiConfig](#whisperapiconfig) | undefined | Whisper API transcription config |
-| onDataAvailable | (blob: Blob) => void | undefined | callback function for getting recorded blob in interval between timeSlice |
-| onTranscribe | (blob: Blob) => Promise<[Transcript](#transcript)> | undefined | callback function to handle transcription on your own custom server |
+| Name | Type | Default Value | Description |
+| --------------- | -------------------------------------------------- | -------------- | -------------------------------------------------------------------------------------------------------------------- |
+| apiKey | string | '' | your OpenAI API token |
+| autoStart | boolean | false | auto start speech recording on component mount |
+| autoTranscribe | boolean | true | should auto transcribe after stop recording |
+| mode | string | transcriptions | control Whisper mode either transcriptions or translations, currently only support translation to English |
+| nonStop | boolean | false | if true, record will auto stop after stopTimeout. However if user keep on speaking, the recorder will keep recording |
+| removeSilence | boolean | false | remove silence before sending file to OpenAI API |
+| stopTimeout | number | 5,000 ms | if nonStop is true, this become required. This control when the recorder auto stop |
+| streaming | boolean | false | transcribe speech in real-time based on timeSlice |
+| timeSlice | number | 1000 ms | interval between each onDataAvailable event |
+| whisperConfig | [WhisperApiConfig](#whisperapiconfig) | undefined | Whisper API transcription config |
+| onDataAvailable | (blob: Blob) => void | undefined | callback function for getting recorded blob in interval between timeSlice |
+| onTranscribe | (blob: Blob) => Promise<[Transcript](#transcript)> | undefined | callback function to handle transcription on your own custom server |
 
 - ###### WhisperApiConfig
 
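To make the table above concrete, here is a minimal sketch of a config object exercising the options this commit adds or changes. Passing it to the hook, and the hook's import, are assumed from the rest of the README rather than shown in this diff; the API key is a placeholder.

```ts
// Sketch: a config covering the options added or changed in this commit.
// Every field is optional (see UseWhisperConfig in src/types.ts).
const config = {
  apiKey: '<OPENAI_API_KEY>', // placeholder, not a real token
  mode: 'translations',       // default is 'transcriptions'; 'translations' returns English text
  streaming: true,            // transcribe in real time based on timeSlice
  timeSlice: 1_000,           // the new default: one onDataAvailable chunk per second
  nonStop: true,              // keep recording while the user keeps speaking
  stopTimeout: 5_000,         // required when nonStop is true; auto-stop after 5 s of silence
}
```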

src/configs.ts

+1 -2
@@ -6,5 +6,4 @@ export const ffmpegCoreUrl =
 export const silenceRemoveCommand =
   'silenceremove=start_periods=1:stop_periods=-1:start_threshold=-30dB:stop_threshold=-30dB:start_silence=2:stop_silence=2'
 
-export const whisperApiEndpoint =
-  'https://api.openai.com/v1/audio/transcriptions'
+export const whisperApiEndpoint = 'https://api.openai.com/v1/audio/'
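Since the endpoint no longer names a specific operation, the full URL is now built by appending the mode at request time (see the useWhisper.ts hunk further down). A tiny sketch of the composition, using the values from this file:

```ts
// Sketch: how the trimmed base endpoint and a mode value combine into the request URL.
const whisperApiEndpoint = 'https://api.openai.com/v1/audio/'
const mode: 'transcriptions' | 'translations' = 'translations'

const url = whisperApiEndpoint + mode
console.log(url) // https://api.openai.com/v1/audio/translations
```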

src/types.ts

+1
@@ -2,6 +2,7 @@ export type UseWhisperConfig = {
   apiKey?: string
   autoStart?: boolean
   autoTranscribe?: boolean
+  mode?: 'transcriptions' | 'translations'
   nonStop?: boolean
   removeSilence?: boolean
   stopTimeout?: number
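Because the new field is a two-member string union rather than a plain string, an unsupported value is rejected at compile time. A short sketch; the relative import path refers to this repo's internal module and the 'subtitles' value is purely hypothetical:

```ts
import type { UseWhisperConfig } from './types'

// Sketch: the union type narrows mode to the two supported values.
const ok: UseWhisperConfig = { mode: 'translations' }

// @ts-expect-error 'subtitles' is not assignable to 'transcriptions' | 'translations'
const bad: UseWhisperConfig = { mode: 'subtitles' }
```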

src/useWhisper.ts

+8 -4
@@ -13,7 +13,7 @@ import {
   silenceRemoveCommand,
   whisperApiEndpoint,
 } from './configs'
-import type {
+import {
   UseWhisperConfig,
   UseWhisperHook,
   UseWhisperTimeout,

@@ -27,11 +27,12 @@ const defaultConfig: UseWhisperConfig = {
   apiKey: '',
   autoStart: false,
   autoTranscribe: true,
+  mode: 'transcriptions',
   nonStop: false,
   removeSilence: false,
   stopTimeout: defaultStopTimeout,
   streaming: false,
-  timeSlice: 2_000,
+  timeSlice: 1_000,
   onDataAvailable: undefined,
   onTranscribe: undefined,
 }

@@ -59,6 +60,7 @@ export const useWhisper: UseWhisperHook = (config) => {
     apiKey,
     autoStart,
     autoTranscribe,
+    mode,
     nonStop,
     removeSilence,
     stopTimeout,

@@ -462,7 +464,9 @@ export const useWhisper: UseWhisperHook = (config) => {
       const body = new FormData()
       body.append('file', file)
       body.append('model', 'whisper-1')
-      body.append('language', whisperConfig?.language ?? 'en')
+      if (mode === 'transcriptions') {
+        body.append('language', whisperConfig?.language ?? 'en')
+      }
       if (whisperConfig?.prompt) {
         body.append('prompt', whisperConfig.prompt)
       }

@@ -478,7 +482,7 @@ export const useWhisper: UseWhisperHook = (config) => {
         headers['Authorization'] = `Bearer ${apiKey}`
       }
       const { default: axios } = await import('axios')
-      const response = await axios.post(whisperApiEndpoint, body, {
+      const response = await axios.post(whisperApiEndpoint + mode, body, {
         headers,
       })
       return response.data.text
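End to end, the hook now posts to /v1/audio/transcriptions or /v1/audio/translations depending on mode, and only sends the language field for transcriptions. A hedged usage sketch follows; the package import path and the fields returned by the hook are assumptions taken from the project README, not part of this diff:

```tsx
import { useWhisper } from '@chengsokdara/use-whisper' // assumed package name

const App = () => {
  // With mode: 'translations' the recorded speech is translated to English,
  // so no language parameter is appended to the request body.
  const { transcript, startRecording, stopRecording } = useWhisper({
    apiKey: '<OPENAI_API_KEY>', // placeholder
    mode: 'translations',
  })

  return (
    <div>
      <button onClick={() => startRecording()}>Start</button>
      <button onClick={() => stopRecording()}>Stop</button>
      <p>{transcript.text}</p>
    </div>
  )
}

export default App
```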
