Skip to content

Commit bd6fc01

Browse files
committed
stop sending audio to Whisper if it is empty
1 parent f112fab commit bd6fc01

10 files changed

+399
-298
lines changed

.github/workflows/npm-publish.yml

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# This workflow will run tests using node and then publish a package to npmjs.com when a release is created
2+
# For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages
3+
4+
name: Publish to npmjs.com
5+
6+
on:
7+
release:
8+
types: [published]
9+
10+
jobs:
11+
build:
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: actions/checkout@v3
15+
- uses: actions/setup-node@v3
16+
with:
17+
node-version: 16
18+
- run: yarn
19+
- run: yarn test
20+
21+
publish-npm:
22+
needs: build
23+
runs-on: ubuntu-latest
24+
steps:
25+
- uses: actions/checkout@v3
26+
- uses: actions/setup-node@v3
27+
with:
28+
node-version: 16
29+
registry-url: https://registry.npmjs.org/
30+
- run: yarn
31+
- run: yarn publish
32+
env:
33+
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}

CHANGELOG.md

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7+
8+
## [Unreleased]
9+
10+
## [0.0.11] - 2023-04-01
11+
12+
### Added
13+
14+
- react-native support
15+
- demo web app
16+
- server app example
17+
18+
## [0.0.10] - 2023-03-08
19+
20+
### Added
21+
22+
- this changelog file
23+
- comments for every function and variable
24+
- @chengsokdara/react-hooks-async for useCallbackAsync and useEffectAsync
25+
- GitHub Actions CI/CD
26+
27+
### Fixed
28+
29+
- audio is no longer sent to Whisper if the converted mp3 audio is empty
30+
31+
### Changed
32+
33+
- transcripting state changed to transcribing to make it a valid word
34+
- moved string constants to configs.ts
35+
36+
### Removed
37+
38+
- hooks directory, now use @chengsokdara/react-hooks-async package instead
39+
- remove console.log from distribution build

README.md

+24-18
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
# useWhisper
1+
# useWhisper()
22

33
React Hook for OpenAI Whisper API with speech recorder and silence removal built-in
44

55
---
66

7-
_Try OpenAI API price calculator, token counter, and dataset manager (preview)_
7+
_Try OpenAI API price calculator, token counter, and dataset manager (preview)_
88
[https://openai-price-calculator.web.app](https://openai-price-calculator.web.app)
99

1010
- ### Install
@@ -24,8 +24,8 @@ const App = () => {
2424
const {
2525
recording,
2626
speaking,
27+
transcribing,
2728
transcript,
28-
transcripting,
2929
pauseRecording,
3030
startRecording,
3131
stopRecording,
@@ -37,7 +37,7 @@ const App = () => {
3737
<div>
3838
<p>Recording: {recording}</p>
3939
<p>Speaking: {speaking}</p>
40-
<p>Transcripting: {transcripting}</p>
40+
<p>Transcribing: {transcribing}</p>
4141
<p>Transcribed Text: {transcript.text}</p>
4242
<button onClick={() => startRecording()}>Start</button>
4343
<button onClick={() => pauseRecording()}>Pause</button>
@@ -117,7 +117,6 @@ import { useWhisper } from '@chengsokdara/use-whisper'
117117
const App = () => {
118118
const { transcript } = useWhisper({
119119
// will auto start recording speech upon component mounted
120-
//
121120
autoStart: true,
122121
})
123122

@@ -151,10 +150,13 @@ const App = () => {
151150

152151
- ### Dependencies
153152

154-
- **recordrtc:** cross-browser audio recorder
155-
- **@ffmpeg/ffmpeg:** for remove silence feature
156-
- **hark:** for speaking detection
157-
- **axios:** since fetch does not work with Whisper API
153+
most of these dependencies are lazy loaded, so they are only imported when needed
154+
155+
- **@chengsokdara/react-hooks-async:** asynchronous React hooks
156+
- **recordrtc:** cross-browser audio recorder
157+
- **@ffmpeg/ffmpeg:** for silence removal feature
158+
- **hark:** for speaking detection
159+
- **axios:** since fetch does not work with Whisper endpoint
158160

159161
- ### API
160162

@@ -171,15 +173,15 @@ const App = () => {
171173

172174
- ###### Return Object
173175

174-
| Name | Type | Description |
175-
| -------------- | ----------------------------- | ----------------------------------------------------------------- |
176-
| recording | boolean | speech recording state |
177-
| speaking | boolean | detect when user is speaking |
178-
| transcript | [**Transcript**](#transcript) | object return after Whisper transcription complete |
179-
| transcripting | boolean | remove silence from speech and send request to OpenAI Whisper API |
180-
| pauseRecording | Promise | pause speech recording |
181-
| startRecording | Promise | start speech recording |
182-
| stopRecording | Promise | stop speech recording |
176+
| Name | Type | Description |
177+
| -------------- | ----------------------------- | ------------------------------------------------------------------------- |
178+
| recording | boolean | speech recording state |
179+
| speaking | boolean | detect when user is speaking |
180+
| transcribing | boolean | true while removing silence and sending the request to OpenAI Whisper API |
181+
| transcript | [**Transcript**](#transcript) | object return after Whisper transcription complete |
182+
| pauseRecording | Promise | pause speech recording |
183+
| startRecording | Promise | start speech recording |
184+
| stopRecording | Promise | stop speech recording |
183185

184186
- ###### Transcript
185187

@@ -188,6 +190,10 @@ const App = () => {
188190
| blob | Blob | recorded speech in JavaScript Blob |
189191
| text | string | transcribed text returned from Whisper API |
190192

193+
- ### Roadmap
194+
195+
- react-native support, will be exported as use-whisper/native
196+
191197
---
192198

193199
**_Contact me for web or mobile app development using React or React Native_**

package.json

+6-6
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@
2323
"author": "Sokdara Cheng <[email protected]> (https://chengsokdara.github.io)",
2424
"repository": "github:chengsokdara/use-whisper",
2525
"scripts": {
26-
"ts-types": "tsc",
27-
"build-dev": "rimraf lib && env NODE_ENV=development tsup",
28-
"build-prod": "rimraf dist && env NODE_ENV=production tsup",
29-
"build-watch": "rimraf lib && env NODE_ENV=development tsup --watch",
26+
"build-dev": "env NODE_ENV=development tsup",
27+
"build-prod": "env NODE_ENV=production tsup",
3028
"lint-fix": "eslint ./src --ext .ts,.tsx --quiet --fix --ignore-path ./.gitignore",
31-
"lint-format": "prettier --loglevel warn --write \"./**/*.{ts,tsx,css,md,json}\" ",
32-
"lint": "yarn lint-format && yarn lint-fix ",
29+
"lint-format": "prettier --loglevel warn --write \"./**/*.{ts,tsx,css,md,json}\"",
30+
"lint": "yarn lint-format && yarn lint-fix",
31+
"test": "yarn run tsc && yarn run lint",
3332
"prepublishOnly": "yarn run build-prod"
3433
},
3534
"dependencies": {
35+
"@chengsokdara/react-hooks-async": "^0.0.1",
3636
"@ffmpeg/ffmpeg": "^0.11.6",
3737
"axios": "^1.3.4",
3838
"hark": "^1.2.3",

src/configs.ts

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
export const defaultStopTimeout = 5_000
2+
3+
export const ffmpegCoreUrl =
4+
'https://unpkg.com/@ffmpeg/[email protected]/dist/ffmpeg-core.js'
5+
6+
export const silenceRemoveCommand =
7+
'silenceremove=start_periods=1:stop_periods=-1:start_threshold=-30dB:stop_threshold=-30dB:start_silence=2:stop_silence=2'
8+
9+
export const whisperApiEndpoint =
10+
'https://api.openai.com/v1/audio/transcriptions'

src/hooks.ts

-79
This file was deleted.

src/types.ts

+2-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ export type UseWhisperConfig = {
22
apiKey?: string
33
autoStart?: boolean
44
customServer?: string
5-
// pauseTimeout?: number
65
nonStop?: boolean
76
removeSilence?: boolean
87
stopTimeout?: number
@@ -14,20 +13,19 @@ export type CustomServerRequestBody = {
1413
}
1514

1615
export type UseWhisperTimeout = {
17-
pause?: NodeJS.Timeout
1816
stop?: NodeJS.Timeout
1917
}
2018

2119
export type UseWhisperTranscript = {
2220
blob: Blob
23-
text: string
21+
text?: string
2422
}
2523

2624
export type UseWhisperReturn = {
2725
recording: boolean
2826
speaking: boolean
27+
transcribing: boolean
2928
transcript?: UseWhisperTranscript
30-
transcripting: boolean
3129
pauseRecording: () => Promise<void>
3230
startRecording: () => Promise<void>
3331
stopRecording: () => Promise<void>

0 commit comments

Comments
 (0)