
Commit 8736de7

Merge pull request #146 from Cloud-Code-AI/141-add-loader-for-model
fix: added model loading percentage in chrome
2 parents de738b3 + a85682c
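This PR threads a progress callback from BrowserAI's loadModel into the extension UI so the user sees a download percentage instead of a frozen screen. Judging from the diffs below, the callback receives an object whose progress field is a 0-1 fraction and whose eta field is seconds remaining; those two field names come straight from the code, while the type and helper names in this sketch are hypothetical:

import { BrowserAI } from '@browserai/browserai';

// Shape inferred from the executors below; only `progress` and `eta` are
// actually read. (Assumed, not a documented BrowserAI type.)
interface ModelLoadProgress {
  progress?: number; // download fraction in [0, 1]; scaled by 100 before display
  eta?: number;      // estimated seconds remaining
}

// Hypothetical wrapper showing the contract the PR relies on.
async function loadWithProgress(
  model: string,
  report: (percent: number, eta: number) => void
): Promise<BrowserAI> {
  const browserAI = new BrowserAI();
  await browserAI.loadModel(model, {
    onProgress: (p: ModelLoadProgress) => report((p.progress ?? 0) * 100, p.eta ?? 0),
  });
  return browserAI;
}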

4 files changed: +179 -22 lines changed

examples/tts-demo/src/App.tsx (+130 -10)
@@ -108,12 +108,79 @@ const Status = styled.div`
   gap: 8px;
 `;
 
+const ButtonGroup = styled.div`
+  display: flex;
+  gap: 1rem;
+  justify-content: center;
+`;
+
+const Select = styled.select`
+  width: 100%;
+  padding: 0.8rem;
+  border-radius: 8px;
+  background: #2a2a2a;
+  color: white;
+  border: 1px solid #444;
+  margin-bottom: 1rem;
+
+  &:focus {
+    outline: none;
+    border-color: #4CAF50;
+  }
+`;
+
+const InputGroup = styled.div`
+  display: flex;
+  gap: 1rem;
+  margin-bottom: 1rem;
+`;
+
+const RangeInput = styled.input`
+  width: 100%;
+  background: #2a2a2a;
+  -webkit-appearance: none;
+  height: 8px;
+  border-radius: 4px;
+  margin: 10px 0;
+
+  &::-webkit-slider-thumb {
+    -webkit-appearance: none;
+    width: 20px;
+    height: 20px;
+    background: #4CAF50;
+    border-radius: 50%;
+    cursor: pointer;
+  }
+`;
+
+const Label = styled.label`
+  color: #888;
+  margin-bottom: 0.5rem;
+  display: block;
+`;
+
+const VOICE_OPTIONS = [
+  { id: 'af_bella', name: 'Bella', language: 'en-us', gender: 'Female' },
+  { id: 'af_nicole', name: 'Nicole', language: 'en-us', gender: 'Female' },
+  { id: 'af_sarah', name: 'Sarah', language: 'en-us', gender: 'Female' },
+  { id: 'af_sky', name: 'Sky', language: 'en-us', gender: 'Female' },
+  { id: 'am_adam', name: 'Adam', language: 'en-us', gender: 'Male' },
+  { id: 'am_michael', name: 'Michael', language: 'en-us', gender: 'Male' },
+  { id: 'bf_emma', name: 'Emma', language: 'en-gb', gender: 'Female' },
+  { id: 'bf_isabella', name: 'Isabella', language: 'en-gb', gender: 'Female' },
+  { id: 'bm_george', name: 'George', language: 'en-gb', gender: 'Male' },
+  { id: 'bm_lewis', name: 'Lewis', language: 'en-gb', gender: 'Male' },
+];
+
 function App() {
   const [text, setText] = useState('');
   const [status, setStatus] = useState('');
   const [isLoading, setIsLoading] = useState(false);
   const [ttsAI] = useState(new BrowserAI());
   const [isModelLoaded, setIsModelLoaded] = useState(false);
+  const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
+  const [selectedVoice, setSelectedVoice] = useState('af_bella');
+  const [speed, setSpeed] = useState(1.0);
 
   const loadModel = async () => {
     try {
@@ -145,6 +212,7 @@ function App() {
       if (audioData) {
         // Create a blob with WAV MIME type
         const blob = new Blob([audioData], { type: 'audio/wav' });
+        setAudioBlob(blob); // Store the blob for download
         const audioUrl = URL.createObjectURL(blob);
 
         // Create and play audio element
@@ -173,6 +241,19 @@ function App() {
     }
   };
 
+  const downloadAudio = () => {
+    if (audioBlob) {
+      const url = URL.createObjectURL(audioBlob);
+      const a = document.createElement('a');
+      a.href = url;
+      a.download = 'generated-speech.wav';
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+    }
+  };
+
   return (
     <>
       <Banner>
@@ -197,23 +278,62 @@ function App() {
         </ButtonContent>
       </Button>
 
+      <InputGroup>
+        <div style={{ flex: 1 }}>
+          <Label>Voice</Label>
+          <Select
+            value={selectedVoice}
+            onChange={(e) => setSelectedVoice(e.target.value)}
+            disabled={!isModelLoaded || isLoading}
+          >
+            {VOICE_OPTIONS.map(voice => (
+              <option key={voice.id} value={voice.id}>
+                {voice.name} ({voice.language}, {voice.gender})
+              </option>
+            ))}
+          </Select>
+        </div>
+        <div style={{ flex: 1 }}>
+          <Label>Speed: {speed.toFixed(1)}x</Label>
+          <RangeInput
+            type="range"
+            min="0.2"
+            max="2"
+            step="0.1"
+            value={speed}
+            onChange={(e) => setSpeed(parseFloat(e.target.value))}
+            disabled={!isModelLoaded || isLoading}
+          />
+        </div>
+      </InputGroup>
+
       <TextArea
         value={text}
         onChange={(e) => setText(e.target.value)}
         placeholder="Enter text to convert to speech..."
        disabled={!isModelLoaded || isLoading}
       />
 
-      <Button
-        onClick={speak}
-        disabled={!isModelLoaded || isLoading || !text.trim()}
-        isLoading={isLoading && isModelLoaded}
-      >
-        <ButtonContent>
-          {(isLoading && isModelLoaded) && <Spinner />}
-          {isLoading ? 'Processing...' : 'Speak'}
-        </ButtonContent>
-      </Button>
+      <ButtonGroup>
+        <Button
+          onClick={speak}
+          disabled={!isModelLoaded || isLoading || !text.trim()}
+          isLoading={isLoading && isModelLoaded}
+        >
+          <ButtonContent>
+            {(isLoading && isModelLoaded) && <Spinner />}
+            {isLoading ? 'Processing...' : 'Speak'}
+          </ButtonContent>
+        </Button>
+
+        {audioBlob && (
+          <Button onClick={downloadAudio}>
+            <ButtonContent>
+              Download Audio
+            </ButtonContent>
+          </Button>
+        )}
+      </ButtonGroup>
 
       {(status || isLoading) && (
         <Status>
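The demo now holds selectedVoice and speed state, but the hunk where those values reach the synthesis call is not part of this diff. A hypothetical sketch of that step inside App(), assuming ttsAI.textToSpeech accepts a voice/speed options object (the method's option names are assumptions, not confirmed by this PR):

// Inside App(); `ttsAI`, `text`, `selectedVoice`, `speed`, and `setAudioBlob`
// come from the component state shown above.
const speakWithOptions = async () => {
  // Option names `voice` and `speed` are assumed here.
  const audioData = await ttsAI.textToSpeech(text, {
    voice: selectedVoice, // e.g. 'af_bella'
    speed,                // 0.2-2.0, from the range input
  });
  const blob = new Blob([audioData as BlobPart], { type: 'audio/wav' });
  setAudioBlob(blob); // enables the Download Audio button
  new Audio(URL.createObjectURL(blob)).play();
};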

extensions/chrome/package.json (+1 -1)
@@ -18,7 +18,7 @@
     "preview": "vite preview"
   },
   "dependencies": {
-    "@browserai/browserai": "^1.0.27",
+    "@browserai/browserai": "^1.0.29",
     "@radix-ui/react-accordion": "^1.2.2",
     "@radix-ui/react-alert-dialog": "^1.1.5",
     "@radix-ui/react-aspect-ratio": "^1.1.1",

extensions/chrome/src/helpers/executors.tsx (+21 -11)
@@ -25,6 +25,7 @@ export type StepStatus = 'pending' | 'running' | 'completed' | 'error';
 interface ExecuteWorkflowParams {
   nodes: WorkflowStep[];
   onProgress?: (message: string) => void;
+  onModelLoadProgress?: (progress: number, eta: number) => void;
   setNodes: (updater: any) => void;
   isTestMode?: boolean;
 }
@@ -116,13 +117,17 @@
     };
   },
 
-  'chatAgent': async (node: WorkflowStep, input: any) => {
+  'chatAgent': async (node: WorkflowStep, input: any, params?: ExecuteWorkflowParams) => {
     try {
       const browserAI = new BrowserAI();
 
-      // Use the systemPrompt from nodeData or from previous system-prompt node
-      // const systemPrompt = node.nodeData?.systemPrompt || input?.systemPrompt || '';
-      await browserAI.loadModel(node.nodeData?.model || 'llama-3.2-1b-instruct');
+      await browserAI.loadModel(node.nodeData?.model || 'llama-3.2-1b-instruct', {
+        onProgress: (progress: any) => {
+          const progressPercent = progress.progress || 0;
+          const eta = progress.eta || 0;
+          params?.onModelLoadProgress?.(progressPercent * 100, eta);
+        }
+      });
 
       // Safely prepare the input
       let promptInput = '';
@@ -207,14 +212,18 @@
     };
   },
 
-  'transcriptionAgent': async (node: WorkflowStep, input: any) => {
+  'transcriptionAgent': async (node: WorkflowStep, input: any, params?: ExecuteWorkflowParams) => {
     try {
       console.debug("transcription-agent", node, input);
       const browserAI = new BrowserAI();
 
-      // Load the specified Whisper model or default to tiny
-      const modelName = node.nodeData?.model || 'whisper-tiny-en';
-      await browserAI.loadModel(modelName);
+      await browserAI.loadModel(node.nodeData?.model || 'whisper-tiny-en', {
+        onProgress: (progress: any) => {
+          const progressPercent = progress.progress || 0;
+          const eta = progress.eta || 0;
+          params?.onModelLoadProgress?.(progressPercent * 100, eta);
+        }
+      });
 
       // Extract audio data from input
       if (!input?.audioData) {
@@ -223,14 +232,14 @@
 
       // Transcribe the audio
       const transcription = await browserAI.transcribeAudio(input.audioData, {
-        model: modelName,
+        model: node.nodeData?.model || 'whisper-tiny-en',
         // Add any additional options here
       });
 
       return {
         success: true,
         output: transcription,
-        log: `Audio transcribed successfully using ${modelName}`
+        log: `Audio transcribed successfully using ${node.nodeData?.model || 'whisper-tiny-en'}`
       };
     } catch (error) {
       console.error('TranscriptionAgent error:', error);
@@ -242,6 +251,7 @@
 export const executeWorkflow = async ({
   nodes,
   onProgress,
+  onModelLoadProgress,
   setNodes,
 }: ExecuteWorkflowParams): Promise<WorkflowResult> => {
   try {
@@ -322,7 +332,7 @@
 
     console.debug("Final nodeInput:", nodeInput);
 
-    const result = await executor(node, nodeInput);
+    const result = await executor(node, nodeInput, { onProgress, onModelLoadProgress, setNodes, nodes });
     console.debug("Node execution result:", result);
 
     // Store output in workflow data
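Note that the same onProgress adapter is now duplicated verbatim in chatAgent and transcriptionAgent. A possible follow-up (not part of this PR) is to factor it into one shared helper in the same file:

// Builds the loadModel options once, forwarding load progress to the UI.
const withLoadProgress = (params?: ExecuteWorkflowParams) => ({
  onProgress: (progress: any) => {
    const progressPercent = progress.progress || 0;
    const eta = progress.eta || 0;
    params?.onModelLoadProgress?.(progressPercent * 100, eta);
  },
});

// Usage inside an executor:
// await browserAI.loadModel(node.nodeData?.model || 'whisper-tiny-en', withLoadProgress(params));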

extensions/chrome/src/popup/workflow-view.tsx (+27 -0)
@@ -56,6 +56,8 @@ export function WorkflowView({ workflow, onBack }: WorkflowViewProps) {
   const [inputs, setInputs] = useState<Record<string, AudioInput | string>>({})
   const [finalOutput, setFinalOutput] = useState<string | null>(null)
   const [expandedInputs, setExpandedInputs] = useState<Record<string, boolean>>({})
+  const [modelLoadProgress, setModelLoadProgress] = useState<number | null>(null);
+  const [modelLoadEta, setModelLoadEta] = useState<number | null>(null);
 
   useEffect(() => {
     console.log('Workflow data received:', workflow);
@@ -88,6 +90,8 @@ export function WorkflowView({ workflow, onBack }: WorkflowViewProps) {
   const handleExecute = async () => {
     setIsExecuting(true);
     setExecutionProgress('');
+    setModelLoadProgress(null);
+    setModelLoadEta(null);
 
     try {
       // Process nodes and convert audio files to base64
@@ -127,6 +131,10 @@ export function WorkflowView({ workflow, onBack }: WorkflowViewProps) {
         onProgress: (progress: string) => {
           setExecutionProgress(progress);
         },
+        onModelLoadProgress: (progress: number, eta: number) => {
+          setModelLoadProgress(progress);
+          setModelLoadEta(eta);
+        },
         setNodes: (updatedNodes: WorkflowStep[]) => {
           setNodes(updatedNodes);
         }
@@ -275,6 +283,25 @@ export function WorkflowView({ workflow, onBack }: WorkflowViewProps) {
       </div>
 
       <div className="flex-1 overflow-y-auto">
+        {modelLoadProgress !== null && (
+          <div className="p-2 bg-primary/10 text-primary text-sm sticky top-0 z-10">
+            <div className="flex items-center gap-2">
+              <div className="flex-1 h-1.5 bg-primary/20 rounded-full overflow-hidden">
+                <div
+                  className="h-full bg-primary transition-all duration-300 rounded-full"
+                  style={{ width: `${modelLoadProgress}%` }}
+                />
+              </div>
+              <span className="text-xs whitespace-nowrap">
+                Downloading model: {Math.round(modelLoadProgress)}%
+                {modelLoadEta !== null && modelLoadEta > 0 && (
+                  ` (${modelLoadEta.toFixed(1)}s remaining)`
+                )}
+              </span>
+            </div>
+          </div>
+        )}
+
         {executionProgress && (
           <div className="p-2 bg-primary/10 text-primary text-sm sticky top-0 z-10">
             {executionProgress}
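The banner prints the ETA as raw seconds with one decimal, which reads awkwardly once a download stretches past a minute. A small formatter, sketched as a possible refinement rather than anything in this PR:

// Hypothetical helper: render seconds as "42.0s remaining" or "3m 05s remaining".
function formatEta(seconds: number): string {
  if (seconds < 60) return `${seconds.toFixed(1)}s remaining`;
  const minutes = Math.floor(seconds / 60);
  const secs = Math.round(seconds % 60);
  return `${minutes}m ${String(secs).padStart(2, '0')}s remaining`;
}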
