Skip to content

Commit 76789f9

Browse files
committed
Added functionality to send contextual data without completing the current turn
1 parent 61655b6 commit 76789f9

File tree

5 files changed

+148
-5
lines changed

5 files changed

+148
-5
lines changed

README.md

+31
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,37 @@ Project consists of:
101101
- communication layer for processing audio in and out
102102
- a boilerplate view for starting to build your apps and view logs
103103

104+
## Handling Context with Incomplete Turns
105+
106+
When working with the Live API, you might want to send contextual text data while continuing to stream real-time input. This is useful when you want to provide additional context to the model without ending the current conversation turn.
107+
108+
### Example: Sending Context Without Completing a Turn
109+
110+
```typescript
111+
// Send context without completing the turn
112+
client.sendContext([{ text: "The user is looking at a laptop." }]);
113+
114+
// Continue with real-time input (by default this first force-completes any incomplete turn)
115+
client.sendRealtimeInput([
116+
{
117+
mimeType: "audio/pcm;rate=16000",
118+
data: audioData,
119+
}
120+
]);
121+
122+
// Check if there's an incomplete turn
123+
if (client.hasIncompleteTurn()) {
124+
// Manually complete a turn if needed
125+
client.completeTurn();
126+
}
127+
```
128+
129+
### Differences Between send(), sendContext(), and sendRealtimeInput()
130+
131+
- `send(parts, turnComplete = true)`: Standard method to send text data. By default completes the turn.
132+
- `sendContext(parts)`: A convenience method that calls `send(parts, false)`. Used when providing context without completing the turn.
133+
- `sendRealtimeInput(chunks, completeTurn = true)`: Sends real-time media data. The `completeTurn` parameter determines if any incomplete turns should be automatically completed first.
134+
104135
## Available Scripts
105136

106137
In the project directory, you can run:

src/components/control-tray/ControlTray.tsx

+2-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ function ControlTray({
9797
mimeType: "audio/pcm;rate=16000",
9898
data: base64,
9999
},
100-
]);
100+
], true);
101101
};
102102
if (connected && !muted && audioRecorder) {
103103
audioRecorder.on("data", onData).on("volume", setInVolume).start();
@@ -131,7 +131,7 @@ function ControlTray({
131131
ctx.drawImage(videoRef.current, 0, 0, canvas.width, canvas.height);
132132
const base64 = canvas.toDataURL("image/jpeg", 1.0);
133133
const data = base64.slice(base64.indexOf(",") + 1, Infinity);
134-
client.sendRealtimeInput([{ mimeType: "image/jpeg", data }]);
134+
client.sendRealtimeInput([{ mimeType: "image/jpeg", data }], true);
135135
}
136136
if (connected) {
137137
timeoutId = window.setTimeout(sendVideoFrame, 1000 / 0.5);

src/components/side-panel/SidePanel.tsx

+25-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export default function SidePanel() {
4242
value: string;
4343
label: string;
4444
} | null>(null);
45+
const [completeTurn, setCompleteTurn] = useState(true);
4546
const inputRef = useRef<HTMLTextAreaElement>(null);
4647

4748
//scroll the log to the bottom when new logs come in
@@ -70,7 +71,11 @@ export default function SidePanel() {
7071

7172
const handleSubmit = () => {
7273
if (isValidInput(textInput)) {
73-
client.send([{ text: textInput }]);
74+
if (completeTurn) {
75+
client.send([{ text: textInput }]);
76+
} else {
77+
client.sendContext([{ text: textInput }]);
78+
}
7479

7580
setTextInput("");
7681
if (inputRef.current) {
@@ -133,6 +138,25 @@ export default function SidePanel() {
133138
/>
134139
</div>
135140
<div className={cn("input-container", { disabled: !connected })}>
141+
<div className="turn-option">
142+
<label>
143+
<input
144+
type="checkbox"
145+
checked={completeTurn}
146+
onChange={(e) => setCompleteTurn(e.target.checked)}
147+
/>
148+
<span>Complete turn</span>
149+
</label>
150+
{client.hasIncompleteTurn() && (
151+
<button
152+
className="complete-turn-button"
153+
onClick={() => client.completeTurn()}
154+
title="Force complete the current turn"
155+
>
156+
Complete turn
157+
</button>
158+
)}
159+
</div>
136160
<div className="input-content">
137161
<textarea
138162
className="input-area"

src/components/side-panel/side-panel.scss

+34-1
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,46 @@
136136
}
137137

138138
.input-container {
139-
height: 50px;
139+
height: 80px;
140140
flex-grow: 0;
141141
flex-shrink: 0;
142142
border-top: 1px solid var(--Neutral-20);
143143
padding: 14px 25px;
144144
overflow: hidden;
145145

146+
.turn-option {
147+
display: flex;
148+
align-items: center;
149+
justify-content: space-between;
150+
margin-bottom: 8px;
151+
color: var(--Neutral-90);
152+
font-size: 12px;
153+
154+
label {
155+
display: flex;
156+
align-items: center;
157+
cursor: pointer;
158+
159+
input[type="checkbox"] {
160+
margin-right: 5px;
161+
}
162+
}
163+
164+
.complete-turn-button {
165+
background: var(--Neutral-20);
166+
border: none;
167+
color: var(--Neutral-90);
168+
padding: 2px 8px;
169+
border-radius: 4px;
170+
font-size: 11px;
171+
cursor: pointer;
172+
173+
&:hover {
174+
background: var(--Neutral-30);
175+
}
176+
}
177+
}
178+
146179
.input-content {
147180
position: relative;
148181
background: var(--Neutral-10);

src/lib/multimodal-live-client.ts

+56-1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
6969
public ws: WebSocket | null = null;
7070
protected config: LiveConfig | null = null;
7171
public url: string = "";
72+
private incompleteTurn: boolean = false;
73+
7274
public getConfig() {
7375
return { ...this.config };
7476
}
@@ -198,6 +200,7 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
198200
if (isTurnComplete(serverContent)) {
199201
this.log("server.send", "turnComplete");
200202
this.emit("turncomplete");
203+
this.incompleteTurn = false;
201204
// plausibly there's more to the message, continue
202205
}
203206

@@ -239,7 +242,7 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
239242
/**
240243
* send realtimeInput: base64-encoded chunks of "audio/pcm" and/or "image/jpeg"
241244
*/
242-
sendRealtimeInput(chunks: GenerativeContentBlob[]) {
245+
sendRealtimeInput(chunks: GenerativeContentBlob[], completeTurn: boolean = true) {
243246
let hasAudio = false;
244247
let hasVideo = false;
245248
for (let i = 0; i < chunks.length; i++) {
@@ -263,6 +266,21 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
263266
? "video"
264267
: "unknown";
265268

269+
// If we're in the middle of a turn that's not complete, send a ClientContentMessage
270+
// with turnComplete=true first to close the previous turn
271+
if (completeTurn && this.incompleteTurn) {
272+
// Force-complete any previous turn by sending an empty ClientContentMessage
273+
const completionMessage: ClientContentMessage = {
274+
clientContent: {
275+
turns: [],
276+
turnComplete: true,
277+
},
278+
};
279+
this._sendDirect(completionMessage);
280+
this.log(`client.completeTurn`, "completing previous turn");
281+
this.incompleteTurn = false;
282+
}
283+
266284
const data: RealtimeInputMessage = {
267285
realtimeInput: {
268286
mediaChunks: chunks,
@@ -303,6 +321,18 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
303321

304322
this._sendDirect(clientContentRequest);
305323
this.log(`client.send`, clientContentRequest);
324+
325+
// Update the incomplete turn state
326+
this.incompleteTurn = !turnComplete;
327+
}
328+
329+
/**
330+
* Sends contextual text data without completing the turn.
331+
* This is useful when you want to provide context to the model
332+
* while continuing to stream realtime input.
333+
*/
334+
sendContext(parts: Part | Part[]) {
335+
this.send(parts, false);
306336
}
307337

308338
/**
@@ -316,4 +346,29 @@ export class MultimodalLiveClient extends EventEmitter<MultimodalLiveClientEvent
316346
const str = JSON.stringify(request);
317347
this.ws.send(str);
318348
}
349+
350+
/**
351+
* Returns true while a turn sent with turnComplete=false has not yet been completed.
352+
*/
353+
hasIncompleteTurn(): boolean {
354+
return this.incompleteTurn;
355+
}
356+
357+
/**
358+
* Forcibly completes any ongoing turn. Use this if you need to ensure
359+
* all incomplete turns are finished before starting something new.
360+
*/
361+
completeTurn(): void {
362+
if (this.incompleteTurn) {
363+
const completionMessage: ClientContentMessage = {
364+
clientContent: {
365+
turns: [],
366+
turnComplete: true,
367+
},
368+
};
369+
this._sendDirect(completionMessage);
370+
this.log(`client.completeTurn`, "manually completing turn");
371+
this.incompleteTurn = false;
372+
}
373+
}
319374
}

0 commit comments

Comments
 (0)