diff --git a/website/_blogs/2024-12-20-RealtimeAgent/index.mdx b/website/_blogs/2024-12-20-RealtimeAgent/index.mdx
index 5180e4c08..26f1fd9ef 100644
--- a/website/_blogs/2024-12-20-RealtimeAgent/index.mdx
+++ b/website/_blogs/2024-12-20-RealtimeAgent/index.mdx
@@ -256,14 +256,16 @@ async def handle_media_stream(websocket: WebSocket):
         )
     ```
 4. **Registering the Swarm**
-   The [**`RealtimeAgent`**](/docs/reference/agentchat/realtime_agent/realtime_agent) is linked to a Swarm of agents responsible for different customer service tasks.
+   The [**`RealtimeAgent`**](/docs/reference/agentchat/realtime_agent/realtime_agent) can be patched with `register_swarm` to integrate it into a swarm of agents.
+   - `realtime_agent`: The [**`RealtimeAgent`**](/docs/reference/agentchat/realtime_agent/realtime_agent) to include in the swarm.
    - `initial_agent`: The first agent to process incoming queries (e.g., a triage agent).
    - `agents`: A list of specialized agents for handling specific tasks like flight modifications, cancellations, or lost baggage.

    ```python
-    realtime_agent.register_swarm(
+    register_swarm(
+        realtime_agent=realtime_agent,
         initial_agent=triage_agent,
         agents=[triage_agent, flight_modification, flight_cancel, flight_change, lost_baggage],
-    )
+    )
    ```
 5. **Running the RealtimeAgent**
    The `run()` method is invoked to start the [**`RealtimeAgent`**](/docs/reference/agentchat/realtime_agent/realtime_agent), enabling it to handle real-time voice interactions and delegate tasks to the registered Swarm agents.
@@ -308,7 +310,8 @@ async def handle_media_stream(websocket: WebSocket):
         audio_adapter=audio_adapter,
     )

-    realtime_agent.register_swarm(
+    register_swarm(
+        realtime_agent=realtime_agent,
         initial_agent=triage_agent,
         agents=[triage_agent, flight_modification, flight_cancel, flight_change, lost_baggage],
     )
diff --git a/website/_blogs/2025-01-09-RealtimeAgent-over-WebRTC/index.mdx b/website/_blogs/2025-01-09-RealtimeAgent-over-WebRTC/index.mdx
index 3be5485e8..012d66370 100644
--- a/website/_blogs/2025-01-09-RealtimeAgent-over-WebRTC/index.mdx
+++ b/website/_blogs/2025-01-09-RealtimeAgent-over-WebRTC/index.mdx
@@ -133,52 +133,66 @@ The [WebSocket](https://fastapi.tiangolo.com/advanced/websockets/) is responsibl
 ```javascript
 ws = new WebSocket(webSocketUrl);
+ws.onopen = event => {
+    console.log("web socket opened")
+}
+
 ws.onmessage = async event => {
-    const message = JSON.parse(event.data);
-    console.info("Received Message from AG2 backend", message);
-    if (message.type === "ag2.init") {
-        await openRTC(message.config); // Starts the WebRTC connection
-        return;
+    const message = JSON.parse(event.data)
+    console.info("Received Message from AG2 backend", message)
+    const type = message.type
+    if (type == "ag2.init") {
+        await openRTC(message, resolve, reject)
+        return
     }
+    const messageJSON = JSON.stringify(message)
     if (dc) {
-        dc.send(JSON.stringify(message)); // Sends data via DataChannel
+        dc.send(messageJSON)
     } else {
-        console.log("DC not ready yet", message);
+        console.log("DC not ready yet, queueing", message)
+        quedMessages.push(messageJSON)
     }
-};
+}
 ```
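Reviewer note on the WebSocket hunk above: the handler references several identifiers (`dc`, `quedMessages`, `pc`, `resolve`, `reject`) that are declared elsewhere on the page. A minimal sketch of that surrounding state is given below; only the identifier names come from the diff, and the initialization details are assumptions for illustration.

```javascript
// Sketch of the page-level state the ws.onmessage handler assumes.
// Identifier names are taken from the diff; everything else is illustrative.
let ws;                             // WebSocket to the AG2 backend
let dc = null;                      // WebRTC DataChannel, assigned once open
const quedMessages = [];            // messages buffered until the DataChannel is ready
const pc = new RTCPeerConnection(); // peer connection used by openRTC()

// Passing resolve/reject into openRTC(message, resolve, reject) suggests
// the connect flow is wrapped in a Promise, roughly like this:
function connect(webSocketUrl) {
    return new Promise((resolve, reject) => {
        ws = new WebSocket(webSocketUrl);
        // Attach the ws.onopen / ws.onmessage handlers shown above;
        // openRTC calls resolve() once the DataChannel is open.
    });
}
```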
 
 #### **WebRTC Setup**
 This block configures the [WebRTC](https://webrtc.org/) connection, adds audio tracks, and initializes the `DataChannel`.
 
 ```javascript
-async function openRTC(data) {
+async function openRTC(init_message, resolve, reject) {
+    const data = init_message.config;
     const EPHEMERAL_KEY = data.client_secret.value;
 
-    // Set up to play remote audio
+    // Set up to play remote audio from the model
     const audioEl = document.createElement("audio");
     audioEl.autoplay = true;
     pc.ontrack = e => audioEl.srcObject = e.streams[0];
 
-    // Add microphone input as local audio track
-    const ms = await navigator.mediaDevices.getUserMedia({ audio: true });
-    pc.addTrack(ms.getTracks()[0]);
-
-    // Create a DataChannel
-    dc = pc.createDataChannel("oai-events");
-    dc.addEventListener("message", e => {
-        const message = JSON.parse(e.data);
+    // Add local audio track for microphone input in the browser
+    const ms = await navigator.mediaDevices.getUserMedia({
+        audio: true
+    });
+    const microphone = ms.getTracks()[0]
+    microphone.enabled = false;
+    pc.addTrack(microphone);
+
+    // Set up data channel for sending and receiving events
+    const _dc = pc.createDataChannel("oai-events");
+    _dc.addEventListener("message", (e) => {
+        // Realtime server events appear here!
+        const message = JSON.parse(e.data)
         if (message.type.includes("function")) {
-            ws.send(e.data); // Forward function messages to WebSocket
+            console.log("WebRTC function message", message)
+            ws.send(e.data)
         }
     });
 
-    // Create and send an SDP offer
+    // Start the session using the Session Description Protocol (SDP)
     const offer = await pc.createOffer();
     await pc.setLocalDescription(offer);
 
-    // Send the offer to OpenAI
     const baseUrl = "https://api.openai.com/v1/realtime";
-    const sdpResponse = await fetch(`${baseUrl}?model=${data.model}`, {
+    const model = data.model;
+    const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
         method: "POST",
         body: offer.sdp,
         headers: {
@@ -187,10 +201,26 @@ async function openRTC(data) {
         },
     });
 
-    // Set the remote SDP answer
-    const answer = { type: "answer", sdp: await sdpResponse.text() };
+    const answer = {
+        type: "answer",
+        sdp: await sdpResponse.text(),
+    };
     await pc.setRemoteDescription(answer);
-    console.log("Connected to OpenAI WebRTC");
+    console.log("Connected to OpenAI WebRTC")
+    _dc.onopen = e => {
+        console.log("Data connection opened.")
+        for (const init_chunk of init_message.init) {
+            _dc.send(JSON.stringify(init_chunk))
+        }
+        console.log("Sent init chunks to OpenAI WebRTC")
+        for (const qmsg of quedMessages) {
+            _dc.send(qmsg)
+        }
+        console.log("Sent queued messages to OpenAI WebRTC")
+        microphone.enabled = true;
+        dc = _dc
+        resolve()
+    }
 }
 ```
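Reviewer note on the `openRTC` hunk: the function reads `config.client_secret.value`, `config.model`, and an `init` array from the `ag2.init` message, which implies a payload shaped roughly as sketched below. Only the field names are grounded in the diff; the concrete values are placeholders, and the real payload is produced by the AG2 backend.

```javascript
// Hypothetical shape of the "ag2.init" message, inferred from the fields
// openRTC() accesses; all values here are illustrative placeholders.
const exampleInitMessage = {
    type: "ag2.init",
    config: {
        model: "gpt-4o-realtime-preview",      // assumed model identifier
        client_secret: { value: "ek_abc123" }, // ephemeral key used in the SDP POST
    },
    // Realtime API events replayed over the DataChannel once it opens,
    // e.g. a session.update carrying instructions and tool definitions.
    init: [],
};
```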