|
| 1 | +# Copyright (c) Microsoft. All rights reserved. |
| 2 | + |
| 3 | +import asyncio |
| 4 | +import logging |
| 5 | + |
| 6 | +from samples.concepts.realtime.utils import AudioPlayerWebsocket, AudioRecorderWebsocket, check_audio_devices |
| 7 | +from semantic_kernel.connectors.ai.open_ai import ( |
| 8 | + ListenEvents, |
| 9 | + OpenAIRealtime, |
| 10 | + OpenAIRealtimeExecutionSettings, |
| 11 | + TurnDetection, |
| 12 | +) |
| 13 | + |
# Root configuration: only warnings and above are emitted by default.
logging.basicConfig(level=logging.WARNING)

# Acquire every logger this sample tunes, then set the levels in one place.
utils_log = logging.getLogger("samples.concepts.realtime.utils")
aiortc_log = logging.getLogger("aiortc")
aioice_log = logging.getLogger("aioice")
logger = logging.getLogger(__name__)

# The sample's audio helpers and this module log at INFO ...
utils_log.setLevel(logging.INFO)
logger.setLevel(logging.INFO)
# ... while the aiortc / aioice loggers are held at WARNING.
aiortc_log.setLevel(logging.WARNING)
aioice_log.setLevel(logging.WARNING)
| 23 | + |
| 24 | +# This simple sample demonstrates how to use the OpenAI Realtime API to create |
| 25 | +# a chat bot that can listen and respond directly through audio. |
| 26 | +# It requires installing: |
| 27 | +# - semantic-kernel[openai_realtime] |
| 28 | +# - pyaudio |
| 29 | +# - sounddevice |
| 30 | +# - pydub |
| 31 | +# - aiortc |
| 32 | +# e.g. pip install pyaudio sounddevice pydub |
| 33 | + |
| 34 | +# The characteristics of your speaker and microphone are a big factor in a smooth conversation, |
| 35 | +# so you may need to try out different devices for each. |
| 36 | +# You can also play around with the turn_detection settings to get the best results. |
# The original note says device ids are set in the AudioRecorderStream and
# AudioPlayerAsync classes; this sample imports AudioRecorderWebsocket and
# AudioPlayerWebsocket instead.
# NOTE(review): confirm which helper classes actually hold the device ids.
# Either way, you may need to adjust them for your system.
# check_audio_devices() is called here at module level, so it runs every time
# this file is executed or imported; use it to check the available devices
# (presumably it reports them -- verify in samples.concepts.realtime.utils)
# and comment the call out once your devices are configured.
check_audio_devices()
| 41 | + |
| 42 | + |
| 43 | +async def main() -> None: |
| 44 | + # create the realtime client and optionally add the audio output function, this is optional |
| 45 | + # you can define the protocol to use, either "websocket" or "webrtc" |
| 46 | + # they will behave the same way, even though the underlying protocol is quite different |
| 47 | + audio_player = AudioPlayerWebsocket() |
| 48 | + realtime_client = OpenAIRealtime( |
| 49 | + "websocket", |
| 50 | + audio_output_callback=audio_player.client_callback, |
| 51 | + ) |
| 52 | + audio_recorder = AudioRecorderWebsocket(realtime_client=realtime_client) |
| 53 | + # Create the settings for the session |
| 54 | + settings = OpenAIRealtimeExecutionSettings( |
| 55 | + instructions=""" |
| 56 | + You are a chat bot. Your name is Mosscap and |
| 57 | + you have one goal: figure out what people need. |
| 58 | + Your full name, should you need to know it, is |
| 59 | + Splendid Speckled Mosscap. You communicate |
| 60 | + effectively, but you tend to answer with long |
| 61 | + flowery prose. |
| 62 | + """, |
| 63 | + voice="shimmer", |
| 64 | + turn_detection=TurnDetection(type="server_vad", create_response=True, silence_duration_ms=800, threshold=0.8), |
| 65 | + ) |
| 66 | + # the context manager calls the create_session method on the client and start listening to the audio stream |
| 67 | + print("Mosscap (transcript): ", end="") |
| 68 | + |
| 69 | + async with realtime_client, audio_player, audio_recorder: |
| 70 | + await realtime_client.update_session(settings=settings, create_response=True) |
| 71 | + |
| 72 | + async for event in realtime_client.receive(): |
| 73 | + match event.event_type: |
| 74 | + # this can be used as an alternative to the callback function used above, |
| 75 | + # the callback is faster and smoother |
| 76 | + # case "audio": |
| 77 | + # await audio_player.add_audio(event.audio) |
| 78 | + case "text": |
| 79 | + print(event.text.text, end="") |
| 80 | + case "service": |
| 81 | + # OpenAI Specific events |
| 82 | + if event.service_type == ListenEvents.SESSION_UPDATED: |
| 83 | + print("Session updated") |
| 84 | + if event.service_type == ListenEvents.RESPONSE_CREATED: |
| 85 | + print("") |
| 86 | + if event.service_type == ListenEvents.ERROR: |
| 87 | + logger.error(event.event) |
| 88 | + |
| 89 | + |
if __name__ == "__main__":
    # Script entry point: show the usage hint, then run the chat loop until
    # the user interrupts it.
    print(
        "Instruction: start speaking, when you stop the API should detect you finished and start responding. "
        "Press ctrl + c to stop the program."
    )
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to stop the sample, so treat it as a
        # normal exit instead of letting a traceback reach the user.
        print("\nStopped.")
0 commit comments