#
# This demo will join a Daily meeting and, given a text file with sentences
# (one per line), will convert the text into audio using Deepgram's
# Text-To-Speech API and send it into the meeting.
#
# The demo requires a Deepgram API key set in the DG_API_KEY environment
# variable.
#
# See https://developers.deepgram.com/docs/text-to-speech
#
# Usage: python3 deepgram_text_to_speech.py -m MEETING_URL -i FILE
#

import argparse
import os
import time

from daily import *
from deepgram import (
    DeepgramClient,
    SpeakOptions,
)

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--meeting", required=True, help="Meeting URL")
parser.add_argument(
    "-i",
    "--input",
    required=True,
    help="File with sentences (one per line)")
args = parser.parse_args()

Daily.init()

# We create a virtual microphone device so we can send audio samples into the
# meeting.
microphone = Daily.create_microphone_device(
    "my-mic", sample_rate=16000, channels=1)
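# Note: the sample rate and channel count here must match the raw audio we
# request from Deepgram below (see SpeakOptions).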

client = CallClient()

print()
print(f"Joining {args.meeting} ...")

# Join and tell our call client that we will be using our new virtual
# microphone.
client.join(args.meeting, client_settings={
    "inputs": {
        "microphone": {
            "isEnabled": True,
            "settings": {
                "deviceId": "my-mic"
            }
        }
    }
})

# Make sure we are joined. It would be better to use join()'s completion
# callback (see the sketch below).
time.sleep(3)
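
# A more robust alternative (sketch): instead of sleeping, pass a completion
# callback to join() and block on an event. daily-python invokes the callback
# with (data, error) once the join attempt settles.
#
#   import threading
#
#   joined = threading.Event()
#
#   def on_joined(data, error):
#       if error:
#           print(f"Unable to join meeting: {error}")
#       joined.set()
#
#   client.join(args.meeting, client_settings={...}, completion=on_joined)
#   joined.wait()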

sentences_file = open(args.input, "r")

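# The demo requires a Deepgram API key (see header); fail early with a clear
# message instead of erroring inside the Deepgram client.
if os.getenv("DG_API_KEY") is None:
    raise SystemExit("Error: DG_API_KEY environment variable is not set")
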
deepgram = DeepgramClient(api_key=os.getenv("DG_API_KEY"))

speak_options = SpeakOptions(
    model="aura-asteria-en",
    encoding="linear16",
    sample_rate=16000,
    container="none"
)
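# With encoding "linear16" and container "none", Deepgram returns raw 16-bit
# PCM at 16 kHz with no container header, which matches the virtual
# microphone configured above (16000 Hz, 1 channel).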

print()

for sentence in sentences_file:
    print(f"Processing: {sentence.strip()}")
    print()

    speak_source = {
        "text": sentence.strip()
    }

    response = deepgram.speak.v("1").stream(speak_source, speak_options)

    # Send all the audio frames to the microphone.
    microphone.write_frames(response.stream.read())

sentences_file.close()

# Let everything finish.
time.sleep(2)

client.leave()
client.release()