-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlatency.py
175 lines (140 loc) · 6.95 KB
/
latency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
This script runs a cursor latency analysis on every wav file in the ./audio folder. The folder is looked up relative to the current working directory, so run the script from the directory that contains this file.
Each file is streamed to Deepgram in chunks of REALTIME_RESOLUTION seconds of audio. By default the hosted Deepgram API is used; to test a local On Premise instance instead, swap in the commented-out localhost URL in run().
When a speech_final message is received, we compare its start + duration (the Transcript Cursor) with the end of the last chunk of audio we sent (the Audio Cursor).
We collect the latency for each speech_final message and, once the file is processed, we calculate the median of those latencies and print it in CSV format.
Once we have all the median latencies, the P95 can be calculated with an external tool such as Google Sheets.
You can run this and send the output to a log file like this:
python3 -u latency.py > log.txt &
"""
import argparse
import asyncio
import base64
import json
import sys
import wave
import websockets
import time
import statistics
import os
# Folder that main() scans for wav files (passed to os.listdir, so a relative
# path is resolved against the current working directory).
directory = 'audio'
# Mimic sending a real-time stream by sending this many seconds of audio at a time.
REALTIME_RESOLUTION = 0.02 # 20ms
# The settings below are interpolated verbatim into the query string of the
# request URL built in run(), which is why the booleans are lowercase strings.
ENDPOINTING = 100 # 100ms of silence will trigger speech_final
MODEL = 'nova-2-general'
ENCODING = 'linear16'
MULTICHANNEL = 'false' # We are testing single channel audio
INTERIM_RESULTS = 'true' # We need this enabled for speech_final to work
# NOTE(review): nothing in the visible code reads or writes this list --
# presumably a leftover; confirm before removing.
results = []
async def run(file, data, channels, sample_width, sample_rate):
    """Stream one audio file to Deepgram and print its median cursor latency.

    The audio is sent in chunks of `REALTIME_RESOLUTION` seconds, paced
    against a monotonic clock to mimic a live stream. For every message
    flagged `speech_final`, the gap between the audio cursor (seconds of
    audio sent so far) and the transcript cursor (`start + duration` of that
    message) is recorded. Once the server closes the connection, a single CSV
    row `<file>, <median latency>` is printed.

    Args:
        file: Label printed in the first CSV column (main() passes the path).
        data: Raw audio bytes to stream.
        channels: Number of audio channels.
        sample_width: Size of one sample in bytes.
        sample_rate: Samples per second.

    Errors raised while opening the connection propagate to the caller.
    Errors raised while streaming are printed by the sender/receiver and end
    the run for this file without a CSV row; they are not re-raised, so a
    batch of files keeps going.
    """
    # How many bytes are contained in one second of audio.
    byte_rate = sample_width * sample_rate * channels
    # How many bytes are in `REALTIME_RESOLUTION` seconds of audio? Computed
    # once, and never 0 so the send loop is guaranteed to advance.
    chunk_size = max(1, int(byte_rate * REALTIME_RESOLUTION))
    audio_cursor = 0.
    latencies = []
    # Prefer a key from the environment so the secret does not have to be
    # written into this file; the placeholder remains the fallback.
    api_key = os.environ.get('DEEPGRAM_API_KEY', 'REPLACE_WITH_DEEPGRAM_API_KEY')

    async with websockets.connect(
        # Testing against local on prem instance
        #f'ws://localhost:8080/v1/listen?channels={channels}&sample_rate={sample_rate}&encoding={ENCODING}&multichannel={MULTICHANNEL}&interim_results={INTERIM_RESULTS}&model={MODEL}&endpointing={ENDPOINTING}'
        # Testing against hosted Deepgram
        f'wss://api.deepgram.com/v1/listen?channels={channels}&sample_rate={sample_rate}&encoding={ENCODING}&multichannel={MULTICHANNEL}&interim_results={INTERIM_RESULTS}&model={MODEL}&endpointing={ENDPOINTING}',
        # NOTE(review): `extra_headers` is the keyword of the legacy websockets
        # client; newer releases name it `additional_headers` -- confirm
        # against the installed websockets version.
        extra_headers={
            'Authorization': 'Token {}'.format(api_key)
        }
    ) as ws:
        async def sender(ws):
            """ Sends the data, mimicking a real-time connection.
            """
            nonlocal audio_cursor
            try:
                # Keep track of when we started. A monotonic clock is used so
                # system clock adjustments cannot distort the pacing.
                start = time.monotonic()
                # Slice by offset instead of re-assigning `data = data[i:]`,
                # which copied the whole remaining buffer on every chunk.
                for offset in range(0, len(data), chunk_size):
                    chunk = data[offset:offset + chunk_size]
                    # Send the data
                    await ws.send(chunk)
                    # Move the audio cursor to the amount of audio actually
                    # sent, so a short final chunk does not overstate it.
                    audio_cursor = (offset + len(chunk)) / byte_rate
                    # Since sleep is not perfect we need to adjust each sleep
                    # duration to maintain the correct speed of sending chunks
                    delta = (time.monotonic() - start) - audio_cursor
                    sleep_time = REALTIME_RESOLUTION - delta
                    # Even when running late, sleep a little to give the
                    # receiver time to process incoming messages
                    if sleep_time < 0:
                        sleep_time = 0.005
                    # sleep so the next audio chunk is sent on time
                    await asyncio.sleep(sleep_time)
                # A CloseStream message tells Deepgram that no more audio
                # will be sent. Deepgram will close the connection once all
                # audio has finished processing.
                await ws.send(json.dumps({
                    "type": "CloseStream"
                }))
            except Exception as e:
                print(f'Error while sending: {e}')
                raise

        async def receiver(ws):
            """ Records a cursor latency for every speech_final message, then
            prints the median once the server has closed the connection.
            """
            try:
                async for raw in ws:
                    msg = json.loads(raw)
                    if 'request_id' in msg:
                        # This is the final metadata message. It gets sent as the
                        # very last message by Deepgram during a clean shutdown.
                        # There is no transcript in it.
                        continue
                    # .get(): a message without a speech_final field is
                    # skipped instead of aborting the run with a KeyError.
                    if msg.get('speech_final'):
                        transcript_cursor = msg['start'] + msg['duration']
                        # Delta between the end of the last transcript and the
                        # audio cursor at the moment the message arrived
                        latencies.append(audio_cursor - transcript_cursor)
            except Exception as e:
                print(f'Error while recieving: {e}')
                raise

            try:
                if latencies:
                    median_latency = statistics.median(latencies)
                    print(f'{file}, {median_latency:.4f}')
                else:
                    print(f'{file}, No speech_final detected!')
            except Exception as e:
                print(f'Error printing stats: {e}')
                raise

        tasks = [
            asyncio.ensure_future(sender(ws)),
            asyncio.ensure_future(receiver(ws)),
        ]
        # Return as soon as either side fails (or when both have finished):
        # once one side is broken nothing further can be measured, so do not
        # keep streaming the rest of the file.
        _, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)
        for task in pending:
            task.cancel()
        # Wait for cancelled tasks to unwind and retrieve every outcome so no
        # exception is left unretrieved. Failures were already printed by the
        # handlers above and stay non-fatal for the batch.
        await asyncio.gather(*tasks, return_exceptions=True)
###############################################################################
def main():
    """Run the latency test on every wav file in `directory`.

    Prints the CSV header `File, Median`, then processes the files one at a
    time in sorted name order; run() prints the row for each file.

    Raises:
        ValueError: If a wav file does not use 16-bit samples. The stream is
            declared with ENCODING, so other sample widths would be sent
            mislabelled.
    """
    print('File, Median')
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # Skip sub-directories and anything that is not a wav file.
        if not (os.path.isfile(path) and path.endswith('.wav')):
            continue

        # Open the audio file.
        with wave.open(path, 'rb') as fh:
            (channels, sample_width, sample_rate, num_samples, _, _) = fh.getparams()
            # Raise explicitly: an `assert` is stripped under `python -O`,
            # which would let unsupported audio through unchecked.
            if sample_width != 2:
                raise ValueError(f'{path}: WAV data must be 16-bit.')
            data = fh.readframes(num_samples)

        # Debug
        # print(f'Channels = {channels}, Sample Rate = {sample_rate} Hz, Sample width = {sample_width} bytes, Size = {len(data)} bytes', file=sys.stderr)

        # Run the test. asyncio.run() creates and closes a fresh event loop
        # per file; asyncio.get_event_loop() is deprecated when no loop is
        # running.
        asyncio.run(run(path, data, channels, sample_width, sample_rate))
###############################################################################
# Script entry point: a falsy return value from main() (including None) is
# reported to the shell as exit status 0.
if __name__ == '__main__':
    exit_status = main() or 0
    sys.exit(exit_status)