Skip to content

Commit

Permalink
com.rest.elevenlabs 3.5.1 (#121)
Browse files Browse the repository at this point in the history
- Fixed generated clip playback for non-streaming clips
- Replaced usages of Task.Delay with the WebGL-friendly Awaiters.DelayAsync
- Updated TextToSpeechDemo
- Updated dependencies
- Updated Unit Tests
  • Loading branch information
StephenHodgson authored Mar 8, 2025
1 parent e7c08f9 commit 5b1e657
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 44 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ TextMesh Pro/
UIElementsSchema/
*packages-lock.json
ProjectSettings/SceneTemplateSettings.json
boot.config

# ============ #
# Certificates #
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,62 +57,61 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, in
public Guid TextHash { get; private set; }

[SerializeField]
private AudioClip audioClip;
private string cachedPath;

[Preserve]
public AudioClip AudioClip
public string CachedPath => cachedPath;

public ReadOnlyMemory<byte> ClipData { get; }

public float[] ClipSamples
{
get
{
if (audioClip == null && !ClipData.IsEmpty)
if (clipSamples != null)
{
var samples = ClipSamples;

if (samples is { Length: > 0 })
{
audioClip = AudioClip.Create(Id, samples.Length, 1, SampleRate, false);
audioClip.SetData(samples, 0);
}
return clipSamples;
}

if (audioClip == null)
if (ClipData.IsEmpty)
{
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
return Array.Empty<float>();
}

return audioClip;
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), inputSampleRate: SampleRate, outputSampleRate: AudioSettings.outputSampleRate);
return clipSamples;

}
}
private float[] clipSamples;

public int SampleRate { get; }

[SerializeField]
private string cachedPath;
private AudioClip audioClip;

[Preserve]
public string CachedPath => cachedPath;

public ReadOnlyMemory<byte> ClipData { get; }

private float[] clipSamples;

public float[] ClipSamples
public AudioClip AudioClip
{
get
{
if (!ClipData.IsEmpty)
if (audioClip == null &&
ClipSamples is { Length: > 0 })
{
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), PCMFormatSize.SixteenBit, SampleRate, AudioSettings.outputSampleRate);
audioClip = AudioClip.Create(Id, ClipSamples.Length, 1, AudioSettings.outputSampleRate, false);
audioClip.SetData(ClipSamples, 0);
}
else if (audioClip != null)

if (audioClip == null)
{
clipSamples = new float[audioClip.samples];
audioClip.GetData(clipSamples, 0);
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
}

return clipSamples;
return audioClip;
}
}

public int SampleRate { get; }
public float Length => ClipSamples.Length / (float)AudioSettings.outputSampleRate;

public void OnBeforeSerialize() => textHash = TextHash.ToString();

Expand All @@ -130,6 +129,12 @@ var path when path.EndsWith(".mp3") => AudioType.MPEG,
_ => AudioType.UNKNOWN
};

if (audioType == AudioType.UNKNOWN)
{
Debug.LogWarning($"Unable to load cached audio clip at {cachedPath}");
return null;
}

return await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using Utilities.Async;
using Utilities.WebRequestRest;
using Debug = UnityEngine.Debug;

Expand Down Expand Up @@ -148,7 +149,7 @@ private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(Dubbing
Debug.Log($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
}

await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
await Awaiters.DelayAsync(pollingInterval, cancellationToken).ConfigureAwait(true);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
var audioData = request.WithTimestamps ? accumulatedPCMData!.ToArray() : response.Data;
var cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);

return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), request.OutputFormat.GetSampleRate(), cachedPath)
return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), frequency, cachedPath)
{
TimestampedTranscriptCharacters = accumulatedTranscriptData?.ToArray() ?? Array.Empty<TimestampedTranscriptCharacter>()
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
using ElevenLabs.TextToSpeech;
using ElevenLabs.Voices;
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using Utilities.Async;
using Utilities.Audio;
using Debug = UnityEngine.Debug;

namespace ElevenLabs.Demo
{
Expand Down Expand Up @@ -63,18 +65,29 @@ private async void Start()
}

var request = new TextToSpeechRequest(voice, message, model: Model.FlashV2_5, outputFormat: OutputFormat.PCM_24000);
var stopwatch = Stopwatch.StartNew();
var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
{
await streamAudioSource.BufferCallbackAsync(partialClip.ClipSamples);
}, cancellationToken: destroyCancellationToken);
await new WaitUntil(() => streamAudioSource.IsEmpty || destroyCancellationToken.IsCancellationRequested);
destroyCancellationToken.ThrowIfCancellationRequested();
((AudioSource)streamAudioSource).clip = voiceClip.AudioClip;
var elapsedTime = (float)stopwatch.Elapsed.TotalSeconds;
var playbackTime = voiceClip.Length - elapsedTime;

if (debug)
{
Debug.Log($"Elapsed time: {elapsedTime:F} seconds");
Debug.Log($"voice clip length: {voiceClip.Length:F} seconds");
Debug.Log($"playback time: {playbackTime:F} seconds");
}

await Awaiters.DelayAsync(TimeSpan.FromSeconds(playbackTime + 1f), destroyCancellationToken);

if (debug)
{
Debug.Log($"Full clip: {voiceClip.Id}");
}

((AudioSource)streamAudioSource).PlayOneShot(voiceClip);
}
catch (Exception e)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public async Task Test_01_TextToSpeech()
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Debug.Log(voiceClip.Id);
}

Expand All @@ -36,9 +37,10 @@ public async Task Test_02_StreamTextToSpeech()
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Assert.IsNotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.IsNotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
}

[Test]
Expand All @@ -50,9 +52,10 @@ public async Task Test_03_TextToSpeech_Transcription()
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", withTimestamps: true);
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.NotNull(voiceClip.TimestampedTranscriptCharacters);
Assert.IsNotEmpty(voiceClip.TimestampedTranscriptCharacters);
Debug.Log("| Character | Start Time | End Time |");
Expand Down Expand Up @@ -88,9 +91,10 @@ public async Task Test_04_StreamTextToSpeech_Transcription()
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Assert.IsNotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.IsNotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
}

Expand All @@ -111,10 +115,11 @@ public async Task Test_05_LanguageEnforced_TextToSpeech()
languageCode: "cs");
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
}
}
}
9 changes: 4 additions & 5 deletions ElevenLabs/Packages/com.rest.elevenlabs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
"version": "3.5.0",
"version": "3.5.1",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
Expand All @@ -17,10 +17,9 @@
"url": "https://github.com/StephenHodgson"
},
"dependencies": {
"com.utilities.rest": "3.3.1",
"com.utilities.audio": "2.2.1",
"com.utilities.encoder.ogg": "4.2.0",
"com.utilities.encoder.wav": "2.2.0"
"com.utilities.rest": "3.3.2",
"com.utilities.encoder.ogg": "4.2.1",
"com.utilities.encoder.wav": "2.2.1"
},
"samples": [
{
Expand Down
2 changes: 1 addition & 1 deletion ElevenLabs/Packages/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"com.unity.ide.rider": "3.0.34",
"com.unity.ide.visualstudio": "2.0.22",
"com.unity.test-framework": "1.3.5",
"com.utilities.buildpipeline": "1.6.0"
"com.utilities.buildpipeline": "1.6.1"
},
"scopedRegistries": [
{
Expand Down

0 comments on commit 5b1e657

Please sign in to comment.