Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

com.rest.elevenlabs 3.5.1 #121

Merged
merged 2 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ TextMesh Pro/
UIElementsSchema/
*packages-lock.json
ProjectSettings/SceneTemplateSettings.json
boot.config

# ============ #
# Certificates #
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,62 +57,61 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, in
public Guid TextHash { get; private set; }

[SerializeField]
private AudioClip audioClip;
private string cachedPath;

[Preserve]
public AudioClip AudioClip
public string CachedPath => cachedPath;

public ReadOnlyMemory<byte> ClipData { get; }

public float[] ClipSamples
{
get
{
if (audioClip == null && !ClipData.IsEmpty)
if (clipSamples != null)
{
var samples = ClipSamples;

if (samples is { Length: > 0 })
{
audioClip = AudioClip.Create(Id, samples.Length, 1, SampleRate, false);
audioClip.SetData(samples, 0);
}
return clipSamples;
}

if (audioClip == null)
if (ClipData.IsEmpty)
{
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
return Array.Empty<float>();
}

return audioClip;
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), inputSampleRate: SampleRate, outputSampleRate: AudioSettings.outputSampleRate);
return clipSamples;

}
}
private float[] clipSamples;

public int SampleRate { get; }

[SerializeField]
private string cachedPath;
private AudioClip audioClip;

[Preserve]
public string CachedPath => cachedPath;

public ReadOnlyMemory<byte> ClipData { get; }

private float[] clipSamples;

public float[] ClipSamples
public AudioClip AudioClip
{
get
{
if (!ClipData.IsEmpty)
if (audioClip == null &&
ClipSamples is { Length: > 0 })
{
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), PCMFormatSize.SixteenBit, SampleRate, AudioSettings.outputSampleRate);
audioClip = AudioClip.Create(Id, ClipSamples.Length, 1, AudioSettings.outputSampleRate, false);
audioClip.SetData(ClipSamples, 0);
}
else if (audioClip != null)

if (audioClip == null)
{
clipSamples = new float[audioClip.samples];
audioClip.GetData(clipSamples, 0);
Debug.LogError($"{nameof(audioClip)} is null, try loading it with LoadCachedAudioClipAsync");
}

return clipSamples;
return audioClip;
}
}

public int SampleRate { get; }
public float Length => ClipSamples.Length / (float)AudioSettings.outputSampleRate;

public void OnBeforeSerialize() => textHash = TextHash.ToString();

Expand All @@ -130,6 +129,12 @@ var path when path.EndsWith(".mp3") => AudioType.MPEG,
_ => AudioType.UNKNOWN
};

if (audioType == AudioType.UNKNOWN)
{
Debug.LogWarning($"Unable to load cached audio clip at {cachedPath}");
return null;
}

return await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using Utilities.Async;
using Utilities.WebRequestRest;
using Debug = UnityEngine.Debug;

Expand Down Expand Up @@ -148,7 +149,7 @@ private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(Dubbing
Debug.Log($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
}

await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
await Awaiters.DelayAsync(pollingInterval, cancellationToken).ConfigureAwait(true);
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func
var audioData = request.WithTimestamps ? accumulatedPCMData!.ToArray() : response.Data;
var cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);

return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), request.OutputFormat.GetSampleRate(), cachedPath)
return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), frequency, cachedPath)
{
TimestampedTranscriptCharacters = accumulatedTranscriptData?.ToArray() ?? Array.Empty<TimestampedTranscriptCharacter>()
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
using ElevenLabs.TextToSpeech;
using ElevenLabs.Voices;
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using UnityEngine;
using Utilities.Async;
using Utilities.Audio;
using Debug = UnityEngine.Debug;

namespace ElevenLabs.Demo
{
Expand Down Expand Up @@ -63,18 +65,29 @@ private async void Start()
}

var request = new TextToSpeechRequest(voice, message, model: Model.FlashV2_5, outputFormat: OutputFormat.PCM_24000);
var stopwatch = Stopwatch.StartNew();
var voiceClip = await api.TextToSpeechEndpoint.TextToSpeechAsync(request, async partialClip =>
{
await streamAudioSource.BufferCallbackAsync(partialClip.ClipSamples);
}, cancellationToken: destroyCancellationToken);
await new WaitUntil(() => streamAudioSource.IsEmpty || destroyCancellationToken.IsCancellationRequested);
destroyCancellationToken.ThrowIfCancellationRequested();
((AudioSource)streamAudioSource).clip = voiceClip.AudioClip;
var elapsedTime = (float)stopwatch.Elapsed.TotalSeconds;
var playbackTime = voiceClip.Length - elapsedTime;

if (debug)
{
Debug.Log($"Elapsed time: {elapsedTime:F} seconds");
Debug.Log($"voice clip length: {voiceClip.Length:F} seconds");
Debug.Log($"playback time: {playbackTime:F} seconds");
}

await Awaiters.DelayAsync(TimeSpan.FromSeconds(playbackTime + 1f), destroyCancellationToken);

if (debug)
{
Debug.Log($"Full clip: {voiceClip.Id}");
}

((AudioSource)streamAudioSource).PlayOneShot(voiceClip);
}
catch (Exception e)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public async Task Test_01_TextToSpeech()
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Debug.Log(voiceClip.Id);
}

Expand All @@ -36,9 +37,10 @@ public async Task Test_02_StreamTextToSpeech()
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Assert.IsNotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.IsNotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
}

[Test]
Expand All @@ -50,9 +52,10 @@ public async Task Test_03_TextToSpeech_Transcription()
var request = new TextToSpeechRequest(voice, "The quick brown fox jumps over the lazy dog.", withTimestamps: true);
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.NotNull(voiceClip.TimestampedTranscriptCharacters);
Assert.IsNotEmpty(voiceClip.TimestampedTranscriptCharacters);
Debug.Log("| Character | Start Time | End Time |");
Expand Down Expand Up @@ -88,9 +91,10 @@ public async Task Test_04_StreamTextToSpeech_Transcription()
Assert.NotNull(partialClips);
Assert.IsNotEmpty(partialClips);
Assert.NotNull(voiceClip);
Assert.IsNotNull(voiceClip.AudioClip);
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.IsNotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.AreEqual(characters.ToArray(), voiceClip.TimestampedTranscriptCharacters);
}

Expand All @@ -111,10 +115,11 @@ public async Task Test_05_LanguageEnforced_TextToSpeech()
languageCode: "cs");
var voiceClip = await ElevenLabsClient.TextToSpeechEndpoint.TextToSpeechAsync(request);
Assert.NotNull(voiceClip);
Assert.NotNull(voiceClip.AudioClip);
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
Debug.Log(voiceClip.Id);
Debug.Log(voiceClip.CachedPath);
Assert.NotNull(voiceClip.AudioClip);
Assert.AreEqual(voiceClip.AudioClip.length, voiceClip.Length, 0.01);
Assert.IsTrue(string.IsNullOrWhiteSpace(voiceClip.CachedPath));
}
}
}
9 changes: 4 additions & 5 deletions ElevenLabs/Packages/com.rest.elevenlabs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
"version": "3.5.0",
"version": "3.5.1",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
Expand All @@ -17,10 +17,9 @@
"url": "https://github.com/StephenHodgson"
},
"dependencies": {
"com.utilities.rest": "3.3.1",
"com.utilities.audio": "2.2.1",
"com.utilities.encoder.ogg": "4.2.0",
"com.utilities.encoder.wav": "2.2.0"
"com.utilities.rest": "3.3.2",
"com.utilities.encoder.ogg": "4.2.1",
"com.utilities.encoder.wav": "2.2.1"
},
"samples": [
{
Expand Down
2 changes: 1 addition & 1 deletion ElevenLabs/Packages/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"com.unity.ide.rider": "3.0.34",
"com.unity.ide.visualstudio": "2.0.22",
"com.unity.test-framework": "1.3.5",
"com.utilities.buildpipeline": "1.6.0"
"com.utilities.buildpipeline": "1.6.1"
},
"scopedRegistries": [
{
Expand Down
Loading