Skip to content

Commit

Permalink
com.rest.elevenlabs 3.5.0 (#117)
Browse files Browse the repository at this point in the history
- added TextToSpeechRequest.ctor overload
  - added seed property
  - added applyTextNormalization property
  - removed deprecated optimizeStreamingLatency property
- added VoiceSettings.ctor overload
  - added speed property
- fix audio decoding bug in GeneratedClip
- added TextToSpeechAsync overload with Func(VoiceClip, Task) callback
- updated TextToSpeechDemo with StreamAudioSource example
- updated deps
  - com.utilities.rest -> 3.3.1
  - com.utilities.audio -> 2.2.1
  - com.utilities.encoder.wav -> 2.2.0
  - com.utilities.encoder.ogg -> 4.2.0
  • Loading branch information
StephenHodgson authored Mar 2, 2025
1 parent 689ad11 commit e7c08f9
Show file tree
Hide file tree
Showing 14 changed files with 348 additions and 152 deletions.
58 changes: 47 additions & 11 deletions .github/workflows/unity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,71 @@ on:
branches:
- 'main'
pull_request:
types: [opened, reopened, synchronize, ready_for_review]
branches:
- '*'
- '**'
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ (github.event_name == 'pull_request' || github.event.action == 'synchronize') }}
cancel-in-progress: ${{(github.event_name == 'pull_request' || github.event.action == 'synchronize')}}
jobs:
build:
permissions:
checks: write
pull-requests: write
name: ${{ matrix.os }} ${{ matrix.unity-version }} ${{ matrix.build-target }}
runs-on: ${{ matrix.os }}
if: github.event_name != 'pull_request' || !github.event.pull_request.draft
permissions:
contents: read
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-15]
unity-versions: [2021.x, 2022.x, 6000.x]
include:
include: # for each os specify the build targets
- os: ubuntu-latest
unity-version: 2022.x
build-target: Android
- os: ubuntu-latest
unity-version: 6000.x
build-target: Android
- os: ubuntu-latest
unity-version: 2022.x
build-target: StandaloneLinux64
- os: ubuntu-latest
unity-version: 6000.x
build-target: StandaloneLinux64
- os: ubuntu-latest
unity-version: 2022.x
build-target: WebGL
- os: ubuntu-latest
unity-version: 6000.x
build-target: WebGL
- os: windows-latest
unity-version: 2022.x
build-target: StandaloneWindows64
- os: windows-latest
unity-version: 6000.x
build-target: StandaloneWindows64
- os: macos-15
- os: windows-latest
unity-version: 2022.x
build-target: WSAPlayer
- os: windows-latest
unity-version: 6000.x
build-target: WSAPlayer
- os: macos-latest
unity-version: 2022.x
build-target: iOS
- os: macos-latest
unity-version: 6000.x
build-target: iOS
- os: macos-latest
unity-version: 2022.x
build-target: StandaloneOSX
- os: macos-latest
unity-version: 6000.x
build-target: StandaloneOSX
steps:
- uses: actions/checkout@v4
- uses: RageAgainstThePixel/unity-setup@v1
with:
unity-version: ${{ matrix.unity-versions }}
unity-version: ${{ matrix.unity-version }}
build-targets: ${{ matrix.build-target }}
- uses: RageAgainstThePixel/activate-unity-license@v1
with:
Expand All @@ -56,5 +92,5 @@ jobs:
- uses: actions/upload-artifact@v4
if: success() || failure()
with:
name: '${{ github.run_number }}.${{ github.run_attempt }}-${{ matrix.os }}-${{ matrix.unity-versions }}-${{ matrix.build-target }}-Artifacts'
name: '${{ github.run_number }}.${{ github.run_attempt }}-${{ matrix.os }}-${{ matrix.unity-version }}-${{ matrix.build-target }}-Artifacts'
path: '${{ github.workspace }}/**/*.log'
20 changes: 17 additions & 3 deletions ElevenLabs/Packages/com.rest.elevenlabs/Documentation~/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ The recommended installation method is though the unity package manager and [Ope

### Via Unity Package Manager and OpenUPM

#### Terminal

```bash
openupm add com.rest.elevenlabs
```

#### Manual

- Open your Unity project settings
- Select the `Package Manager`
![scoped-registries](images/package-manager-scopes.png)
Expand All @@ -33,9 +41,11 @@ The recommended installation method is though the unity package manager and [Ope

### Via Unity Package Manager and Git url

> [!WARNING]
> This repo has dependencies on other repositories! You are responsible for adding these on your own.
- Open your Unity Package Manager
- Add package from git url: `https://github.com/RageAgainstThePixel/com.rest.elevenlabs.git#upm`
> Note: this repo has dependencies on other repositories! You are responsible for adding these on your own.
- [com.utilities.async](https://github.com/RageAgainstThePixel/com.utilities.async)
- [com.utilities.extensions](https://github.com/RageAgainstThePixel/com.utilities.extensions)
- [com.utilities.audio](https://github.com/RageAgainstThePixel/com.utilities.audio)
Expand All @@ -45,7 +55,11 @@ The recommended installation method is though the unity package manager and [Ope

---

## Documentation
## [Documentation](https://rageagainstthepixel.github.io/ElevenLabs-DotNet)

> Check out our new API docs!
<https://rageagainstthepixel.github.io/ElevenLabs-DotNet>

### Table of Contents

Expand Down Expand Up @@ -290,7 +304,7 @@ var request = new TextToSpeechRequest(voice, message, model: Model.EnglishTurboV
var voiceClip = await api.TextToSpeechEndpoint.StreamTextToSpeechAsync(request, partialClip =>
{
// Note: check demo scene for best practices
// on how to handle playback with OnAudioFilterRead
// on how to handle playback with StreamAudioSource.cs
partialClips.Enqueue(partialClip);
});
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ private void RenderSpeechSynthesis()

if (EditorGUI.EndChangeCheck())
{
currentVoiceSettings = new VoiceSettings(voiceSettingsSliderValues.x, voiceSettingsSliderValues.y, useSpeakerBoost, voiceSettingsSliderValues.z);
currentVoiceSettings = new VoiceSettings(voiceSettingsSliderValues.x, voiceSettingsSliderValues.y, voiceSettingsSliderValues.z, useSpeakerBoost);
}

EditorGUILayout.EndHorizontal();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ internal GeneratedClip(string id, string text, ReadOnlyMemory<byte> clipData, in
SampleRate = sampleRate;
}

private readonly ReadOnlyMemory<byte> audioData;

[SerializeField]
private string id;

Expand All @@ -66,11 +64,15 @@ public AudioClip AudioClip
{
get
{
if (audioClip == null && !audioData.IsEmpty)
if (audioClip == null && !ClipData.IsEmpty)
{
var pcmData = PCMEncoder.Decode(audioData.ToArray());
audioClip = AudioClip.Create(Id, pcmData.Length, 1, SampleRate, false);
audioClip.SetData(pcmData, 0);
var samples = ClipSamples;

if (samples is { Length: > 0 })
{
audioClip = AudioClip.Create(Id, samples.Length, 1, SampleRate, false);
audioClip.SetData(samples, 0);
}
}

if (audioClip == null)
Expand Down Expand Up @@ -98,7 +100,7 @@ public float[] ClipSamples
{
if (!ClipData.IsEmpty)
{
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray());
clipSamples ??= PCMEncoder.Decode(ClipData.ToArray(), PCMFormatSize.SixteenBit, SampleRate, AudioSettings.outputSampleRate);
}
else if (audioClip != null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,7 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Canc
audioData = response.Data;
}

string cachedPath = null;

if (request.CacheFormat != CacheFormat.None)
{
cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);
}
var cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);

return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), request.OutputFormat.GetSampleRate(), cachedPath)
{
Expand All @@ -125,6 +120,22 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Canc
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="VoiceClip"/>.</returns>
public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Action<VoiceClip> partialClipCallback, CancellationToken cancellationToken = default)
{
    // Adapt the synchronous callback to a Task-returning delegate so the call
    // below binds to the Func<VoiceClip, Task> overload of TextToSpeechAsync.
    Func<VoiceClip, Task> forwardPartialClip = async partialClip =>
    {
        partialClipCallback.Invoke(partialClip);
        // Yield once after each partial clip has been handed to the caller.
        await Task.Yield();
    };

    var completedClip = await TextToSpeechAsync(request, forwardPartialClip, cancellationToken).ConfigureAwait(false);
    return completedClip;
}

/// <summary>
/// Converts text to synthesized speech.
/// </summary>
/// <param name="request"><see cref="TextToSpeechRequest"/>.</param>
/// <param name="partialClipCallback">Partial <see cref="VoiceClip"/> callback with streaming data.</param>
/// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
/// <returns><see cref="VoiceClip"/>.</returns>
public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Func<VoiceClip, Task> partialClipCallback, CancellationToken cancellationToken = default)
{
if (request.OutputFormat is not OutputFormat.PCM_16000 and not OutputFormat.PCM_22050 and not OutputFormat.PCM_24000 and not OutputFormat.PCM_44100)
{
Expand Down Expand Up @@ -167,19 +178,14 @@ public async Task<VoiceClip> TextToSpeechAsync(TextToSpeechRequest request, Acti
}

var audioData = request.WithTimestamps ? accumulatedPCMData!.ToArray() : response.Data;
string cachedPath = null;

if (request.CacheFormat != CacheFormat.None)
{
cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);
}
var cachedPath = await SaveAudioToCache(audioData, clipId, request.Voice, request.OutputFormat, request.CacheFormat, cancellationToken).ConfigureAwait(true);

return new VoiceClip(clipId, request.Text, request.Voice, new ReadOnlyMemory<byte>(audioData), request.OutputFormat.GetSampleRate(), cachedPath)
{
TimestampedTranscriptCharacters = accumulatedTranscriptData?.ToArray() ?? Array.Empty<TimestampedTranscriptCharacter>()
};

void StreamCallback(Response partialResponse)
async void StreamCallback(Response partialResponse)
{
try
{
Expand All @@ -188,7 +194,7 @@ void StreamCallback(Response partialResponse)
throw new ArgumentException("Failed to parse clip id!");
}

partialClipCallback.Invoke(new VoiceClip($"{clipId}_{++part}", request.Text, request.Voice, new ReadOnlyMemory<byte>(partialResponse.Data), frequency));
await partialClipCallback.Invoke(new VoiceClip($"{clipId}_{++part}", request.Text, request.Voice, new ReadOnlyMemory<byte>(partialResponse.Data), frequency));
}
catch (Exception e)
{
Expand Down Expand Up @@ -264,16 +270,24 @@ private static Dictionary<string, string> CreateRequestParameters(TextToSpeechRe
{ OutputFormatParameter, request.OutputFormat.ToString().ToLower() }
};

#pragma warning disable CS0618 // Type or member is obsolete
if (request.OptimizeStreamingLatency.HasValue)
{
parameters.Add(OptimizeStreamingLatencyParameter, request.OptimizeStreamingLatency.Value.ToString());
}
#pragma warning restore CS0618 // Type or member is obsolete

return parameters;
}

private static async Task<string> SaveAudioToCache(byte[] audioData, string clipId, Voice voice, OutputFormat outputFormat, CacheFormat cacheFormat, CancellationToken cancellationToken)
{
#if PLATFORM_WEBGL
await Task.Yield();
return null;
#else
if (cacheFormat == CacheFormat.None) { return null; }

string extension;
AudioType audioType;

Expand All @@ -294,7 +308,6 @@ private static async Task<string> SaveAudioToCache(byte[] audioData, string clip
extension = "ogg";
audioType = AudioType.OGGVORBIS;
break;
case CacheFormat.None:
default:
throw new ArgumentOutOfRangeException(nameof(cacheFormat), cacheFormat, null);
}
Expand Down Expand Up @@ -325,6 +338,7 @@ private static async Task<string> SaveAudioToCache(byte[] audioData, string clip
}

return cachedPath;
#endif // PLATFORM_WEBGL
}

}
Expand Down
Loading

0 comments on commit e7c08f9

Please sign in to comment.