Skip to content

Commit

Permalink
Add YuE (music gen) from fal.ai (#2801)
Browse files Browse the repository at this point in the history
* YuE first test (fal.ai provider)

* pass genres as extra parameter

* docstring

* things
  • Loading branch information
Wauplin authored Jan 31, 2025
1 parent 07e1adb commit 63aada5
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 2 deletions.
31 changes: 31 additions & 0 deletions src/huggingface_hub/inference/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2750,6 +2750,37 @@ def text_to_speech(
... )
>>> Path("hello.flac").write_bytes(audio)
```
Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
```py
>>> from huggingface_hub import InferenceClient
>>> lyrics = '''
... [verse]
... In the town where I was born
... Lived a man who sailed to sea
... And he told us of his life
... In the land of submarines
... So we sailed on to the sun
... 'Til we found a sea of green
... And we lived beneath the waves
... In our yellow submarine
... [chorus]
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... '''
>>> genres = "pavarotti-style tenor voice"
>>> client = InferenceClient(
... provider="fal-ai",
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
>>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
>>> with open("output.mp3", "wb") as f:
... f.write(audio)
```
"""
provider_helper = get_provider_helper(self.provider, task="text-to-speech")
request_parameters = provider_helper.prepare_request(
Expand Down
31 changes: 31 additions & 0 deletions src/huggingface_hub/inference/_generated/_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2808,6 +2808,37 @@ async def text_to_speech(
... )
>>> Path("hello.flac").write_bytes(audio)
```
Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
```py
>>> from huggingface_hub import InferenceClient
>>> lyrics = '''
... [verse]
... In the town where I was born
... Lived a man who sailed to sea
... And he told us of his life
... In the land of submarines
... So we sailed on to the sun
... 'Til we found a sea of green
... And we lived beneath the waves
... In our yellow submarine
... [chorus]
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... We all live in a yellow submarine
... Yellow submarine, yellow submarine
... '''
>>> genres = "pavarotti-style tenor voice"
>>> client = InferenceClient(
... provider="fal-ai",
... model="m-a-p/YuE-s1-7B-anneal-en-cot",
... api_key=...,
... )
>>> audio = client.text_to_speech(lyrics, extra_parameters={"genres": genres})
>>> with open("output.mp3", "wb") as f:
... f.write(audio)
```
"""
provider_helper = get_provider_helper(self.provider, task="text-to-speech")
request_parameters = provider_helper.prepare_request(
Expand Down
10 changes: 8 additions & 2 deletions src/huggingface_hub/inference/_providers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from typing import Dict, Literal

from .._common import TaskProviderHelper
from .fal_ai import FalAIAutomaticSpeechRecognitionTask, FalAITextToImageTask, FalAITextToVideoTask
from .fal_ai import (
FalAIAutomaticSpeechRecognitionTask,
FalAITextToImageTask,
FalAITextToSpeechTask,
FalAITextToVideoTask,
)
from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask
from .replicate import ReplicateTask, ReplicateTextToSpeechTask
from .sambanova import SambanovaConversationalTask
Expand All @@ -18,8 +23,9 @@

PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
"fal-ai": {
"text-to-image": FalAITextToImageTask(),
"automatic-speech-recognition": FalAIAutomaticSpeechRecognitionTask(),
"text-to-image": FalAITextToImageTask(),
"text-to-speech": FalAITextToSpeechTask(),
"text-to-video": FalAITextToVideoTask(),
},
"hf-inference": {
Expand Down
18 changes: 18 additions & 0 deletions src/huggingface_hub/inference/_providers/fal_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
"stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
"Warlord-K/Sana-1024": "fal-ai/sana",
},
"text-to-speech": {
"m-a-p/YuE-s1-7B-anneal-en-cot": "fal-ai/yue",
},
"text-to-video": {
"genmo/mochi-1-preview": "fal-ai/mochi-v1",
"tencent/HunyuanVideo": "fal-ai/hunyuan-video",
Expand Down Expand Up @@ -146,6 +149,21 @@ def get_response(self, response: Union[bytes, Dict]) -> Any:
return get_session().get(url).content


class FalAITextToSpeechTask(FalAITask):
    """Provider helper for the "text-to-speech" task on fal.ai.

    The request payload carries the caller's input under the ``"lyrics"`` key,
    and the generated audio is downloaded from the URL found in the provider's
    response.
    """

    def __init__(self):
        """Register this helper under the "text-to-speech" task name."""
        super().__init__("text-to-speech")

    def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Build the JSON payload sent to the provider.

        Args:
            inputs: Value sent under the ``"lyrics"`` key.
            parameters: Extra request parameters; entries whose value is
                ``None`` are left out of the payload.

        Returns:
            A dict with ``"lyrics"`` first, followed by every non-``None``
            parameter. A ``"lyrics"`` entry in ``parameters`` overrides
            ``inputs``.
        """
        payload: Dict[str, Any] = {"lyrics": inputs}
        for name, value in parameters.items():
            if value is not None:
                payload[name] = value
        return payload

    def get_response(self, response: Union[bytes, Dict]) -> Any:
        """Download the generated audio referenced by the provider response.

        Args:
            response: Raw provider response; it is normalised with ``_as_dict``
                and must contain ``["audio"]["url"]``.

        Returns:
            The ``content`` of the GET response for the audio URL.

        Raises:
            KeyError: If the response has no ``"audio"`` / ``"url"`` entry.
        """
        audio_info = _as_dict(response)["audio"]
        # NOTE(review): the HTTP status of the download is not checked here, so
        # an error body would be returned as if it were audio — confirm whether
        # that is intended.
        download = get_session().get(audio_info["url"])
        return download.content


class FalAITextToVideoTask(FalAITask):
def __init__(self):
super().__init__("text-to-video")
Expand Down

0 comments on commit 63aada5

Please sign in to comment.