Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions abogen/tts_camb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from typing import Any, Iterator, Optional

import numpy as np

from abogen.tts_supertonic import _ensure_float32_mono, _split_text


logger = logging.getLogger(__name__)


# Speech models CambPipeline accepts; any other model name is replaced with "mars-flash".
DEFAULT_CAMB_MODELS = ("mars-flash", "mars-pro", "mars-instruct")
# Voice ID used when the caller does not supply one that can be read as an integer.
DEFAULT_CAMB_VOICE_ID = 147320
# Language code used when no language is given.
DEFAULT_CAMB_LANGUAGE = "en-us"


@dataclass
class CambSegment:
    """One synthesized chunk: the text that was sent and the audio decoded for it."""

    graphemes: str  # the text chunk that was submitted for synthesis
    audio: np.ndarray  # decoded audio samples for that chunk


class CambPipeline:
    """Adapter that mimics Kokoro/SuperTonic's pipeline iteration interface for Camb AI.

    Calling an instance with text splits it into chunks, synthesizes each chunk
    through the Camb AI client and yields one ``CambSegment`` per chunk that
    produced audio.
    """

    # Model used whenever a requested model is not in DEFAULT_CAMB_MODELS.
    _FALLBACK_MODEL = "mars-flash"
    # The stream is requested as "pcm_f32le": 4 bytes per sample.
    _BYTES_PER_SAMPLE = 4

    def __init__(
        self,
        *,
        sample_rate: int,
        api_key: Optional[str] = None,
        model: str = "mars-flash",
        voice_id: int = DEFAULT_CAMB_VOICE_ID,
        language: str = DEFAULT_CAMB_LANGUAGE,
        max_chunk_length: int = 500,
    ) -> None:
        """Validate the configuration and create the Camb AI client.

        Args:
            sample_rate: Sample rate requested from the API for the PCM output.
            api_key: API key; when empty, the ``CAMB_API_KEY`` environment
                variable is used instead.
            model: Speech model; values outside ``DEFAULT_CAMB_MODELS`` fall
                back to ``"mars-flash"``.
            voice_id: Default voice ID used when a call supplies no usable one.
            language: Default language code.
            max_chunk_length: Maximum chunk length handed to ``_split_text``.

        Raises:
            RuntimeError: If no API key is available or the camb SDK cannot be
                imported.
        """
        self.sample_rate = int(sample_rate)
        self.model = self._normalize_model(model)
        self.voice_id = int(voice_id)
        self.language = language or DEFAULT_CAMB_LANGUAGE
        self.max_chunk_length = int(max_chunk_length)

        # Strip both sources so a whitespace-only key is rejected here instead
        # of being sent to the API and failing on the first request.
        resolved_key = (
            str(api_key or "").strip()
            or str(os.environ.get("CAMB_API_KEY") or "").strip()
        )
        if not resolved_key:
            raise RuntimeError(
                "Camb AI API key is required. Set the CAMB_API_KEY environment variable "
                "or provide an API key in the settings."
            )

        try:
            from camb.client import CambAI  # type: ignore[import-not-found]
        except Exception as exc:
            raise RuntimeError(
                "camb-sdk is not installed. Install it with `pip install camb-sdk`."
            ) from exc

        self._client = CambAI(api_key=resolved_key)

    @classmethod
    def _normalize_model(cls, model: Optional[str]) -> str:
        """Return ``model`` if supported, otherwise the fallback model."""
        if model in DEFAULT_CAMB_MODELS:
            return model
        if model:
            # Surface misconfiguration instead of silently switching models.
            logger.warning(
                "Unsupported Camb AI model %r; falling back to %s",
                model,
                cls._FALLBACK_MODEL,
            )
        return cls._FALLBACK_MODEL

    def _resolve_voice_id(self, voice: Any) -> int:
        """Return the voice ID for ``voice``, or the pipeline default if unusable."""
        # bool is a subclass of int; True/False must not be read as IDs 1/0.
        if isinstance(voice, int) and not isinstance(voice, bool):
            return voice
        if isinstance(voice, str):
            try:
                return int(voice)
            except ValueError:
                logger.debug(
                    "Voice %r is not a numeric Camb AI voice ID; using default %d",
                    voice,
                    self.voice_id,
                )
        return self.voice_id

    @classmethod
    def _decode_pcm(cls, raw_bytes: bytes) -> np.ndarray:
        """Decode little-endian float32 PCM bytes into a writable float32 array."""
        remainder = len(raw_bytes) % cls._BYTES_PER_SAMPLE
        if remainder:
            # np.frombuffer raises ValueError when the buffer is not a whole
            # number of samples; drop the partial trailing sample instead.
            logger.warning(
                "Camb AI stream ended mid-sample; dropping %d trailing byte(s)",
                remainder,
            )
            raw_bytes = raw_bytes[:-remainder]
        if not raw_bytes:
            return np.zeros(0, dtype=np.float32)
        # frombuffer over immutable bytes gives a read-only view; astype's
        # default copy yields an array downstream code may modify in place.
        samples = np.frombuffer(raw_bytes, dtype="<f4").astype(np.float32)
        return _ensure_float32_mono(samples)

    def __call__(
        self,
        text: str,
        *,
        voice: Any,
        speed: float,
        split_pattern: Optional[str] = None,
        model: Optional[str] = None,
        language: Optional[str] = None,
    ) -> Iterator[CambSegment]:
        """Synthesize ``text`` chunk by chunk.

        Args:
            text: Text to synthesize.
            voice: Voice ID as an int or numeric string; anything else uses the
                pipeline's default voice ID.
            speed: Speaking rate; clamped to the range 0.5-2.0 (``None`` means 1.0).
            split_pattern: Forwarded to ``_split_text``.
            model: Optional per-call model override.
            language: Optional per-call language override.

        Yields:
            A ``CambSegment`` for every chunk that produced non-empty audio.

        Raises:
            Exception: Any error raised while requesting or decoding a chunk is
                logged and re-raised unchanged.
        """
        from camb.types.stream_tts_output_configuration import StreamTtsOutputConfiguration  # type: ignore[import-not-found]
        from camb.types.stream_tts_voice_settings import StreamTtsVoiceSettings  # type: ignore[import-not-found]

        voice_id = self._resolve_voice_id(voice)
        speech_model = self._normalize_model(model or self.model)
        lang = language or self.language

        speed_value = float(speed) if speed is not None else 1.0
        speed_value = max(0.5, min(2.0, speed_value))

        chunks = _split_text(
            text, split_pattern=split_pattern, max_chunk_length=self.max_chunk_length
        )

        for chunk in chunks:
            try:
                stream = self._client.text_to_speech.tts(
                    text=chunk,
                    voice_id=voice_id,
                    language=lang,
                    speech_model=speech_model,
                    output_configuration=StreamTtsOutputConfiguration(
                        format="pcm_f32le",
                        sample_rate=self.sample_rate,
                    ),
                    voice_settings=StreamTtsVoiceSettings(
                        speaking_rate=speed_value,
                    ),
                )

                # Collect all streamed bytes for this chunk.
                raw_bytes = b"".join(stream)
                audio = self._decode_pcm(raw_bytes) if raw_bytes else None
            except Exception as exc:
                logger.error("Camb AI synthesis failed for chunk: %s — %s", chunk[:60], exc)
                raise

            if audio is None:
                logger.warning("Camb AI returned empty audio for chunk: %s", chunk[:60])
                continue
            if audio.size == 0:
                continue

            # Yield outside the try block so an exception thrown into the
            # generator by its consumer is not reported as a synthesis failure.
            yield CambSegment(graphemes=chunk, audio=audio)
26 changes: 25 additions & 1 deletion abogen/voice_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def normalize_profile_entry(entry: Any) -> Dict[str, Any]:
return {}

provider = str(entry.get("provider") or "kokoro").strip().lower()
if provider not in {"kokoro", "supertonic"}:
if provider not in {"kokoro", "supertonic", "camb"}:
provider = "kokoro"

language = str(entry.get("language") or "a").strip().lower() or "a"
Expand All @@ -123,6 +123,30 @@ def normalize_profile_entry(entry: Any) -> Dict[str, Any]:
),
}

if provider == "camb":
voice_id = entry.get("voice_id") or entry.get("camb_voice_id") or 147320
try:
voice_id = int(voice_id)
except (TypeError, ValueError):
voice_id = 147320
model = str(entry.get("model") or entry.get("camb_model") or "mars-flash").strip()
if model not in {"mars-flash", "mars-pro", "mars-instruct"}:
model = "mars-flash"
speed = 1.0
raw_speed = entry.get("speed") or entry.get("camb_speed")
if raw_speed is not None:
try:
speed = max(0.5, min(2.0, float(raw_speed)))
except (TypeError, ValueError):
pass
return {
"provider": "camb",
"language": language,
"voice_id": voice_id,
"model": model,
"speed": speed,
}

voices = _normalize_voice_entries(entry.get("voices", []))
if not voices:
return {}
Expand Down
40 changes: 38 additions & 2 deletions abogen/webui/conversion_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from abogen.pronunciation_store import increment_usage
from abogen.llm_client import LLMClientError
from abogen.tts_supertonic import DEFAULT_SUPERTONIC_VOICES, SupertonicPipeline
from abogen.tts_camb import CambPipeline

from .service import Job, JobStatus

Expand Down Expand Up @@ -126,6 +127,12 @@ def _infer_provider_from_spec(value: Any, fallback: str = "kokoro") -> str:
return "supertonic"
if "*" in raw or "+" in raw:
return "kokoro"
# Pure integer values indicate a Camb AI voice ID.
try:
int(raw)
return "camb"
except (TypeError, ValueError):
pass
return fallback


Expand Down Expand Up @@ -1573,7 +1580,7 @@ def run_conversion_job(job: Job) -> None:
def get_pipeline(provider: str) -> Any:
nonlocal kokoro_cache_ready
provider_norm = str(provider or "kokoro").strip().lower() or "kokoro"
if provider_norm not in {"kokoro", "supertonic"}:
if provider_norm not in {"kokoro", "supertonic", "camb"}:
provider_norm = "kokoro"

existing = pipelines.get(provider_norm)
Expand All @@ -1588,6 +1595,16 @@ def get_pipeline(provider: str) -> Any:
)
return pipelines[provider_norm]

if provider_norm == "camb":
pipelines[provider_norm] = CambPipeline(
sample_rate=SAMPLE_RATE,
api_key=getattr(job, "camb_api_key", None),
model=getattr(job, "camb_model", "mars-flash") or "mars-flash",
voice_id=int(getattr(job, "camb_voice_id", 147320) or 147320),
language=getattr(job, "camb_language", "en-us") or "en-us",
)
return pipelines[provider_norm]

# Kokoro
cfg = load_config()
disable_gpu = not job.use_gpu or not cfg.get("use_gpu", True)
Expand Down Expand Up @@ -1621,13 +1638,19 @@ def resolve_voice_target(raw_spec: str) -> tuple[str, str, Optional[float], Opti
steps = int(entry.get("total_steps") or getattr(job, "supertonic_total_steps", 5) or 5)
speed = float(entry.get("speed") or getattr(job, "speed", 1.0) or 1.0)
return "supertonic", _supertonic_voice_from_spec(voice, getattr(job, "voice", "M1")), speed, steps
if provider == "camb":
voice_id = str(entry.get("voice_id") or getattr(job, "camb_voice_id", 147320) or 147320)
speed = float(entry.get("speed") or getattr(job, "speed", 1.0) or 1.0)
return "camb", voice_id, speed, None
formula = _formula_from_kokoro_entry(entry)
return "kokoro", formula or spec, None, None

fallback_provider = str(getattr(job, "tts_provider", "kokoro") or "kokoro").strip().lower() or "kokoro"
inferred = _infer_provider_from_spec(spec, fallback=fallback_provider)
if inferred == "supertonic":
return "supertonic", _supertonic_voice_from_spec(spec, getattr(job, "voice", "M1")), None, None
if inferred == "camb":
return "camb", spec, None, None
return "kokoro", spec, None, None

def resolve_voice_choice(raw_spec: str) -> tuple[str, str, Any, Optional[float], Optional[int]]:
Expand Down Expand Up @@ -1849,7 +1872,20 @@ def emit_text(
local_segments = 0

provider = str(tts_provider or getattr(job, "tts_provider", "kokoro") or "kokoro").strip().lower() or "kokoro"
if provider == "supertonic":
if provider == "camb":
camb_pipeline = get_pipeline("camb")
voice_id = voice_choice
try:
voice_id = int(voice_choice)
except (TypeError, ValueError):
voice_id = int(getattr(job, "camb_voice_id", 147320) or 147320)
segment_iter = camb_pipeline(
normalized,
voice=voice_id,
speed=float(speed_override if speed_override is not None else job.speed),
split_pattern=split_pattern,
)
elif provider == "supertonic":
supertonic_pipeline = get_pipeline("supertonic")
voice_name = _supertonic_voice_from_spec(voice_choice, getattr(job, "voice", "M1"))
segment_iter = supertonic_pipeline(
Expand Down
48 changes: 45 additions & 3 deletions abogen/webui/routes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def api_save_voice_profile() -> ResponseReturnValue:
if profile is None:
# Speaker Studio payload format
provider = str(payload.get("provider") or "kokoro").strip().lower()
if provider not in {"kokoro", "supertonic"}:
if provider not in {"kokoro", "supertonic", "camb"}:
provider = "kokoro"
if provider == "supertonic":
profile = {
Expand All @@ -73,6 +73,14 @@ def api_save_voice_profile() -> ResponseReturnValue:
"total_steps": payload.get("total_steps") or payload.get("supertonic_total_steps"),
"speed": payload.get("speed") or payload.get("supertonic_speed"),
}
elif provider == "camb":
profile = {
"provider": "camb",
"language": str(payload.get("camb_language") or payload.get("language") or "en-us").strip().lower() or "en-us",
"voice_id": payload.get("camb_voice_id") or payload.get("voice_id") or 147320,
"model": payload.get("camb_model") or payload.get("model") or "mars-flash",
"speed": payload.get("speed") or 1.0,
}
else:
profile = {
"provider": "kokoro",
Expand Down Expand Up @@ -168,7 +176,17 @@ def api_voice_profiles_preview() -> ResponseReturnValue:
resolved_provider = provider or "kokoro"

profiles = load_profiles()
if resolved_provider == "supertonic" and not profile_name:
camb_model = str(payload.get("camb_model") or "").strip() or settings.get("camb_model", "mars-flash")
camb_voice_id = int(payload.get("camb_voice_id") or settings.get("camb_voice_id", 147320))
camb_api_key = str(payload.get("camb_api_key") or "").strip() or settings.get("camb_api_key") or None
camb_language = str(payload.get("camb_language") or "").strip() or settings.get("camb_language", "en-us")

if resolved_provider == "camb" and not profile_name:
voice_spec = str(payload.get("camb_voice_id") or payload.get("voice_id") or camb_voice_id)
camb_model = str(payload.get("camb_model") or payload.get("model") or camb_model)
camb_language = str(payload.get("camb_language") or camb_language)
speed = coerce_float(payload.get("speed"), speed)
elif resolved_provider == "supertonic" and not profile_name:
voice_spec = str(payload.get("voice") or payload.get("supertonic_voice") or "M1").strip() or "M1"
# Allow per-speaker overrides via payload.
supertonic_total_steps = int(payload.get("supertonic_total_steps") or payload.get("total_steps") or supertonic_total_steps)
Expand All @@ -183,6 +201,11 @@ def api_voice_profiles_preview() -> ResponseReturnValue:
voice_spec = str(normalized_entry.get("voice") or "M1")
supertonic_total_steps = int(normalized_entry.get("total_steps") or supertonic_total_steps)
speed = float(normalized_entry.get("speed") or speed)
elif resolved_provider == "camb":
camb_voice_id = int(normalized_entry.get("voice_id") or camb_voice_id)
camb_model = str(normalized_entry.get("model") or camb_model)
voice_spec = str(camb_voice_id)
speed = float(normalized_entry.get("speed") or speed)
else:
voice_spec = formula_from_profile(normalized_entry) or ""
language = str(normalized_entry.get("language") or language)
Expand All @@ -209,6 +232,10 @@ def api_voice_profiles_preview() -> ResponseReturnValue:
use_gpu=use_gpu,
tts_provider=resolved_provider,
supertonic_total_steps=supertonic_total_steps,
camb_model=camb_model,
camb_voice_id=camb_voice_id,
camb_api_key=camb_api_key,
camb_language=camb_language,
max_seconds=max_seconds,
)
except Exception as exc:
Expand All @@ -230,7 +257,12 @@ def api_speaker_preview() -> ResponseReturnValue:
use_gpu = settings.get("use_gpu", False)

base_spec, speaker_name = split_profile_spec(voice)
resolved_provider = tts_provider if tts_provider in {"kokoro", "supertonic"} else ""
camb_model = str(payload.get("camb_model") or "").strip() or settings.get("camb_model", "mars-flash")
camb_voice_id = int(payload.get("camb_voice_id") or settings.get("camb_voice_id", 147320))
camb_api_key = str(payload.get("camb_api_key") or "").strip() or settings.get("camb_api_key") or None
camb_language = str(payload.get("camb_language") or "").strip() or settings.get("camb_language", "en-us")

resolved_provider = tts_provider if tts_provider in {"kokoro", "supertonic", "camb"} else ""

if speaker_name:
entry = normalize_profile_entry(load_profiles().get(speaker_name))
Expand All @@ -241,6 +273,12 @@ def api_speaker_preview() -> ResponseReturnValue:
supertonic_total_steps = int(entry.get("total_steps") or supertonic_total_steps)
if speed_value is None:
speed = coerce_float(entry.get("speed"), speed)
elif resolved_provider == "camb":
camb_voice_id = int(entry.get("voice_id") or camb_voice_id)
camb_model = str(entry.get("model") or camb_model)
voice = str(camb_voice_id)
if speed_value is None:
speed = coerce_float(entry.get("speed"), speed)
elif resolved_provider == "kokoro":
voice = formula_from_profile(entry) or (base_spec or voice)

Expand Down Expand Up @@ -270,6 +308,10 @@ def api_speaker_preview() -> ResponseReturnValue:
,
tts_provider=resolved_provider,
supertonic_total_steps=supertonic_total_steps or int(settings.get("supertonic_total_steps") or 5),
camb_model=camb_model,
camb_voice_id=camb_voice_id,
camb_api_key=camb_api_key,
camb_language=camb_language,
pronunciation_overrides=pronunciation_overrides,
manual_overrides=manual_overrides,
speakers=speakers,
Expand Down
13 changes: 13 additions & 0 deletions abogen/webui/routes/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ def update_settings() -> ResponseReturnValue:
current["supertonic_speed"] = max(0.7, min(2.0, float(form.get("supertonic_speed", current.get("supertonic_speed", 1.0)))))
except (TypeError, ValueError):
pass
camb_model = (form.get("camb_model") or "").strip().lower()
if camb_model in {"mars-flash", "mars-pro", "mars-instruct"}:
current["camb_model"] = camb_model
try:
current["camb_voice_id"] = max(1, int(form.get("camb_voice_id", current.get("camb_voice_id", 147320))))
except (TypeError, ValueError):
pass
camb_api_key = form.get("camb_api_key")
if camb_api_key is not None:
current["camb_api_key"] = camb_api_key.strip()
camb_language = (form.get("camb_language") or "").strip().lower()
if camb_language:
current["camb_language"] = camb_language
current["output_format"] = (form.get("output_format") or "mp3").strip()
current["subtitle_mode"] = (form.get("subtitle_mode") or "Disabled").strip()
current["subtitle_format"] = (form.get("subtitle_format") or "srt").strip()
Expand Down
Loading