8 changes: 8 additions & 0 deletions .env.example
@@ -35,9 +35,17 @@ ABOGEN_GID=1000
# Optional: Seed the web UI with working defaults for the LLM-powered
# text normalization features. Leave these blank to configure everything
# from the Settings page.

# --- Ollama (local) ---
ABOGEN_LLM_BASE_URL=http://localhost:11434 # Supply the server root; /v1 is added automatically.
ABOGEN_LLM_API_KEY=ollama
ABOGEN_LLM_MODEL=llama3.1:8b

# --- MiniMax Cloud ---
# ABOGEN_LLM_BASE_URL=https://api.minimax.io/v1
# ABOGEN_LLM_API_KEY=your-minimax-api-key
# ABOGEN_LLM_MODEL=MiniMax-M2.7

ABOGEN_LLM_TIMEOUT=45
ABOGEN_LLM_CONTEXT_MODE=sentence
# For custom prompts, keep the text on a single line or escape newlines.
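
The comment next to `ABOGEN_LLM_BASE_URL` says the server root is enough because `/v1` is appended automatically. A minimal sketch of that behavior (the helper name is hypothetical; Abogen's actual implementation may differ):

```python
def normalize_base_url(raw: str) -> str:
    """Append /v1 to a server root unless the caller already supplied it."""
    url = raw.strip().rstrip("/")
    return url if url.endswith("/v1") else url + "/v1"

assert normalize_base_url("http://localhost:11434") == "http://localhost:11434/v1"
assert normalize_base_url("https://api.minimax.io/v1/") == "https://api.minimax.io/v1"
```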
17 changes: 14 additions & 3 deletions README.md
@@ -385,11 +385,22 @@ docker run --rm \
## `LLM-assisted text normalization`
Abogen can hand tricky apostrophes and contractions to an OpenAI-compatible large language model. Configure it from **Settings → LLM**:

1. Enter the base URL for your endpoint (Ollama, OpenAI proxy, etc.) and an API key if required. Use the server root (for Ollama: `http://localhost:11434`)—Abogen appends `/v1/...` automatically, but it also accepts inputs that already end in `/v1`.
2. Click **Refresh models** to load the catalog, pick a default model, and adjust the timeout or prompt template.
1. Pick a **Provider** from the dropdown (MiniMax, OpenAI, DeepSeek, Ollama) to auto-fill the endpoint and available models, or choose *Custom endpoint* to enter any OpenAI-compatible URL manually.
2. Enter an API key if required, then click **Refresh models** to load the catalog. Pick a default model and adjust the timeout or prompt template.
3. Use the preview box to test the prompt, then save the settings. The Normalization panel can synthesize a short audio preview with the current configuration.

When you are running inside Docker or a CI pipeline, seed the form automatically with `ABOGEN_LLM_*` variables in your `.env` file. The `.env.example` file includes sample values for a local Ollama server.
### Supported providers

| Provider | Base URL | Models |
|----------|----------|--------|
| **MiniMax** | `https://api.minimax.io/v1` | MiniMax-M2.7, MiniMax-M2.5-highspeed, … |
| **OpenAI** | `https://api.openai.com/v1` | gpt-4o, gpt-4o-mini, … |
| **DeepSeek** | `https://api.deepseek.com/v1` | deepseek-chat, deepseek-reasoner |
| **Ollama** | `http://localhost:11434/v1` | *(local models)* |

Any service that exposes `/v1/chat/completions` (e.g. LM Studio, vLLM, text-generation-webui) also works via *Custom endpoint*.
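
As a hedged sketch of what "OpenAI-compatible" means here, assuming the `openai` Python package and a local Ollama server (the prompt is illustrative, not Abogen's actual template):

```python
from openai import OpenAI

# Any /v1/chat/completions endpoint works; swap base_url/model for your provider.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
response = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[
        {"role": "system", "content": "Normalize apostrophes and contractions."},
        {"role": "user", "content": "Its 'bout time y'all fixed this text."},
    ],
    timeout=45,
)
print(response.choices[0].message.content)
```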

When you are running inside Docker or a CI pipeline, seed the form automatically with `ABOGEN_LLM_*` variables in your `.env` file. The `.env.example` file includes sample values for a local Ollama server and MiniMax Cloud.

## `Audiobookshelf integration`
Abogen can push finished audiobooks directly into Audiobookshelf. Configure this under **Settings → Integrations → Audiobookshelf** by providing:
99 changes: 99 additions & 0 deletions abogen/llm_providers.py
@@ -0,0 +1,99 @@
"""Built-in LLM provider presets for quick configuration.

Each preset bundles the endpoint URL, a list of known models, and the
environment variable that typically holds the API key. The Web UI
uses these presets so users can pick a provider from a dropdown instead
of typing the URL manually.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Sequence, Tuple


@dataclass(frozen=True)
class LLMProviderPreset:
"""A preconfigured cloud or local LLM endpoint."""

id: str
name: str
base_url: str
api_key_env: str = ""
api_key_hint: str = ""
models: Tuple[str, ...] = ()

def to_dict(self) -> Dict[str, object]:
return {
"id": self.id,
"name": self.name,
"base_url": self.base_url,
"api_key_env": self.api_key_env,
"api_key_hint": self.api_key_hint,
"models": list(self.models),
}


_BUILTIN_PRESETS: Tuple[LLMProviderPreset, ...] = (
LLMProviderPreset(
id="minimax",
name="MiniMax",
base_url="https://api.minimax.io/v1",
api_key_env="MINIMAX_API_KEY",
api_key_hint="Get your key at https://platform.minimax.io",
models=(
"MiniMax-M1",
"MiniMax-Text-01",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
),
),
LLMProviderPreset(
id="openai",
name="OpenAI",
base_url="https://api.openai.com/v1",
api_key_env="OPENAI_API_KEY",
api_key_hint="Get your key at https://platform.openai.com/api-keys",
models=(
"gpt-4o",
"gpt-4o-mini",
"gpt-4.1",
"gpt-4.1-mini",
"gpt-4.1-nano",
),
),
LLMProviderPreset(
id="deepseek",
name="DeepSeek",
base_url="https://api.deepseek.com/v1",
api_key_env="DEEPSEEK_API_KEY",
api_key_hint="Get your key at https://platform.deepseek.com",
models=(
"deepseek-chat",
"deepseek-reasoner",
),
),
LLMProviderPreset(
id="ollama",
name="Ollama (local)",
base_url="http://localhost:11434/v1",
api_key_env="",
api_key_hint='Use "ollama" or leave blank',
models=(),
),
)


def get_provider_presets() -> Sequence[LLMProviderPreset]:
"""Return all built-in provider presets."""
return _BUILTIN_PRESETS


def get_provider_by_id(provider_id: str) -> LLMProviderPreset | None:
"""Look up a single preset by its identifier."""
for preset in _BUILTIN_PRESETS:
if preset.id == provider_id:
return preset
return None
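
A quick usage sketch of the module's two lookup helpers and the `to_dict()` serializer (names taken from the code above):

```python
from abogen.llm_providers import get_provider_by_id, get_provider_presets

for preset in get_provider_presets():
    print(preset.id, preset.base_url)  # minimax, openai, deepseek, ollama

minimax = get_provider_by_id("minimax")
if minimax is not None:
    payload = minimax.to_dict()           # JSON-safe dict handed to the Web UI
    first_model = payload["models"][0]    # "MiniMax-M1" in the current tuple
```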
1 change: 1 addition & 0 deletions abogen/normalization_settings.py
@@ -27,6 +27,7 @@
)

_SETTINGS_DEFAULTS: Dict[str, Any] = {
    "llm_provider": "",
    "llm_base_url": "",
    "llm_api_key": "",
    "llm_model": "",
2 changes: 2 additions & 0 deletions abogen/webui/routes/settings.py
@@ -23,6 +23,7 @@
from abogen.webui.debug_tts_runner import run_debug_tts_wavs
from abogen.debug_tts_samples import DEBUG_TTS_SAMPLES
from abogen.utils import get_user_output_path, load_config
from abogen.llm_providers import get_provider_presets

settings_bp = Blueprint("settings", __name__)

@@ -216,6 +217,7 @@ def settings_page() -> str | ResponseReturnValue:
        save_locations=save_locations,
        default_output_dir=default_output_dir,
        llm_ready=llm_ready(load_settings()),
        llm_provider_presets=[p.to_dict() for p in get_provider_presets()],
        debug_samples=DEBUG_TTS_SAMPLES,
        debug_manifest=debug_manifest,
    )
3 changes: 2 additions & 1 deletion abogen/webui/routes/utils/settings.py
@@ -196,6 +196,7 @@ def settings_defaults() -> Dict[str, Any]:
"speaker_analysis_threshold": _DEFAULT_ANALYSIS_THRESHOLD,
"speaker_pronunciation_sentence": "This is {{name}} speaking.",
"speaker_random_languages": [],
"llm_provider": "",
"llm_base_url": llm_env_defaults.get("llm_base_url", ""),
"llm_api_key": llm_env_defaults.get("llm_api_key", ""),
"llm_model": llm_env_defaults.get("llm_model", ""),
@@ -344,7 +345,7 @@ def normalize_setting_value(key: str, value: Any, defaults: Dict[str, Any]) -> Any:
    if key == "llm_prompt":
        candidate = str(value or "").strip()
        return candidate if candidate else defaults[key]
    if key in {"llm_base_url", "llm_api_key", "llm_model"}:
    if key in {"llm_provider", "llm_base_url", "llm_api_key", "llm_model"}:
        return str(value or "").strip()
    if key == "speaker_random_languages":
        if isinstance(value, (list, tuple, set)):
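
A hedged illustration of how `normalize_setting_value` treats the new key (assuming the module path shown in this diff): string-valued LLM keys are coerced to stripped strings, and `None` becomes `""`.

```python
from abogen.webui.routes.utils.settings import normalize_setting_value, settings_defaults

defaults = settings_defaults()
assert normalize_setting_value("llm_provider", "  minimax ", defaults) == "minimax"
assert normalize_setting_value("llm_provider", None, defaults) == ""
```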
52 changes: 52 additions & 0 deletions abogen/webui/static/settings.js
@@ -367,16 +367,67 @@ function collectLLMFields() {
  const prompt = form.querySelector('#llm_prompt');
  const timeout = form.querySelector('#llm_timeout');
  const context = form.querySelector('input[name="llm_context_mode"]:checked');
  const provider = form.querySelector('#llm_provider');
  return {
    base_url: baseUrl ? baseUrl.value.trim() : '',
    api_key: apiKey ? apiKey.value.trim() : '',
    model: model ? model.value.trim() : '',
    prompt: prompt ? prompt.value : '',
    context_mode: context ? context.value : 'sentence',
    timeout: timeout ? parseNumber(timeout.value, 30) : 30,
    provider: provider ? provider.value : '',
  };
}

function getProviderPresets() {
  const select = form.querySelector('#llm_provider');
  if (!select || !select.dataset.presets) {
    return [];
  }
  try {
    return JSON.parse(select.dataset.presets);
  } catch (_) {
    return [];
  }
}

function applyProviderPreset(providerId) {
  const presets = getProviderPresets();
  const preset = presets.find((p) => p.id === providerId);
  const baseUrlInput = form.querySelector('#llm_base_url');
  const apiKeyInput = form.querySelector('#llm_api_key');
  const apiKeyHint = document.querySelector('#llm_api_key_hint');

  if (!preset) {
    if (apiKeyHint) {
      apiKeyHint.innerHTML = 'Leave blank or use <code>ollama</code> for local servers that do not require keys.';
    }
    return;
  }

  if (baseUrlInput) {
    baseUrlInput.value = preset.base_url;
  }
  if (apiKeyHint && preset.api_key_hint) {
    apiKeyHint.textContent = preset.api_key_hint;
  }
  if (preset.models && preset.models.length) {
    const models = preset.models.map((id) => ({ id, label: id }));
    updateModelOptions(models);
  }
  updateLLMNavState();
}

function initProviderDropdown() {
  const providerSelect = form.querySelector('#llm_provider');
  if (!providerSelect) {
    return;
  }
  providerSelect.addEventListener('change', () => {
    applyProviderPreset(providerSelect.value);
  });
}

function updateModelOptions(models) {
  const select = form.querySelector('#llm_model');
  if (!select) {
@@ -879,4 +930,5 @@ if (form) {
  initFolderPicker();
  initContractionModal();
  initLLMStateWatchers();
  initProviderDropdown();
}
12 changes: 11 additions & 1 deletion abogen/webui/templates/settings.html
@@ -276,6 +276,16 @@ <h1 class="card__title">Application Settings</h1>
<section class="settings-panel" data-section="llm">
  <fieldset class="settings__section">
    <legend>Endpoint</legend>
    <div class="field">
      <label for="llm_provider">Provider</label>
      <select id="llm_provider" name="llm_provider" data-presets='{{ llm_provider_presets | tojson }}'>
        <option value="">Custom endpoint</option>
        {% for preset in llm_provider_presets %}
        <option value="{{ preset.id }}" {% if settings.llm_provider == preset.id %}selected{% endif %}>{{ preset.name }}</option>
        {% endfor %}
      </select>
      <p class="hint">Pick a cloud provider to auto-fill the endpoint, or choose <em>Custom endpoint</em> to enter any OpenAI-compatible URL.</p>
    </div>
    <div class="field">
      <label for="llm_base_url">Base URL</label>
      <input type="url" id="llm_base_url" name="llm_base_url" value="{{ settings.llm_base_url }}" placeholder="http://localhost:11434/v1">
@@ -284,7 +294,7 @@ <h1 class="card__title">Application Settings</h1>
    <div class="field">
      <label for="llm_api_key">API Key</label>
      <input type="text" id="llm_api_key" name="llm_api_key" value="{{ settings.llm_api_key }}" autocomplete="off" placeholder="ollama">
      <p class="hint">Leave blank or use <code>ollama</code> for local servers that do not require keys.</p>
      <p class="hint" id="llm_api_key_hint">Leave blank or use <code>ollama</code> for local servers that do not require keys.</p>
    </div>
    <div class="field field--inline">
      <div class="field__group">
76 changes: 76 additions & 0 deletions tests/test_llm_providers.py
@@ -0,0 +1,76 @@
"""Tests for the LLM provider presets module."""

from __future__ import annotations

import pytest

from abogen.llm_providers import (
LLMProviderPreset,
get_provider_presets,
get_provider_by_id,
)


def test_get_provider_presets_returns_non_empty():
presets = get_provider_presets()
assert len(presets) >= 4


def test_minimax_preset_exists():
preset = get_provider_by_id("minimax")
assert preset is not None
assert preset.name == "MiniMax"
assert preset.base_url == "https://api.minimax.io/v1"
assert preset.api_key_env == "MINIMAX_API_KEY"
assert len(preset.models) >= 1
assert "MiniMax-M2.7" in preset.models


def test_openai_preset_exists():
preset = get_provider_by_id("openai")
assert preset is not None
assert preset.base_url == "https://api.openai.com/v1"


def test_ollama_preset_has_no_models():
preset = get_provider_by_id("ollama")
assert preset is not None
assert preset.models == ()


def test_get_provider_by_id_returns_none_for_unknown():
assert get_provider_by_id("nonexistent") is None
assert get_provider_by_id("") is None


def test_preset_ids_are_unique():
presets = get_provider_presets()
ids = [p.id for p in presets]
assert len(ids) == len(set(ids))


def test_to_dict_has_required_keys():
preset = get_provider_by_id("minimax")
d = preset.to_dict()
assert set(d.keys()) == {"id", "name", "base_url", "api_key_env", "api_key_hint", "models"}
assert isinstance(d["models"], list)
assert d["id"] == "minimax"


def test_preset_is_frozen():
preset = get_provider_by_id("minimax")
with pytest.raises(AttributeError):
preset.name = "changed"


def test_all_presets_have_base_url():
for preset in get_provider_presets():
assert preset.base_url, f"Preset {preset.id!r} missing base_url"


def test_normalization_settings_includes_llm_provider():
"""The llm_provider key must exist in the settings defaults."""
from abogen.normalization_settings import _SETTINGS_DEFAULTS

assert "llm_provider" in _SETTINGS_DEFAULTS
assert _SETTINGS_DEFAULTS["llm_provider"] == ""
35 changes: 35 additions & 0 deletions tests/test_llm_providers_integration.py
@@ -0,0 +1,35 @@
"""Integration tests for the LLM provider presets in the settings pipeline."""

from __future__ import annotations

from abogen.llm_providers import get_provider_presets, get_provider_by_id
from abogen.normalization_settings import (
_extract_settings,
build_llm_configuration,
)


def test_extract_settings_preserves_llm_provider():
"""When llm_provider is supplied it must survive _extract_settings()."""
extracted = _extract_settings({"llm_provider": "minimax"})
assert extracted["llm_provider"] == "minimax"


def test_extract_settings_defaults_llm_provider_to_empty():
extracted = _extract_settings({})
assert extracted["llm_provider"] == ""


def test_build_llm_configuration_with_minimax_preset():
"""Simulate choosing the MiniMax preset and building the LLM config."""
preset = get_provider_by_id("minimax")
settings = _extract_settings({
"llm_provider": preset.id,
"llm_base_url": preset.base_url,
"llm_api_key": "test-key",
"llm_model": preset.models[0],
})
config = build_llm_configuration(settings)
assert config.base_url == "https://api.minimax.io/v1"
assert config.api_key == "test-key"
assert config.model == preset.models[0]