diff --git a/.env.example b/.env.example index 884c01c..f59cb76 100644 --- a/.env.example +++ b/.env.example @@ -13,6 +13,7 @@ GOOGLE_API_KEY= # Google Imagen images, Google Cloud TTS (700+ voic ELEVENLABS_API_KEY= # TTS narration, music generation, sound effects OPENAI_API_KEY= # OpenAI TTS fallback and DALL-E image generation XAI_API_KEY= # Grok image generation/editing and Grok video generation +MINIMAX_API_KEY= # MiniMax chat (MiniMax-M2.7) + TTS (speech-2.8-hd / turbo) — get one at https://platform.minimax.io/ # Piper local voices do not require env vars; install `piper-tts` via pip # --- Music --- diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index a5cea35..e0f203e 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -39,6 +39,7 @@ GOOGLE_API_KEY= # Google TTS + Google Imagen ELEVENLABS_API_KEY= # TTS, music, sound effects (10K chars/month free) OPENAI_API_KEY= # OpenAI TTS + DALL-E 3 images XAI_API_KEY= # xAI Grok image generation/editing + Grok video generation +MINIMAX_API_KEY= # MiniMax chat (MiniMax-M2.7) + TTS (speech-2.8-hd / turbo) # MULTI-MODEL GATEWAY (one key, 6+ tools) FAL_KEY= # FLUX, Recraft, Kling, Veo, MiniMax video @@ -262,6 +263,96 @@ Google TTS offers 700+ voices across 50+ languages. Voice names follow the patte --- +### MiniMax — Chat + TTS + +> **Cost-effective chat and TTS under one API key.** MiniMax-M2.7 is a frontier-class model accessible via an OpenAI-compatible API. The same key also unlocks the TTS provider. No subscription required. + +**Tools unlocked:** `minimax_tts`; LLM provider via `lib/providers` (set `llm.provider: minimax` in `config.yaml`) +**Env var:** `MINIMAX_API_KEY` + +#### Chat Models + +| Model | Notes | +|-------|-------| +| `MiniMax-M2.7` | Peak performance. Ultimate value. 
(default) | + | `MiniMax-M2.7-highspeed` | Same capability, faster and more agile | + +**API reference:** <https://platform.minimax.io/docs/api-reference/text-openai-api> + +#### Chat Usage + +Set `llm.provider: minimax` in `config.yaml` (or override per-run) to route +OpenMontage's LLM calls through MiniMax: + +```yaml +# config.yaml +llm: + provider: minimax + model: MiniMax-M2.7 # optional — MiniMax-M2.7 is the default + temperature: 1.0 # MiniMax range: (0.0, 1.0] — 0 is not accepted + max_tokens: 4096 +``` + +Then in Python: + +```python +from lib.config_model import OpenMontageConfig +from lib.providers import build_provider + +config = OpenMontageConfig.load() +provider = build_provider(config.llm) +response = provider.chat([{"role": "user", "content": "Hello!"}]) +``` + +**Env var:** `MINIMAX_API_KEY` — get one at <https://platform.minimax.io/> + +--- + +### MiniMax — TTS + +> **Cost-effective TTS with a broad voice catalogue.** The same `MINIMAX_API_KEY` covers TTS. No subscription required. + +**Tools unlocked:** `minimax_tts` +**Env var:** `MINIMAX_API_KEY` + +#### Setup + +1. Go to [platform.minimax.io](https://platform.minimax.io/) and create an account +2. Navigate to **API Keys** in your account settings +3. Create a key and copy it +4. Add to `.env`: `MINIMAX_API_KEY=your-key-here` + +#### TTS Pricing + +| Model | Price per 1M characters | +|-------|------------------------| +| `speech-2.8-hd` | ~$100.00 | +| `speech-2.8-turbo` | ~$50.00 | + +**Free tier:** MiniMax offers a free trial quota for new accounts. Check [platform.minimax.io/docs/guides/pricing-paygo](https://platform.minimax.io/docs/guides/pricing-paygo) for current rates. 
+ +#### Supported Voices (English) + +| Voice ID | Style | +|----------|-------| +| `English_expressive_narrator` | Expressive narration (default) | +| `English_Graceful_Lady` | Elegant female | +| `English_Insightful_Speaker` | Calm male | +| `English_radiant_girl` | Upbeat female | +| `English_Persuasive_Man` | Authoritative male | +| `English_Lucky_Robot` | Sci-fi robot | + +Full voice list: [platform.minimax.io/faq/system-voice-id](https://platform.minimax.io/faq/system-voice-id) + +#### TTS Models + +| Model | Speed | Quality | +|-------|-------|---------| +| `speech-2.8-hd` | Standard | Highest similarity (recommended default) | +| `speech-2.8-turbo` | Fast | High quality, lower latency | + +--- + ### Runway — Gen-3/Gen-4 Video > **Highest-rated AI video quality.** #1 on Elo rankings. Professional-grade video generation with Gen-3 Alpha Turbo, Gen-4 Turbo, and Gen-4 Aleph models. diff --git a/lib/providers/__init__.py b/lib/providers/__init__.py index e69de29..3699582 100644 --- a/lib/providers/__init__.py +++ b/lib/providers/__init__.py @@ -0,0 +1,71 @@ +"""LLM provider registry for OpenMontage. + +Usage +----- +Import :func:`build_provider` to get a provider instance from a +:class:`~lib.config_model.LLMConfig`:: + + from lib.config_model import OpenMontageConfig + from lib.providers import build_provider + + config = OpenMontageConfig.load() + provider = build_provider(config.llm) + response = provider.chat([{"role": "user", "content": "Hello!"}]) + +Supported ``llm.provider`` values +---------------------------------- +* ``minimax`` — MiniMax-M2.7 via OpenAI-compatible API + +More providers can be registered here as the project grows. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from lib.providers.base import BaseLLMProvider +from lib.providers.minimax import MiniMaxProvider + +if TYPE_CHECKING: + from lib.config_model import LLMConfig + +__all__ = [ + "BaseLLMProvider", + "MiniMaxProvider", + "build_provider", +] + +_REGISTRY: dict[str, type[BaseLLMProvider]] = { + "minimax": MiniMaxProvider, +} + + +def build_provider(llm_config: "LLMConfig") -> BaseLLMProvider: + """Instantiate the LLM provider described by *llm_config*. + + Parameters + ---------- + llm_config: + The ``llm`` section of :class:`~lib.config_model.OpenMontageConfig`. + + Returns + ------- + BaseLLMProvider + A ready-to-use provider instance. + + Raises + ------ + ValueError + When ``llm_config.provider`` names an unsupported provider. + """ + key = llm_config.provider.lower() + cls = _REGISTRY.get(key) + if cls is None: + supported = ", ".join(sorted(_REGISTRY)) + raise ValueError( + f"Unsupported LLM provider {key!r}. " + f"Supported values: {supported}. " + "For other providers (anthropic, openai, gemini, …) use the " + "native SDK of the coding assistant driving OpenMontage." + ) + return cls() diff --git a/lib/providers/base.py b/lib/providers/base.py new file mode 100644 index 0000000..5a341c5 --- /dev/null +++ b/lib/providers/base.py @@ -0,0 +1,34 @@ +"""Abstract base class for LLM provider clients. + +Each provider wraps an HTTP API and exposes a unified chat() interface +that mirrors the OpenAI chat completions contract. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any + + +class BaseLLMProvider(ABC): + """Minimal contract every LLM provider must satisfy.""" + + #: Short machine-readable provider name (e.g. "minimax", "openai") + name: str = "" + + @abstractmethod + def chat(self, messages: list[dict[str, Any]], **kwargs: Any) -> dict[str, Any]: + """Send a chat completion request and return the raw API response. 
+ + Args: + messages: List of role/content message dicts (OpenAI format). + **kwargs: Extra parameters forwarded to the underlying API + (model, temperature, max_tokens, …). + + Returns: + The raw response body as a Python dict. + """ + + def get_info(self) -> dict[str, Any]: + """Return provider metadata.""" + return {"name": self.name} diff --git a/lib/providers/minimax.py b/lib/providers/minimax.py new file mode 100644 index 0000000..e906a6c --- /dev/null +++ b/lib/providers/minimax.py @@ -0,0 +1,146 @@ +"""MiniMax chat-model provider (OpenAI-compatible interface). + +Wraps the MiniMax /v1/chat/completions endpoint so that any part of +OpenMontage that needs a general-purpose LLM client can route to MiniMax +by setting ``llm.provider: minimax`` in *config.yaml*. + +Models supported +---------------- +* ``MiniMax-M2.7`` – Peak performance. Ultimate value. +* ``MiniMax-M2.7-highspeed`` – Same capability, faster and more agile. + +API reference +------------- +https://platform.minimax.io/docs/api-reference/text-openai-api + +Environment variable +-------------------- +``MINIMAX_API_KEY`` — obtain a key at https://platform.minimax.io/ +""" + +from __future__ import annotations + +import os +from typing import Any + +import requests + +from lib.providers.base import BaseLLMProvider + +_CHAT_ENDPOINT = "https://api.minimax.io/v1/chat/completions" + +MINIMAX_MODELS = [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", +] + +_DEFAULT_MODEL = "MiniMax-M2.7" +_DEFAULT_TEMPERATURE = 1.0 # MiniMax range is (0.0, 1.0]; 0 is not accepted + + +class MiniMaxProvider(BaseLLMProvider): + """MiniMax chat provider using the OpenAI-compatible API. + + Parameters + ---------- + api_key: + MiniMax API key. Falls back to the ``MINIMAX_API_KEY`` environment + variable when not supplied explicitly. + base_url: + Override the default endpoint root (useful for testing or proxies). 
+ """ + + name = "minimax" + + def __init__( + self, + api_key: str | None = None, + base_url: str = "https://api.minimax.io", + ) -> None: + self._api_key = api_key or os.environ.get("MINIMAX_API_KEY", "") + self._base_url = base_url.rstrip("/") + + # ------------------------------------------------------------------ + # Public interface + # ------------------------------------------------------------------ + + @property + def is_available(self) -> bool: + """Return True if an API key is configured.""" + return bool(self._api_key) + + def chat( + self, + messages: list[dict[str, Any]], + *, + model: str = _DEFAULT_MODEL, + temperature: float = _DEFAULT_TEMPERATURE, + max_tokens: int = 4096, + **kwargs: Any, + ) -> dict[str, Any]: + """Send a chat completion request to MiniMax. + + Parameters + ---------- + messages: + Conversation history in OpenAI format + (``[{"role": "user", "content": "..."}]``). + model: + One of :data:`MINIMAX_MODELS`. Defaults to ``MiniMax-M2.7``. + temperature: + Sampling temperature in the range ``(0.0, 1.0]``. + Values of 0 are not accepted by the MiniMax API. + max_tokens: + Maximum tokens in the completion. + **kwargs: + Additional fields forwarded verbatim to the API payload. + + Returns + ------- + dict + Raw response body from the MiniMax API (OpenAI-compatible shape). + + Raises + ------ + EnvironmentError + When no API key is configured. + requests.HTTPError + On non-2xx responses from the API. + """ + if not self._api_key: + raise EnvironmentError( + "MINIMAX_API_KEY is not set. 
" + "Get a key at https://platform.minimax.io/" + ) + + # Clamp temperature: MiniMax rejects 0; cap at 1.0 + temperature = max(0.01, min(float(temperature), 1.0)) + + payload: dict[str, Any] = { + "model": model, + "messages": messages, + "temperature": temperature, + "max_tokens": max_tokens, + **kwargs, + } + + response = requests.post( + f"{self._base_url}/v1/chat/completions", + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=120, + ) + response.raise_for_status() + return response.json() + + def get_info(self) -> dict[str, Any]: + return { + "name": self.name, + "models": MINIMAX_MODELS, + "default_model": _DEFAULT_MODEL, + "base_url": self._base_url, + "available": self.is_available, + } diff --git a/tests/tools/test_minimax_provider.py b/tests/tools/test_minimax_provider.py new file mode 100644 index 0000000..0c46a03 --- /dev/null +++ b/tests/tools/test_minimax_provider.py @@ -0,0 +1,338 @@ +"""Unit tests for the MiniMax LLM chat provider (lib/providers/minimax.py). + +These tests do not require a MINIMAX_API_KEY or real network access. +All HTTP calls are mocked via unittest.mock. 
+""" + +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +# Ensure the project root is on sys.path +PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +from lib.providers import MiniMaxProvider, build_provider +from lib.providers.minimax import ( + MINIMAX_MODELS, + _CHAT_ENDPOINT, + _DEFAULT_MODEL, + _DEFAULT_TEMPERATURE, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_chat_response(content: str = "Hello!") -> MagicMock: + """Return a mock requests.Response for a successful chat completion.""" + mock_resp = MagicMock() + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "id": "chatcmpl-test-id", + "object": "chat.completion", + "model": "MiniMax-M2.7", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": content}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}, + } + return mock_resp + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture() +def provider() -> MiniMaxProvider: + return MiniMaxProvider() + + +@pytest.fixture(autouse=True) +def _clear_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + """Ensure MINIMAX_API_KEY is unset by default.""" + monkeypatch.delenv("MINIMAX_API_KEY", raising=False) + + +# --------------------------------------------------------------------------- +# Availability +# --------------------------------------------------------------------------- + +class TestAvailability: + def test_unavailable_without_api_key(self, provider: MiniMaxProvider) -> None: + assert not provider.is_available + + def 
test_available_with_env_key( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + p = MiniMaxProvider() + assert p.is_available + + def test_available_with_explicit_key(self) -> None: + p = MiniMaxProvider(api_key="explicit-key") + assert p.is_available + + +# --------------------------------------------------------------------------- +# Metadata +# --------------------------------------------------------------------------- + +class TestMetadata: + def test_name_is_minimax(self, provider: MiniMaxProvider) -> None: + assert provider.name == "minimax" + + def test_default_model_in_models_list(self) -> None: + assert _DEFAULT_MODEL in MINIMAX_MODELS + + def test_models_list_contains_highspeed(self) -> None: + assert "MiniMax-M2.7-highspeed" in MINIMAX_MODELS + + def test_default_temperature_is_one(self) -> None: + assert _DEFAULT_TEMPERATURE == 1.0 + + def test_endpoint_uses_correct_domain(self) -> None: + assert "api.minimax.io" in _CHAT_ENDPOINT + assert "api.minimax.chat" not in _CHAT_ENDPOINT + + def test_get_info_contains_models(self, provider: MiniMaxProvider) -> None: + info = provider.get_info() + assert info["name"] == "minimax" + assert "MiniMax-M2.7" in info["models"] + assert info["default_model"] == "MiniMax-M2.7" + + def test_base_url_is_international(self, provider: MiniMaxProvider) -> None: + assert provider.get_info()["base_url"].startswith("https://api.minimax.io") + + +# --------------------------------------------------------------------------- +# chat() — no API key +# --------------------------------------------------------------------------- + +class TestChatWithoutKey: + def test_raises_environment_error(self, provider: MiniMaxProvider) -> None: + with pytest.raises(EnvironmentError, match="MINIMAX_API_KEY"): + provider.chat([{"role": "user", "content": "Hi"}]) + + +# --------------------------------------------------------------------------- +# chat() — happy path (mocked HTTP) +# 
--------------------------------------------------------------------------- + +class TestChatSuccess: + @patch("lib.providers.minimax.requests") + def test_returns_response_dict( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response("Greetings") + p = MiniMaxProvider() + + result = p.chat([{"role": "user", "content": "Hello"}]) + + assert result["choices"][0]["message"]["content"] == "Greetings" + + @patch("lib.providers.minimax.requests") + def test_uses_correct_endpoint( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}]) + + url = mock_requests.post.call_args[0][0] + assert "api.minimax.io" in url + assert url.endswith("/v1/chat/completions") + + @patch("lib.providers.minimax.requests") + def test_sends_bearer_auth_header( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + api_key = "minimax-key-abc" + monkeypatch.setenv("MINIMAX_API_KEY", api_key) + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}]) + + headers = mock_requests.post.call_args[1]["headers"] + assert headers["Authorization"] == f"Bearer {api_key}" + + @patch("lib.providers.minimax.requests") + def test_default_model_in_payload( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}]) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["model"] == "MiniMax-M2.7" + + @patch("lib.providers.minimax.requests") + def 
test_highspeed_model_forwarded( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}], model="MiniMax-M2.7-highspeed") + + payload = mock_requests.post.call_args[1]["json"] + assert payload["model"] == "MiniMax-M2.7-highspeed" + + @patch("lib.providers.minimax.requests") + def test_messages_forwarded( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + msgs = [{"role": "system", "content": "Be brief."}, {"role": "user", "content": "Hi"}] + + p.chat(msgs) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["messages"] == msgs + + +# --------------------------------------------------------------------------- +# Temperature clamping +# --------------------------------------------------------------------------- + +class TestTemperatureClamping: + @patch("lib.providers.minimax.requests") + def test_zero_is_clamped_above_zero( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """MiniMax does not accept temperature=0; it must be clamped.""" + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}], temperature=0) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["temperature"] > 0 + + @patch("lib.providers.minimax.requests") + def test_temperature_above_one_is_clamped( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": 
"user", "content": "Hi"}], temperature=2.0) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["temperature"] <= 1.0 + + @patch("lib.providers.minimax.requests") + def test_valid_temperature_preserved( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_chat_response() + p = MiniMaxProvider() + + p.chat([{"role": "user", "content": "Hi"}], temperature=0.7) + + payload = mock_requests.post.call_args[1]["json"] + assert abs(payload["temperature"] - 0.7) < 1e-9 + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + +class TestChatErrors: + @patch("lib.providers.minimax.requests") + def test_http_error_propagates( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + import requests as req_lib + + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_resp = MagicMock() + mock_resp.raise_for_status.side_effect = req_lib.HTTPError("401 Unauthorized") + mock_requests.post.return_value = mock_resp + p = MiniMaxProvider() + + with pytest.raises(req_lib.HTTPError): + p.chat([{"role": "user", "content": "Hi"}]) + + @patch("lib.providers.minimax.requests") + def test_network_exception_propagates( + self, + mock_requests: MagicMock, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.side_effect = ConnectionError("network down") + p = MiniMaxProvider() + + with pytest.raises(ConnectionError): + p.chat([{"role": "user", "content": "Hi"}]) + + +# --------------------------------------------------------------------------- +# build_provider factory +# --------------------------------------------------------------------------- + +class TestBuildProvider: + def test_returns_minimax_provider(self) -> None: + from 
lib.config_model import LLMConfig + + cfg = LLMConfig(provider="minimax") + p = build_provider(cfg) + assert isinstance(p, MiniMaxProvider) + + def test_unsupported_provider_raises(self) -> None: + from lib.config_model import LLMConfig + + cfg = LLMConfig(provider="anthropic") + with pytest.raises(ValueError, match="Unsupported LLM provider"): + build_provider(cfg) + + def test_provider_name_is_case_insensitive(self) -> None: + from lib.config_model import LLMConfig + + cfg = LLMConfig(provider="MiniMax") + p = build_provider(cfg) + assert isinstance(p, MiniMaxProvider) diff --git a/tests/tools/test_minimax_tts.py b/tests/tools/test_minimax_tts.py new file mode 100644 index 0000000..d9c7d88 --- /dev/null +++ b/tests/tools/test_minimax_tts.py @@ -0,0 +1,305 @@ +"""Unit tests for the MiniMax TTS provider tool (tools/audio/minimax_tts.py). + +These tests do not require a MINIMAX_API_KEY or network access. +All HTTP calls are mocked via unittest.mock. +""" +from __future__ import annotations + +import json +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from tools.audio.minimax_tts import MINIMAX_VOICE_IDS, MiniMaxTTS, _TTS_ENDPOINT +from tools.base_tool import ToolStatus + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture() +def tool() -> MiniMaxTTS: + return MiniMaxTTS() + + +@pytest.fixture(autouse=True) +def _clear_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + """Ensure MINIMAX_API_KEY is unset by default; individual tests set it.""" + monkeypatch.delenv("MINIMAX_API_KEY", raising=False) + + +# --------------------------------------------------------------------------- +# Status / availability +# --------------------------------------------------------------------------- + +class TestToolStatus: + def test_unavailable_without_api_key(self, tool: MiniMaxTTS) -> None: + 
assert tool.get_status() == ToolStatus.UNAVAILABLE + + def test_available_with_api_key( + self, tool: MiniMaxTTS, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + assert tool.get_status() == ToolStatus.AVAILABLE + + +# --------------------------------------------------------------------------- +# Metadata checks +# --------------------------------------------------------------------------- + +class TestToolMetadata: + def test_capability_is_tts(self, tool: MiniMaxTTS) -> None: + assert tool.capability == "tts" + + def test_provider_is_minimax(self, tool: MiniMaxTTS) -> None: + assert tool.provider == "minimax" + + def test_default_model_is_hd(self, tool: MiniMaxTTS) -> None: + schema_props = tool.input_schema["properties"] + assert schema_props["model"]["default"] == "speech-2.8-hd" + + def test_voice_ids_are_non_empty(self) -> None: + assert len(MINIMAX_VOICE_IDS) > 0 + for vid in MINIMAX_VOICE_IDS: + assert isinstance(vid, str) and vid + + def test_endpoint_uses_correct_domain(self) -> None: + assert "api.minimax.io" in _TTS_ENDPOINT + assert "api.minimax.chat" not in _TTS_ENDPOINT + + +# --------------------------------------------------------------------------- +# Cost estimation +# --------------------------------------------------------------------------- + +class TestCostEstimation: + def test_cost_scales_with_text_length(self, tool: MiniMaxTTS) -> None: + short_cost = tool.estimate_cost({"text": "Hi"}) + long_cost = tool.estimate_cost({"text": "Hi" * 1000}) + assert long_cost > short_cost + + def test_cost_is_non_negative(self, tool: MiniMaxTTS) -> None: + assert tool.estimate_cost({"text": ""}) >= 0 + + +# --------------------------------------------------------------------------- +# Execute — missing API key +# --------------------------------------------------------------------------- + +class TestExecuteWithoutKey: + def test_returns_failure_without_api_key(self, tool: MiniMaxTTS) -> None: + result = 
tool.execute({"text": "Hello"}) + assert not result.success + assert "MINIMAX_API_KEY" in result.error + + +# --------------------------------------------------------------------------- +# Execute — happy path (mocked HTTP) +# --------------------------------------------------------------------------- + +def _make_api_response(hex_audio: str = "494433") -> MagicMock: + """Return a mock requests.Response for the MiniMax TTS API.""" + mock_resp = MagicMock() + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": {"audio": hex_audio, "status": 2}, + "base_resp": {"status_code": 0, "status_msg": "success"}, + } + return mock_resp + + +class TestExecuteSuccess: + @patch("tools.audio.minimax_tts.requests") + def test_writes_audio_file( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_requests.post.return_value = _make_api_response("494433") + + output = tmp_path / "out.mp3" + result = tool.execute({"text": "Hello world", "output_path": str(output)}) + + assert result.success, result.error + assert output.exists() + assert output.read_bytes() == bytes.fromhex("494433") + + @patch("tools.audio.minimax_tts.requests") + def test_uses_correct_endpoint( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "sk-test") + mock_requests.post.return_value = _make_api_response() + + tool.execute({"text": "Test", "output_path": str(tmp_path / "out.mp3")}) + + call_args = mock_requests.post.call_args + assert call_args[0][0] == _TTS_ENDPOINT + + @patch("tools.audio.minimax_tts.requests") + def test_sends_bearer_auth_header( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + api_key = "minimax-test-key-123" + monkeypatch.setenv("MINIMAX_API_KEY", 
api_key) + mock_requests.post.return_value = _make_api_response() + + tool.execute({"text": "Test", "output_path": str(tmp_path / "out.mp3")}) + + headers = mock_requests.post.call_args[1]["headers"] + assert headers["Authorization"] == f"Bearer {api_key}" + + @patch("tools.audio.minimax_tts.requests") + def test_result_contains_provider_info( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_requests.post.return_value = _make_api_response() + + result = tool.execute({"text": "Hello", "output_path": str(tmp_path / "out.mp3")}) + + assert result.success + assert result.data["provider"] == "minimax" + assert result.data["model"] == "speech-2.8-hd" + + @patch("tools.audio.minimax_tts.requests") + def test_default_voice_is_used_when_not_specified( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_requests.post.return_value = _make_api_response() + + tool.execute({"text": "Hello", "output_path": str(tmp_path / "out.mp3")}) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["voice_setting"]["voice_id"] == "English_expressive_narrator" + + @patch("tools.audio.minimax_tts.requests") + def test_custom_voice_and_model_forwarded( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_requests.post.return_value = _make_api_response() + + tool.execute({ + "text": "Custom", + "voice_id": "English_Lucky_Robot", + "model": "speech-2.8-turbo", + "output_path": str(tmp_path / "out.mp3"), + }) + + payload = mock_requests.post.call_args[1]["json"] + assert payload["voice_setting"]["voice_id"] == "English_Lucky_Robot" + assert payload["model"] == "speech-2.8-turbo" + + +# 
--------------------------------------------------------------------------- +# Execute — API error handling +# --------------------------------------------------------------------------- + +class TestExecuteErrors: + @patch("tools.audio.minimax_tts.requests") + def test_api_status_code_error( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_resp = MagicMock() + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": {}, + "base_resp": {"status_code": 1004, "status_msg": "Auth failed"}, + } + mock_requests.post.return_value = mock_resp + + result = tool.execute({"text": "Hello", "output_path": str(tmp_path / "out.mp3")}) + + assert not result.success + assert "1004" in result.error + + @patch("tools.audio.minimax_tts.requests") + def test_empty_audio_response( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_resp = MagicMock() + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": {"audio": ""}, + "base_resp": {"status_code": 0, "status_msg": "success"}, + } + mock_requests.post.return_value = mock_resp + + result = tool.execute({"text": "Hello", "output_path": str(tmp_path / "out.mp3")}) + + assert not result.success + assert "empty" in result.error.lower() + + @patch("tools.audio.minimax_tts.requests") + def test_network_exception_is_caught( + self, + mock_requests: MagicMock, + tool: MiniMaxTTS, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("MINIMAX_API_KEY", "test-key") + mock_requests.post.side_effect = ConnectionError("network down") + + result = tool.execute({"text": "Hello", "output_path": str(tmp_path / "out.mp3")}) + + assert not result.success + assert "MiniMax TTS failed" in result.error + + +# 
--------------------------------------------------------------------------- +# Hex decoding correctness +# --------------------------------------------------------------------------- + +class TestHexDecoding: + def test_hex_roundtrip(self) -> None: + original = b"\x49\x44\x33\x00\x00" + hex_str = original.hex() + assert bytes.fromhex(hex_str) == original + + def test_non_base64_decode(self) -> None: + """Verify we use hex, not base64.""" + import base64 + hex_data = "494433" + hex_decoded = bytes.fromhex(hex_data) + # base64 decoding of the same string yields different bytes + b64_decoded = base64.b64decode(hex_data + "==") + assert hex_decoded != b64_decoded diff --git a/tools/audio/minimax_tts.py b/tools/audio/minimax_tts.py new file mode 100644 index 0000000..6a53934 --- /dev/null +++ b/tools/audio/minimax_tts.py @@ -0,0 +1,250 @@ +"""MiniMax text-to-speech provider tool. + +Uses the MiniMax T2A v2 API (https://api.minimax.io/v1/t2a_v2). +Audio is returned as hex-encoded bytes. +Requires MINIMAX_API_KEY environment variable. +""" + +from __future__ import annotations + +import os +import time +from pathlib import Path +from typing import Any + +import requests + +from tools.base_tool import ( + BaseTool, + Determinism, + ExecutionMode, + ResourceProfile, + RetryPolicy, + ToolResult, + ToolRuntime, + ToolStability, + ToolStatus, + ToolTier, +) + +_TTS_ENDPOINT = "https://api.minimax.io/v1/t2a_v2" + +# A representative set of English system voices. 
Full list at: +# https://platform.minimax.io/faq/system-voice-id +MINIMAX_VOICE_IDS = [ + "English_Graceful_Lady", + "English_Insightful_Speaker", + "English_radiant_girl", + "English_Persuasive_Man", + "English_Lucky_Robot", + "English_expressive_narrator", +] + + +class MiniMaxTTS(BaseTool): + name = "minimax_tts" + version = "0.1.0" + tier = ToolTier.VOICE + capability = "tts" + provider = "minimax" + stability = ToolStability.EXPERIMENTAL + execution_mode = ExecutionMode.SYNC + determinism = Determinism.STOCHASTIC + runtime = ToolRuntime.API + + dependencies = [] + install_instructions = ( + "Set the MINIMAX_API_KEY environment variable:\n" + " export MINIMAX_API_KEY=your_key_here\n" + "Get a key at https://platform.minimax.io/" + ) + fallback = "openai_tts" + fallback_tools = ["openai_tts", "piper_tts"] + agent_skills = ["text-to-speech"] + + capabilities = [ + "text_to_speech", + "voice_selection", + "multilingual", + ] + supports = { + "voice_cloning": False, + "multilingual": True, + "offline": False, + "native_audio": True, + "streaming": True, + } + best_for = [ + "cost-effective narration with a broad voice catalogue", + "multilingual productions using a single API key", + ] + not_good_for = [ + "fully offline production", + "voice cloning workflows", + ] + + input_schema = { + "type": "object", + "required": ["text"], + "properties": { + "text": { + "type": "string", + "description": "Text to convert to speech (max 10,000 characters)", + }, + "voice_id": { + "type": "string", + "default": "English_expressive_narrator", + "description": ( + "MiniMax system voice ID. " + "See https://platform.minimax.io/faq/system-voice-id for the full list." + ), + }, + "model": { + "type": "string", + "default": "speech-2.8-hd", + "enum": ["speech-2.8-hd", "speech-2.8-turbo", "speech-2.6-hd", "speech-2.6-turbo"], + "description": "TTS model to use. 
speech-2.8-hd is the recommended default.", + }, + "speed": { + "type": "number", + "default": 1.0, + "minimum": 0.5, + "maximum": 2.0, + "description": "Speech rate multiplier.", + }, + "vol": { + "type": "number", + "default": 1.0, + "minimum": 0.1, + "maximum": 10.0, + "description": "Volume level.", + }, + "pitch": { + "type": "integer", + "default": 0, + "minimum": -12, + "maximum": 12, + "description": "Pitch adjustment in semitones.", + }, + "format": { + "type": "string", + "default": "mp3", + "enum": ["mp3", "pcm", "flac"], + "description": "Audio output format.", + }, + "sample_rate": { + "type": "integer", + "default": 32000, + "enum": [8000, 16000, 22050, 24000, 32000, 44100], + "description": "Audio sample rate in Hz.", + }, + "output_path": {"type": "string"}, + }, + } + + resource_profile = ResourceProfile( + cpu_cores=1, ram_mb=256, vram_mb=0, disk_mb=50, network_required=True + ) + retry_policy = RetryPolicy(max_retries=2, retryable_errors=["rate_limit", "timeout"]) + idempotency_key_fields = ["text", "voice_id", "model", "format"] + side_effects = ["writes audio file to output_path", "calls MiniMax API"] + user_visible_verification = ["Listen to generated audio for intelligibility and tone"] + + def get_status(self) -> ToolStatus: + if os.environ.get("MINIMAX_API_KEY"): + return ToolStatus.AVAILABLE + return ToolStatus.UNAVAILABLE + + def estimate_cost(self, inputs: dict[str, Any]) -> float: + # Approx $0.10 per 1,000 characters + return round(len(inputs.get("text", "")) * 0.0001, 4) + + def execute(self, inputs: dict[str, Any]) -> ToolResult: + api_key = os.environ.get("MINIMAX_API_KEY") + if not api_key: + return ToolResult( + success=False, + error="No MiniMax API key. 
" + self.install_instructions, + ) + + start = time.time() + try: + result = self._generate(inputs, api_key) + except Exception as exc: + return ToolResult(success=False, error=f"MiniMax TTS failed: {exc}") + + result.duration_seconds = round(time.time() - start, 2) + result.cost_usd = self.estimate_cost(inputs) + return result + + def _generate(self, inputs: dict[str, Any], api_key: str) -> ToolResult: + text = inputs["text"] + voice_id = inputs.get("voice_id", "English_expressive_narrator") + model = inputs.get("model", "speech-2.8-hd") + fmt = inputs.get("format", "mp3") + sample_rate = inputs.get("sample_rate", 32000) + + payload: dict[str, Any] = { + "model": model, + "text": text, + "stream": False, + "voice_setting": { + "voice_id": voice_id, + "speed": inputs.get("speed", 1.0), + "vol": inputs.get("vol", 1.0), + "pitch": inputs.get("pitch", 0), + }, + "audio_setting": { + "sample_rate": sample_rate, + "format": fmt, + "channel": 1, + }, + } + if fmt == "mp3": + payload["audio_setting"]["bitrate"] = 128000 + + response = requests.post( + _TTS_ENDPOINT, + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=120, + ) + response.raise_for_status() + + data = response.json() + base_resp = data.get("base_resp", {}) + status_code = base_resp.get("status_code", -1) + if status_code != 0: + status_msg = base_resp.get("status_msg", "unknown error") + return ToolResult( + success=False, + error=f"MiniMax TTS API error {status_code}: {status_msg}", + ) + + hex_audio = data.get("data", {}).get("audio", "") + if not hex_audio: + return ToolResult(success=False, error="MiniMax TTS returned empty audio.") + + audio_bytes = bytes.fromhex(hex_audio) + + output_path = Path(inputs.get("output_path", f"minimax_tts.{fmt}")) + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(audio_bytes) + + return ToolResult( + success=True, + data={ + "provider": self.provider, + "model": model, + 
"voice_id": voice_id, + "text_length": len(text), + "audio_bytes": len(audio_bytes), + "format": fmt, + "output": str(output_path), + }, + artifacts=[str(output_path)], + model=model, + )