digitalsamba · octo-patch · Mar 30, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -136,6 +136,9 @@ python tools/voiceover.py --scene-dir public/audio/scenes --json
 # Using Qwen3-TTS (self-hosted, free alternative to ElevenLabs)
 python tools/voiceover.py --provider qwen3 --tone warm --scene-dir public/audio/scenes --json
 
+# Using MiniMax Cloud TTS (no GPU required, cloud API)
+python tools/voiceover.py --provider minimax --minimax-voice English_Graceful_Lady --scene-dir public/audio/scenes --json
+
 # Single file (legacy)
 python tools/voiceover.py --script SCRIPT.md --output out.mp3
 ```
@@ -163,6 +166,17 @@ python tools/qwen3_tts.py --list-tones    # neutral, warm, professional, excited
 
 Temperature controls expressiveness: `--temperature 1.2` (more expressive) or `--temperature 0.4` (more consistent).
 
+### MiniMax Cloud TTS (Standalone)
+
+```bash
+python tools/minimax_tts.py --text "Hello world" --output hello.mp3
+python tools/minimax_tts.py --text "Hello world" --voice English_Persuasive_Man --output hello.mp3
+python tools/minimax_tts.py --text "Hello world" --model turbo --output fast.mp3
+python tools/minimax_tts.py --list-voices   # 12 voices: English + Chinese
+```
+
+Two models are available: `hd` (speech-2.8-hd, high quality) and `turbo` (speech-2.8-turbo, faster). No GPU is required — synthesis runs entirely in the cloud via the MiniMax API.
+
 ### Cloud GPU Providers
 
 All cloud GPU tools support two providers via `--cloud runpod|modal`. RunPod is the default. Modal was added as a reliability fallback after RunPod outages, and offers faster cold starts.

diff --git a/README.md b/README.md
@@ -173,6 +173,10 @@ python tools/voiceover.py --script script.md --output voiceover.mp3
 python tools/voiceover.py --provider qwen3 --speaker Ryan --scene-dir public/audio/scenes --json
 python tools/qwen3_tts.py --text "Hello world" --tone warm --output hello.mp3
 
+# Generate voiceover (MiniMax Cloud TTS — no GPU required)
+python tools/voiceover.py --provider minimax --minimax-voice English_Persuasive_Man --scene-dir public/audio/scenes --json
+python tools/minimax_tts.py --text "Hello world" --voice English_Graceful_Lady --output hello.mp3
+
 # Generate background music (ElevenLabs)
 python tools/music.py --prompt "Upbeat corporate" --duration 120 --output music.mp3
 

diff --git a/brands/default/voice.json b/brands/default/voice.json
@@ -14,5 +14,9 @@
     "tone": "",
     "instruct": "",
     "clone": null
+  },
+  "minimax": {
+    "voice": "English_Graceful_Lady",
+    "model": "hd"
   }
 }