KittenML · akx · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+*.egg-info
+*.py[cod]
+*.wav
diff --git a/README.md b/README.md
@@ -24,9 +24,7 @@ Kitten TTS is an open-source realistic text-to-speech model with just 15 million
 pip install https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 ```
 
-
-
- ### Basic Usage 
+### API Usage
 
 ```
 from kittentts import KittenTTS
@@ -42,6 +40,12 @@ sf.write('output.wav', audio, 24000)
 
 ```
 
+### CLI Usage
+
+```
+kittentts --output output.wav --text "This high quality TTS model works without a GPU"
+```
+
 
 
 

diff --git a/kittentts/__main__.py b/kittentts/__main__.py
@@ -0,0 +1,61 @@
+import argparse
+import datetime
+import io
+import sys
+
+voices = [  # voice identifiers offered as the allowed `choices` of the --voice option
+    "expr-voice-2-m",
+    "expr-voice-2-f",
+    "expr-voice-3-m",
+    "expr-voice-3-f",
+    "expr-voice-4-m",
+    "expr-voice-4-f",
+    "expr-voice-5-m",
+    "expr-voice-5-f",
+]
+
+
+def run(*, model: str, voice: str, output: str, text: str, speed: float=1.0) -> datetime.timedelta:  # synthesize `text`, write WAV to `output`; returns generate+write time
+    from kittentts import KittenTTS  # function-scope import; presumably keeps argument parsing/--help light — confirm
+    import soundfile as sf
+
+    m = KittenTTS(model)  # constructed before t0, so model construction is not part of the returned duration
+    t0 = datetime.datetime.now()
+    audio = m.generate(text, voice=voice, speed=speed)
+    if output == "-":  # "-" selects stdout instead of a file on disk
+        # sf requires a seekable buffer for writing, so encode into memory first and copy the bytes out.
+        bio = io.BytesIO()
+        sf.write(bio, audio, 24000, format="WAV", subtype="PCM_16")  # format must be explicit: there is no filename to infer it from
+        sys.stdout.buffer.write(bio.getvalue())  # .buffer is the binary layer of stdout; WAV data is bytes, not text
+    else:
+        sf.write(output, audio, 24000)  # third argument is the sample rate (24000 Hz); no explicit format/subtype is passed here
+    t1 = datetime.datetime.now()  # taken after the write, so the duration includes output encoding
+    return t1 - t0
+
+
+def main() -> None:  # CLI entry point (registered as the `kittentts` console script): parse args, synthesize, report timing
+    ap = argparse.ArgumentParser(prog="kittentts", description="Run Kitten TTS model")
+    ap.add_argument("--model", default="KittenML/kitten-tts-nano-0.1", help="Model to use")
+    ap.add_argument("--text", required=True, help="Text to synthesize")
+    ap.add_argument("--voice", default="expr-voice-2-f", help="Voice to use", choices=voices)
+    ap.add_argument("--speed", type=float, default=1.0, help="Speech speed (1.0 = normal)")
+    ap.add_argument("--output", help="Output audio file (- for stdout; use with care)")  # no default; a filename is generated below when omitted
+
+    args = ap.parse_args()
+
+    if not args.output:  # --output missing or empty: fall back to "<voice>-<timestamp>.wav" in the current directory
+        ts = datetime.datetime.now().isoformat(timespec="seconds").replace(":", "-")  # ":" -> "-"; presumably for filename portability — confirm
+        args.output = f"{args.voice}-{ts}.wav"
+
+    gen_time = run(
+        model=args.model,
+        voice=args.voice,
+        output=args.output,
+        text=args.text,
+        speed=args.speed,
+    )
+    print(f"Generated audio in {gen_time}, saved to {args.output}", file=sys.stderr)  # stderr keeps the status line out of audio written to stdout
+
+
+if __name__ == "__main__":  # this file is kittentts/__main__.py, so `python -m kittentts` also reaches main()
+    main()
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,9 @@ dependencies = [
     "huggingface_hub",
 ]
 
+[project.scripts]
+kittentts = "kittentts.__main__:main"
+
 [project.urls]
 Homepage = "https://github.com/kittenml/kittentts"
 Repository = "https://github.com/kittenml/kittentts"

diff --git a/setup.py b/setup.py
@@ -43,4 +43,9 @@
         "Bug Reports": "https://github.com/kittenml/kittentts/issues",
         "Source": "https://github.com/kittenml/kittentts",
     },
+    entry_points={
+        "console_scripts": [
+            "kittentts=kittentts.__main__:main",
+        ],
+    },
 )