Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
venv
__pycache__
.pytest_cache
.mypy_cache
.git
.gitignore
*.pyc
*.pyo
*.pyd
*.swp
*.swo
dist
build
*.egg-info
*.onnx
*.npz
*.wav
*.mp3
*.flac
*.log
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
venv
__pycache__
*.pyc
*.pyo
*.pyd
*.pyw
*.pyz
*.pywz
*.pyzw
*.pyzwz
*.pyzwzw
27 changes: 27 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
FROM python:3.13-slim

# Do not write .pyc files and keep stdout/stderr unbuffered so logs appear immediately
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# System dependencies for phonemizer/soundfile/onnxruntime
RUN apt-get update && apt-get install -y --no-install-recommends \
        espeak-ng \
        libsndfile1 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
# "uvicorn[standard]" is quoted so the shell cannot treat [standard] as a glob.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir fastapi "uvicorn[standard]" phonemizer

# Copy project files (entire context; .dockerignore controls exclusions)
COPY . .

EXPOSE 8000

CMD ["uvicorn", "run:app", "--host", "0.0.0.0", "--port", "8000"]


66 changes: 0 additions & 66 deletions LICENSE

This file was deleted.

9 changes: 0 additions & 9 deletions MANIFEST.in

This file was deleted.

15 changes: 15 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
version: "3.9"

services:
kittentts:
build:
context: .
image: kittentts:latest
container_name: kittentts
ports:
- "8000:8000"
# Persist the Hugging Face cache on the host to avoid re-downloading models on every container start
volumes:
- ${HOME}/.cache/huggingface:/root/.cache/huggingface
restart: unless-stopped

Binary file added output.wav
Binary file not shown.
41 changes: 0 additions & 41 deletions pyproject.toml

This file was deleted.

96 changes: 88 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,88 @@
num2words
spacy
espeakng_loader
misaki[en]>=0.9.4
onnxruntime
soundfile
numpy
huggingface_hub
annotated-types==0.7.0
anyio==4.10.0
attrs==25.3.0
babel==2.17.0
blis==1.3.0
catalogue==2.0.10
certifi==2025.8.3
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
cloudpathlib==0.21.1
colorama==0.4.6
coloredlogs==15.0.1
confection==0.1.5
csvw==3.5.1
cymem==2.0.11
dlinfo==2.0.0
docopt==0.6.2
espeakng-loader==0.2.4
fastapi==0.116.1
filelock==3.18.0
flatbuffers==25.2.10
fsspec==2025.7.0
h11==0.16.0
hf-xet==1.1.7
huggingface-hub==0.34.3
humanfriendly==10.0
idna==3.10
isodate==0.7.2
Jinja2==3.1.6
joblib==1.5.1
jsonschema==4.25.0
jsonschema-specifications==2025.4.1
langcodes==3.5.0
language-tags==1.2.0
language_data==1.3.0
marisa-trie==1.2.1
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
misaki==0.7.4
mpmath==1.3.0
murmurhash==1.0.13
num2words==0.5.14
numpy==2.3.2
onnxruntime==1.22.1
packaging==25.0
phonemizer==3.3.0
preshed==3.0.10
protobuf==6.31.1
pycparser==2.22
pydantic==2.11.7
pydantic_core==2.33.2
Pygments==2.19.2
pyparsing==3.2.3
python-dateutil==2.9.0.post0
PyYAML==6.0.2
rdflib==7.1.4
referencing==0.36.2
regex==2025.7.34
requests==2.32.4
rfc3986==1.5.0
rich==14.1.0
rpds-py==0.27.0
segments==2.3.0
setuptools==80.9.0
shellingham==1.5.4
six==1.17.0
smart_open==7.3.0.post1
sniffio==1.3.1
soundfile==0.13.1
spacy==3.8.7
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.5.1
starlette==0.47.2
sympy==1.14.0
thinc==8.3.6
tqdm==4.67.1
typer==0.16.0
typing-inspection==0.4.1
typing_extensions==4.14.1
uritemplate==4.2.0
urllib3==2.5.0
uvicorn==0.35.0
wasabi==1.1.3
weasel==0.4.1
wrapt==1.17.2
50 changes: 50 additions & 0 deletions run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import io

import soundfile as sf
from fastapi import FastAPI
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from kittentts import KittenTTS

app = FastAPI()




_MODEL_NAME = "KittenML/kitten-tts-nano-0.1"
_SAMPLE_RATE = 24000  # sample rate (Hz) written into the WAV header
_model = None  # shared KittenTTS instance, created on first request


def _get_model():
    """Return the shared KittenTTS instance, creating it on first use."""
    global _model
    if _model is None:
        _model = KittenTTS(_MODEL_NAME)
    return _model


@app.get("/tts")
@app.post("/tts")
def tts(text: str, voice: str = "expr-voice-4-f") -> StreamingResponse:
    """Synthesize ``text`` to speech and return it as a WAV stream.

    Registered for both GET and POST on ``/tts``.

    Args:
        text: Text to speak. Surrounding whitespace is stripped.
        voice: Voice identifier passed to the model. Surrounding whitespace
            is stripped.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` and an inline
        ``Content-Disposition`` naming the payload ``tts.wav``.

    Raises:
        HTTPException: 400 when ``text`` is empty after stripping, or when
            the model advertises its voices and ``voice`` is not among them.
    """
    # Normalize inputs
    text = text.strip()
    voice = voice.strip()

    # Reject empty input up front instead of handing it to the model.
    if not text:
        raise HTTPException(status_code=400, detail="text must not be empty")

    model = _get_model()

    # NOTE(review): assumes KittenTTS may expose `available_voices`; when it
    # does not, the check is skipped and the model validates the voice itself.
    known_voices = getattr(model, "available_voices", None)
    if known_voices and voice not in known_voices:
        raise HTTPException(status_code=400, detail=f"unknown voice: {voice!r}")

    audio = model.generate(text, voice=voice)

    # Encode into an in-memory WAV container and rewind for streaming.
    buffer = io.BytesIO()
    sf.write(buffer, audio, _SAMPLE_RATE, format="WAV")
    buffer.seek(0)

    return StreamingResponse(
        buffer,
        media_type="audio/wav",
        headers={"Content-Disposition": 'inline; filename="tts.wav"'},
    )

# m = KittenTTS("KittenML/kitten-tts-nano-0.1")


# audio = m.generate("This high quality TTS model works without a GPU", voice='expr-voice-2-f' )

# available_voices : [ 'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f', 'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f' ]



# Save the audio
# import soundfile as sf
# sf.write('output.wav', audio, 24000)


# Run locally (from the project root, with the virtualenv activated):
#   python -m uvicorn run:app --host 127.0.0.1 --port 8000 --reload
46 changes: 0 additions & 46 deletions setup.py

This file was deleted.