# Register the olah chart repository, refresh the local chart index, then
# install (or upgrade, via -i) the release "olah" from chart "olah/olah" into
# the "olah" namespace. --create-namespace creates the namespace if it is
# missing; --wait blocks until the release's resources report ready.
helm repo add olah https://surajssd.github.io/k8s-hf-mirror
helm repo update
helm upgrade -i --wait \
  --create-namespace \
  --namespace olah \
  olah \
  olah/olah

# Ensure that the vLLM deployment has the following environment variable set:
# In-cluster endpoint: service "olah" in namespace "olah", port 18090.
export HF_ENDPOINT=http://olah.olah:18090

# Start a local port-forward to the service:
# Forward local port 18090 to port 18090 of service "olah" in namespace "olah".
# NOTE: kubectl port-forward stays in the foreground until interrupted, so run
# the remaining steps from a second terminal.
kubectl -n olah port-forward svc/olah 18090

# Install the huggingface-cli:
# Create and activate an isolated Python environment in ./venv, then install
# (or upgrade) huggingface_hub with its "cli" extra inside it.
virtualenv venv
source venv/bin/activate
pip install -U "huggingface_hub[cli]"

# Run the following to use the deployment as a cache:
# Point the Hugging Face client at localhost:18090 for this shell session.
HF_ENDPOINT=http://localhost:18090
export HF_ENDPOINT
# Remove any previously downloaded copy of the model from the local hub cache
# (presumably so the download below is really fetched through HF_ENDPOINT).
rm -r -f ~/.cache/huggingface/hub/models--facebook--mms-tts-sml/
# Download the example model.
huggingface-cli download facebook/mms-tts-sml