diff --git a/docs/install.md b/docs/install.md
index eea136783..843328537 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -14,6 +14,8 @@ The repo is developed and tested on `Ubuntu 20.04` and `Python 3.9`.
 git clone https://github.com/myshell-ai/MeloTTS.git
 cd MeloTTS
 pip install -e .
+# for Intel XPU, additionally install the XPU requirements:
+# pip install -r requirements-intel.txt
 python -m unidic download
 ```
 If you encountered issues in macOS install, try the [Docker Install](#docker-install)
@@ -112,7 +114,7 @@ from melo.api import TTS
 speed = 1.0
 
 # CPU is sufficient for real-time inference.
-# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps'
+# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps' or 'xpu'
 device = 'auto' # Will automatically use GPU if available
 
 # English
diff --git a/melo/api.py b/melo/api.py
index 236ea8f17..6ebc0d025 100644
--- a/melo/api.py
+++ b/melo/api.py
@@ -28,9 +28,12 @@ def __init__(self,
         if device == 'auto':
             device = 'cpu'
             if torch.cuda.is_available(): device = 'cuda'
-            if torch.backends.mps.is_available(): device = 'mps'
+            elif torch.xpu.is_available(): device = 'xpu'
+            elif torch.backends.mps.is_available(): device = 'mps'
         if 'cuda' in device:
             assert torch.cuda.is_available()
+        if 'xpu' in device:
+            assert torch.xpu.is_available()
 
         # config_path =
         hps = load_or_download_config(language, use_hf=use_hf, config_path=config_path)
@@ -123,7 +126,8 @@ def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_s
                 del x_tst, tones, lang_ids, bert, ja_bert, x_tst_lengths, speakers
                 # 
             audio_list.append(audio)
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available(): torch.cuda.empty_cache()
+        if torch.xpu.is_available(): torch.xpu.empty_cache()
         audio = self.audio_numpy_concat(audio_list, sr=self.hps.data.sampling_rate, speed=speed)
 
         if output_path is None:
diff --git a/melo/preprocess_text.py b/melo/preprocess_text.py
index 19dfb7c10..009d75542 100644
--- a/melo/preprocess_text.py
+++ b/melo/preprocess_text.py
@@ -45,6 +45,10 @@ def main(
     if cleaned_path is None:
         cleaned_path = metadata + ".cleaned"
+
+    if torch.cuda.is_available(): device = "cuda:0"
+    elif torch.xpu.is_available(): device = "xpu"
+    else: device = "cpu"
 
     if clean:
         out_file = open(cleaned_path, "w", encoding="utf-8")
 
@@ -52,7 +56,7 @@
         for line in tqdm(open(metadata, encoding="utf-8").readlines()):
             try:
                 utt, spk, language, text = line.strip().split("|")
-                norm_text, phones, tones, word2ph, bert = clean_text_bert(text, language, device='cuda:0')
+                norm_text, phones, tones, word2ph, bert = clean_text_bert(text, language, device=device)
                 for ph in phones:
                     if ph not in symbols and ph not in new_symbols:
                         new_symbols.append(ph)
diff --git a/melo/text/chinese_bert.py b/melo/text/chinese_bert.py
index ae454c46d..dd2245223 100644
--- a/melo/text/chinese_bert.py
+++ b/melo/text/chinese_bert.py
@@ -26,7 +26,9 @@ def get_bert_feature(text, word2ph, device=None, model_id='hfl/chinese-roberta-w
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
+        else: device = "cpu"
 
     with torch.no_grad():
         inputs = tokenizer(text, return_tensors="pt")
diff --git a/melo/text/chinese_mix.py b/melo/text/chinese_mix.py
index b4e149fb3..93a61f933 100644
--- a/melo/text/chinese_mix.py
+++ b/melo/text/chinese_mix.py
@@ -243,7 +243,10 @@ def _g2p_v2(segments):
     text = text_normalize(text)
     print(text)
     phones, tones, word2ph = g2p(text, impl='v2')
-    bert = get_bert_feature(text, word2ph, device='cuda:0')
+    if torch.cuda.is_available(): device = "cuda:0"
+    elif torch.xpu.is_available(): device = "xpu"
+    else: device = "cpu"
+    bert = get_bert_feature(text, word2ph, device=device)
     print(phones)
     import pdb; pdb.set_trace()
 
diff --git a/melo/text/english_bert.py b/melo/text/english_bert.py
index a473d6de7..e717d1105 100644
--- a/melo/text/english_bert.py
+++ b/melo/text/english_bert.py
@@ -15,7 +15,9 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
+        else: device = "cpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
diff --git a/melo/text/french_bert.py b/melo/text/french_bert.py
index f30d12dc3..53d243b32 100644
--- a/melo/text/french_bert.py
+++ b/melo/text/french_bert.py
@@ -15,7 +15,9 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
+        else: device = "cpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
diff --git a/melo/text/japanese_bert.py b/melo/text/japanese_bert.py
index 315fef2fb..669ba31ab 100644
--- a/melo/text/japanese_bert.py
+++ b/melo/text/japanese_bert.py
@@ -16,7 +16,9 @@ def get_bert_feature(text, word2ph, device=None, model_id='tohoku-nlp/bert-base-
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
+        else: device = "cpu"
     if model_id not in models:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
diff --git a/melo/text/korean.py b/melo/text/korean.py
index 5674fe9f8..2c9ee4ba2 100644
--- a/melo/text/korean.py
+++ b/melo/text/korean.py
@@ -138,7 +138,7 @@ def g2p(norm_text):
     assert len(word2ph) == len(tokenized) + 2
     return phones, tones, word2ph
 
-def get_bert_feature(text, word2ph, device='cuda'):
+def get_bert_feature(text, word2ph, device=None):
     from . import japanese_bert
     return japanese_bert.get_bert_feature(text, word2ph, device=device, model_id=model_id)
 
@@ -189,4 +189,4 @@ def get_bert_feature(text, word2ph, device='cuda'):
 # conv = kakasi.getConverter()
 # katakana_text = conv.do('ええ、僕はおきなと申します。こちらの小さいわらべは杏子。ご挨拶が遅れてしまいすみません。あなたの名は?')
 # Replace with your Chinese text
-# print(katakana_text) # Output: ニーハオセカイ
\ No newline at end of file
+# print(katakana_text) # Output: ニーハオセカイ
diff --git a/melo/text/spanish_bert.py b/melo/text/spanish_bert.py
index 8b6551101..36f6734e3 100644
--- a/melo/text/spanish_bert.py
+++ b/melo/text/spanish_bert.py
@@ -15,7 +15,9 @@ def get_bert_feature(text, word2ph, device=None):
     ):
         device = "mps"
     if not device:
-        device = "cuda"
+        if torch.cuda.is_available(): device = "cuda"
+        elif torch.xpu.is_available(): device = "xpu"
+        else: device = "cpu"
     if model is None:
         model = AutoModelForMaskedLM.from_pretrained(model_id).to(
             device
diff --git a/requirements-intel.txt b/requirements-intel.txt
new file mode 100644
index 000000000..666c5df6d
--- /dev/null
+++ b/requirements-intel.txt
@@ -0,0 +1,31 @@
+--extra-index-url https://download.pytorch.org/whl/test/xpu
+torch
+torchaudio
+txtsplit
+cached_path
+transformers==4.27.4
+num2words==0.5.12
+unidic_lite==1.0.8
+unidic==1.1.0
+mecab-python3==1.0.9
+pykakasi==2.2.1
+fugashi==1.3.0
+g2p_en==2.1.0
+anyascii==0.3.2
+jamo==0.4.1
+gruut[de,es,fr]==2.4.0
+g2pkk>=0.1.1
+librosa==0.9.1
+pydub==0.25.1
+eng_to_ipa==0.0.2
+inflect==7.0.0
+unidecode==1.3.7
+pypinyin==0.50.0
+cn2an==0.5.22
+jieba==0.42.1
+gradio
+langid==1.1.6
+tqdm
+tensorboard==2.16.2
+loguru==0.7.2
+python-mecab-ko
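A quick end-to-end smoke test for the new device selection (a minimal sketch, not part of the patch: it assumes the XPU-enabled wheels from `requirements-intel.txt` are installed, i.e. a PyTorch build that ships `torch.xpu`; the sample text, speaker key, and output path are arbitrary):

```python
import torch
from melo.api import TTS

# Resolve a device in the same order TTS(device='auto') now uses:
# cuda > xpu > mps > cpu.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
elif hasattr(torch, 'xpu') and torch.xpu.is_available():
    device = 'xpu'  # Intel GPU via the XPU backend (hasattr guards older torch builds)
elif torch.backends.mps.is_available():
    device = 'mps'
print(f'inference device: {device}')

# Standard MeloTTS usage from the docs; device='auto' would resolve identically.
model = TTS(language='EN', device=device)
speaker_ids = model.hps.data.spk2id
model.tts_to_file('Testing Intel XPU inference.', speaker_ids['EN-US'],
                  'test_xpu.wav', speed=1.0)
```

On a machine with no accelerator this resolves to `cpu`, which the docs already note is sufficient for real-time inference.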