diff --git a/src/main/api/camb-tts.js b/src/main/api/camb-tts.js
new file mode 100644
index 0000000..64e4075
--- /dev/null
+++ b/src/main/api/camb-tts.js
@@ -0,0 +1,122 @@
+import axios from 'axios'
+import { cambConfig } from '../config/config.js'
+import log from '../logger.js'
+import fs from 'fs'
+import path from 'path'
+import FormData from 'form-data'
+
+const CAMB_API_BASE = 'https://client.camb.ai/apis'
+
+// Timeout (ms) applied to the voice listing request
+const LIST_VOICES_TIMEOUT_MS = 30000
+
+// Map BCP-47 language codes to CAMB numeric IDs for voice cloning
+const LANG_ID_MAP = {
+  'en-us': 1, 'es-es': 54, 'fr-fr': 76, 'de-de': 31,
+  'ja-jp': 88, 'hi-in': 81, 'pt-br': 111, 'zh-cn': 139,
+  'ko-kr': 93, 'it-it': 86, 'nl-nl': 48, 'ru-ru': 120,
+}
+
+/**
+ * Build the headers shared by the JSON endpoints
+ * @returns {Object} headers carrying the API key and a JSON content type
+ */
+function getHeaders() {
+  return {
+    'x-api-key': cambConfig.apiKey,
+    'Content-Type': 'application/json'
+  }
+}
+
+/**
+ * Resolve the language value sent to the voice cloning endpoint
+ * @param {string|number} [language] - BCP-47 code (any letter case) or a numeric ID
+ * @returns {string|number} mapped numeric ID, the input itself when it is not in
+ *   LANG_ID_MAP, or 1 when no language was given
+ */
+function resolveLangId(language) {
+  if (typeof language === 'string') {
+    // Lookup is case-insensitive so 'en-US' and 'en-us' resolve to the same ID
+    const mapped = LANG_ID_MAP[language.trim().toLowerCase()]
+    if (mapped) {
+      return mapped
+    }
+  }
+  return language || 1
+}
+
+/**
+ * Generate audio using CAMB AI streaming TTS
+ * @param {Object} param - { text, voice_id, language, speech_model }; omitted
+ *   voice_id, language and speech_model fall back to cambConfig
+ * @returns {Promise} audio bytes as delivered by axios for responseType 'arraybuffer'
+ */
+export async function makeAudio(param) {
+  log.debug('~ camb makeAudio ~ param:', JSON.stringify(param))
+
+  const response = await axios.post(
+    `${CAMB_API_BASE}/tts-stream`,
+    {
+      text: param.text,
+      voice_id: param.voice_id || cambConfig.defaultVoiceId,
+      // Use the configured defaults rather than hard-coded literals
+      language: param.language || cambConfig.defaultLanguage,
+      speech_model: param.speech_model || cambConfig.speechModel,
+      output_configuration: { format: 'wav' }
+    },
+    {
+      headers: getHeaders(),
+      responseType: 'arraybuffer',
+      timeout: 60000
+    }
+  )
+
+  return response.data
+}
+
+/**
+ * Clone a voice using CAMB AI
+ * @param {Object} param - { audioFilePath, voiceName, gender, language }
+ * @returns {Promise<{ voice_id: number }>}
+ * @throws the request error, rethrown after it has been logged
+ */
+export async function cloneVoice(param) {
+  log.debug('~ camb cloneVoice ~ param:', JSON.stringify({ ...param, audioFilePath: '...' }))
+  log.debug('~ camb cloneVoice ~ apiKey present:', !!cambConfig.apiKey)
+
+  const form = new FormData()
+  form.append('file', fs.createReadStream(param.audioFilePath))
+  form.append('voice_name', param.voiceName || `voice_${Date.now()}`)
+  // ?? instead of || so that an explicit gender of 0 is not replaced by 1
+  form.append('gender', String(param.gender ?? 1))
+  form.append('language', String(resolveLangId(param.language)))
+  form.append('enhance_audio', 'true')
+
+  try {
+    const response = await axios.post(`${CAMB_API_BASE}/create-custom-voice`, form, {
+      headers: {
+        'x-api-key': cambConfig.apiKey,
+        ...form.getHeaders()
+      },
+      timeout: 120000
+    })
+    log.debug('~ camb cloneVoice ~ response:', JSON.stringify(response.data))
+    return response.data
+  } catch (err) {
+    log.error('~ camb cloneVoice ~ error:', err.response?.status, err.response?.data || err.message)
+    throw err
+  }
+}
+
+/**
+ * List all available voices
+ * @returns {Promise} response body of the list-voices endpoint
+ */
+export async function listVoices() {
+  const response = await axios.get(`${CAMB_API_BASE}/list-voices`, {
+    headers: getHeaders(),
+    timeout: LIST_VOICES_TIMEOUT_MS
+  })
+
+  return response.data
+}
diff --git a/src/main/config/config.js b/src/main/config/config.js
index 9c81735..d7259fd 100644
--- a/src/main/config/config.js
+++ b/src/main/config/config.js
@@ -1,5 +1,26 @@
 import path from 'path'
 import os from 'os'
+import fs from 'fs'
+
+// Load .env from parent camb-ai-work directory as fallback; variables that are
+// already set in the environment are never overwritten
+const parentEnvPath = path.resolve(process.cwd(), '..', '.env')
+try {
+  if (fs.existsSync(parentEnvPath)) {
+    const content = fs.readFileSync(parentEnvPath, 'utf-8')
+    // Split on \r?\n so CRLF files parse too: a trailing '\r' would make the
+    // pattern below fail because '.' does not match '\r'
+    for (const line of content.split(/\r?\n/)) {
+      const match = line.match(/^([^#=]+)=(.*)$/)
+      if (match && !process.env[match[1].trim()]) {
+        process.env[match[1].trim()] = match[2].trim()
+      }
+    }
+  }
+} catch (err) {
+  // Best effort: the fallback file is optional, so report and carry on
+  console.warn('Failed to load fallback .env file:', err.message)
+}
 
 const isDev = process.env.NODE_ENV === 'development'
 const isWin = process.platform === 'win32'
@@ -9,6 +30,18 @@ export const serviceUrl = {
   tts: isDev ? 'http://192.168.4.204:18180' : 'http://127.0.0.1:18180'
 }
 
+// TTS provider: 'fish' (default, local Fish Speech) or 'camb' (CAMB AI cloud)
+export const ttsProvider = process.env.TTS_PROVIDER || 'fish'
+
+// CAMB AI settings, each overridable through the environment
+export const cambConfig = {
+  apiKey: process.env.CAMB_API_KEY || '',
+  // parseInt yields NaN for a missing or non-numeric value; fall back to the default ID
+  defaultVoiceId: parseInt(process.env.CAMB_VOICE_ID, 10) || 147320,
+  defaultLanguage: process.env.CAMB_LANGUAGE || 'en-us',
+  speechModel: process.env.CAMB_SPEECH_MODEL || 'mars-flash'
+}
+
 export const assetPath = {
   model: isWin
     ? path.join('D:', 'duix_avatar_data', 'face2face', 'temp')
diff --git a/src/main/dao/voice.js b/src/main/dao/voice.js
index fe0f5ff..15742a4 100644
--- a/src/main/dao/voice.js
+++ b/src/main/dao/voice.js
@@ -23,6 +23,27 @@ export function insert({ origin_audio_path, lang, asr_format_audio_url, referenc
   return id
 }
 
+/**
+ * Insert a voice row that references a CAMB AI cloned voice
+ * @param {Object} row - { origin_audio_path, lang, camb_voice_id }
+ * @returns {number} lastInsertRowid reported for the insert statement
+ */
+export function insertCamb({ origin_audio_path, lang, camb_voice_id }) {
+  const db = connect()
+  const stmt = db.prepare(
+    `insert into voice (origin_audio_path, lang, camb_voice_id, created_at)
+     values (?,?,?,?);`
+  )
+  const info = stmt.run(
+    origin_audio_path,
+    lang,
+    camb_voice_id,
+    Date.now()
+  )
+  const id = info.lastInsertRowid
+  return id
+}
+
 export function selectByID(id) {
   const db = connect()
   const stmt = db.prepare('SELECT * FROM voice WHERE id = ?')
diff --git a/src/main/db/sql.js b/src/main/db/sql.js
index e1edfdd..cec47ee 100644
--- a/src/main/db/sql.js
+++ b/src/main/db/sql.js
@@ -65,5 +65,12 @@ export default [
     script: `alter table video
     add voice_id integer;
     `
+  },
+  {
+    // Adds the column that holds the CAMB AI voice ID of a cloned voice
+    version: 4,
+    script: `alter table voice
+    add camb_voice_id integer;
+    `
   }
 ]
diff --git a/src/main/service/voice.js b/src/main/service/voice.js
index 69478f8..6a3228b 100644
--- a/src/main/service/voice.js
+++ b/src/main/service/voice.js
@@ -1,9 +1,10 @@
-import { selectAll, insert, selectByID } from '../dao/voice.js'
+import { selectAll, insert, insertCamb, selectByID } from '../dao/voice.js'
 import { preprocessAndTran, makeAudio as makeAudioApi } from '../api/tts.js'
+import { makeAudio as cambMakeAudio, cloneVoice as cambCloneVoice } from '../api/camb-tts.js'
 import crypto from 'crypto'
 import fs from 'fs'
 import path from 'path'
-import { assetPath } from '../config/config.js'
+import { assetPath, ttsProvider, cambConfig } from '../config/config.js'
 import log from '../logger.js'
 import { ipcMain } from 'electron'
 import dayjs from 'dayjs'
@@ -16,6 +17,25 @@ export function getAllTimbre() {
 
 export async function train(path, lang = 'zh') {
   path = path.replace(/\\/g, '/') // 将路径中的\替换为/
+
+  if (ttsProvider === 'camb') {
+    // Use CAMB AI voice cloning
+    // NOTE(review): lang is only stored with the row; the clone language comes from cambConfig - confirm
+    const res = await cambCloneVoice({
+      audioFilePath: path,
+      voiceName: `voice_${Date.now()}`,
+      language: cambConfig.defaultLanguage
+    })
+    log.debug('~ train (camb) ~ res:', res)
+    // Guard against an empty response body as well as a missing voice_id
+    const cambVoiceId = res?.voice_id
+    if (!cambVoiceId) {
+      return false
+    }
+    return insertCamb({ origin_audio_path: path, lang, camb_voice_id: cambVoiceId })
+  }
+
+  // Default: Fish Speech
   const res = await preprocessAndTran({
     format: path.split('.').pop(),
     reference_audio: path,
@@ -47,6 +67,29 @@ export async function makeAudio({voiceId, text, targetDir}) {
   const uuid = crypto.randomUUID()
   const voice = selectByID(voiceId)
 
+  if (ttsProvider === 'camb' && voice.camb_voice_id) {
+    // Use CAMB AI TTS with cloned voice; voices without a camb_voice_id fall
+    // through to the Fish Speech path below
+    try {
+      const audioBuffer = await cambMakeAudio({
+        text,
+        voice_id: voice.camb_voice_id,
+        language: cambConfig.defaultLanguage,
+        speech_model: cambConfig.speechModel
+      })
+      // recursive mkdir is a no-op when the directory already exists
+      fs.mkdirSync(targetDir, { recursive: true })
+      const fileName = `${uuid}.wav`
+      // No encoding argument: it is ignored when the data is a Buffer
+      fs.writeFileSync(path.join(targetDir, fileName), Buffer.from(audioBuffer))
+      return fileName
+    } catch (error) {
+      log.error('Error generating audio via CAMB:', error)
+      throw error
+    }
+  }
+
+  // Default: Fish Speech
   return makeAudioApi({
     speaker: uuid,
     text,