Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions src/main/api/camb-tts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import axios from 'axios'
import { cambConfig } from '../config/config.js'
import log from '../logger.js'
import fs from 'fs'
import path from 'path'
import FormData from 'form-data'

const CAMB_API_BASE = 'https://client.camb.ai/apis'

/**
 * Build the HTTP headers used for JSON requests to the CAMB AI API.
 * A fresh object is created on every call.
 * @returns {Object} headers holding the configured API key and a JSON content type
 */
function getHeaders() {
  const headers = {}
  headers['x-api-key'] = cambConfig.apiKey
  headers['Content-Type'] = 'application/json'
  return headers
}

/**
 * Generate audio using CAMB AI streaming TTS.
 *
 * Any of voice_id / language / speech_model that the caller leaves out (or
 * passes as a falsy value) falls back to the matching `cambConfig` default,
 * so environment overrides apply here too. The literal 'en-us' and
 * 'mars-flash' remain as a last resort if the config fields are absent.
 *
 * @param {Object} param - { text, voice_id, language, speech_model }
 * @returns {Promise<Buffer>} audio buffer (the raw response body, requested
 *   as 'arraybuffer' with WAV output format)
 */
export async function makeAudio(param) {
  log.debug('~ camb makeAudio ~ param:', JSON.stringify(param))

  const payload = {
    text: param.text,
    voice_id: param.voice_id || cambConfig.defaultVoiceId,
    language: param.language || cambConfig.defaultLanguage || 'en-us',
    speech_model: param.speech_model || cambConfig.speechModel || 'mars-flash',
    output_configuration: { format: 'wav' }
  }

  const response = await axios.post(`${CAMB_API_BASE}/tts-stream`, payload, {
    headers: getHeaders(),
    // Binary audio must not be decoded as text.
    responseType: 'arraybuffer',
    timeout: 60000
  })

  return response.data
}

// Map BCP-47 language codes to CAMB numeric IDs for voice cloning
const LANG_ID_MAP = {
  'en-us': 1, 'es-es': 54, 'fr-fr': 76, 'de-de': 31,
  'ja-jp': 88, 'hi-in': 81, 'pt-br': 111, 'zh-cn': 139,
  'ko-kr': 93, 'it-it': 86, 'nl-nl': 48, 'ru-ru': 120,
}

/**
 * Resolve a caller-supplied language to the value sent in the `language` field.
 *
 * String codes are matched against LANG_ID_MAP after trimming, lower-casing
 * and replacing '_' with '-', so 'en-US' and 'en_US' resolve like 'en-us'.
 * Anything not found in the map (for example an already-numeric ID) is passed
 * through unchanged; a missing/falsy language becomes 1.
 *
 * @param {string|number|undefined} language
 * @returns {string|number}
 */
function resolveLanguageId(language) {
  if (typeof language === 'string') {
    const code = language.trim().toLowerCase().replace(/_/g, '-')
    // Own-property check so inherited keys such as 'constructor' never match.
    if (Object.prototype.hasOwnProperty.call(LANG_ID_MAP, code)) {
      return LANG_ID_MAP[code]
    }
  }
  return language || 1
}

/**
 * Clone a voice using CAMB AI
 *
 * Uploads the audio file as multipart form data to `create-custom-voice`.
 * Defaults: voice name `voice_<timestamp>`, gender 1, language 1;
 * `enhance_audio` is always sent as 'true'.
 *
 * @param {Object} param - { audioFilePath, voiceName, gender, language }
 * @returns {Promise<{ voice_id: number }>} the response body returned by the API
 * @throws rethrows the request error after logging its status and body/message
 */
export async function cloneVoice(param) {
  // The real file path is masked in the debug log.
  log.debug('~ camb cloneVoice ~ param:', JSON.stringify({ ...param, audioFilePath: '...' }))
  log.debug('~ camb cloneVoice ~ apiKey present:', !!cambConfig.apiKey)

  const langId = resolveLanguageId(param.language)
  const audioStream = fs.createReadStream(param.audioFilePath)

  const form = new FormData()
  form.append('file', audioStream)
  form.append('voice_name', param.voiceName || `voice_${Date.now()}`)
  form.append('gender', String(param.gender || 1))
  form.append('language', String(langId))
  form.append('enhance_audio', 'true')

  try {
    const response = await axios.post(`${CAMB_API_BASE}/create-custom-voice`, form, {
      headers: {
        'x-api-key': cambConfig.apiKey,
        // Adds the multipart content type (with boundary) generated by the form.
        ...form.getHeaders()
      },
      timeout: 120000
    })
    log.debug('~ camb cloneVoice ~ response:', JSON.stringify(response.data))
    return response.data
  } catch (err) {
    log.error('~ camb cloneVoice ~ error:', err.response?.status, err.response?.data || err.message)
    throw err
  } finally {
    // Release the file descriptor if the request failed before the stream was
    // fully consumed; destroying an already-finished stream is a no-op.
    audioStream.destroy()
  }
}

/**
 * List all available voices
 *
 * Issues a GET to `list-voices` with the standard API headers. A timeout is
 * set, as on the other CAMB requests in this module, so a stalled connection
 * rejects instead of leaving the caller waiting indefinitely.
 *
 * @returns {Promise<Array>} the response body returned by the API
 */
export async function listVoices() {
  const response = await axios.get(`${CAMB_API_BASE}/list-voices`, {
    headers: getHeaders(),
    timeout: 30000
  })

  return response.data
}
25 changes: 25 additions & 0 deletions src/main/config/config.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
import path from 'path'
import os from 'os'
import fs from 'fs'

// Load .env from parent camb-ai-work directory as fallback
const parentEnvPath = path.resolve(process.cwd(), '..', '.env')

/**
 * Remove one pair of matching surrounding quotes ('...' or "...") from a value.
 * @param {string} value - already-trimmed value text
 * @returns {string} the value without its enclosing quotes, or unchanged
 */
function unquoteEnvValue(value) {
  const first = value[0]
  const isQuoted =
    value.length >= 2 && (first === '"' || first === "'") && value[value.length - 1] === first
  return isQuoted ? value.slice(1, -1) : value
}

/**
 * Read KEY=VALUE lines from a dotenv-style file into process.env.
 *
 * Variables that already have a non-empty value in process.env are left
 * untouched, so the file only acts as a fallback. Lines that do not look like
 * KEY=VALUE (including lines starting with '#') are skipped. Does nothing when
 * the file does not exist.
 *
 * @param {string} envPath - path of the file to read
 */
function loadEnvFallback(envPath) {
  if (!fs.existsSync(envPath)) {
    return
  }
  // Drop a leading BOM so it does not become part of the first key.
  const content = fs.readFileSync(envPath, 'utf-8').replace(/^\uFEFF/, '')
  // Split on LF or CRLF: a trailing '\r' would otherwise make the pattern
  // below fail, because '.' does not match '\r'.
  for (const line of content.split(/\r?\n/)) {
    const match = line.match(/^([^#=]+)=(.*)$/)
    if (!match) continue
    const key = match[1].trim()
    if (!key || process.env[key]) continue
    process.env[key] = unquoteEnvValue(match[2].trim())
  }
}

try {
  loadEnvFallback(parentEnvPath)
} catch (err) {
  // Best effort: a missing or unreadable fallback file must not stop startup.
  console.warn(`Skipping fallback env file ${parentEnvPath}: ${err.message}`)
}

// Runtime flags used below to pick service URLs and asset locations.
// isWin: true when running on Windows.
const isWin = os.platform() === 'win32'
// isDev: true only when NODE_ENV is exactly 'development'.
const isDev = process.env.NODE_ENV === 'development'
Expand All @@ -9,6 +24,16 @@ export const serviceUrl = {
tts: isDev ? 'http://192.168.4.204:18180' : 'http://127.0.0.1:18180'
}

// Voice used when CAMB_VOICE_ID is unset or not a number.
const DEFAULT_CAMB_VOICE_ID = 147320

/**
 * Parse a voice ID from its environment-variable text.
 * @param {string|undefined} raw - raw value, e.g. process.env.CAMB_VOICE_ID
 * @param {number} [fallback] - returned when `raw` has no leading integer
 * @returns {number} the parsed integer, or `fallback` instead of NaN
 */
function parseVoiceId(raw, fallback = DEFAULT_CAMB_VOICE_ID) {
  const id = Number.parseInt(raw, 10)
  return Number.isNaN(id) ? fallback : id
}

// TTS provider: 'fish' (default, local Fish Speech) or 'camb' (CAMB AI cloud)
export const ttsProvider = process.env.TTS_PROVIDER || 'fish'

// CAMB AI settings, each overridable through an environment variable.
export const cambConfig = {
  apiKey: process.env.CAMB_API_KEY || '',
  // A non-numeric CAMB_VOICE_ID falls back to the default rather than
  // producing NaN, which would serialize as null in request bodies.
  defaultVoiceId: parseVoiceId(process.env.CAMB_VOICE_ID),
  defaultLanguage: process.env.CAMB_LANGUAGE || 'en-us',
  speechModel: process.env.CAMB_SPEECH_MODEL || 'mars-flash'
}

export const assetPath = {
model: isWin
? path.join('D:', 'duix_avatar_data', 'face2face', 'temp')
Expand Down
16 changes: 16 additions & 0 deletions src/main/dao/voice.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ export function insert({ origin_audio_path, lang, asr_format_audio_url, referenc
return id
}

/**
 * Insert a voice row that references a CAMB voice.
 *
 * Stores the original audio path, the language, the CAMB voice id and the
 * current time (Date.now()) as created_at.
 *
 * @param {Object} row - { origin_audio_path, lang, camb_voice_id }
 * @returns {*} the `lastInsertRowid` reported by the insert statement
 */
export function insertCamb({ origin_audio_path, lang, camb_voice_id }) {
  const sql = `insert into voice (origin_audio_path, lang, camb_voice_id, created_at)
values (?,?,?,?);`
  const { lastInsertRowid } = connect()
    .prepare(sql)
    .run(origin_audio_path, lang, camb_voice_id, Date.now())
  return lastInsertRowid
}

export function selectByID(id) {
const db = connect()
const stmt = db.prepare('SELECT * FROM voice WHERE id = ?')
Expand Down
6 changes: 6 additions & 0 deletions src/main/db/sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,11 @@ export default [
script: `alter table video
add voice_id integer;
`
},
{
version: 4,
script: `alter table voice
add camb_voice_id integer;
`
}
]
43 changes: 41 additions & 2 deletions src/main/service/voice.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { selectAll, insert, selectByID } from '../dao/voice.js'
import { selectAll, insert, insertCamb, selectByID } from '../dao/voice.js'
import { preprocessAndTran, makeAudio as makeAudioApi } from '../api/tts.js'
import { makeAudio as cambMakeAudio, cloneVoice as cambCloneVoice } from '../api/camb-tts.js'
import crypto from 'crypto'
import fs from 'fs'
import path from 'path'
import { assetPath } from '../config/config.js'
import { assetPath, ttsProvider, cambConfig } from '../config/config.js'
import log from '../logger.js'
import { ipcMain } from 'electron'
import dayjs from 'dayjs'
Expand All @@ -16,6 +17,23 @@ export function getAllTimbre() {

export async function train(path, lang = 'zh') {
path = path.replace(/\\/g, '/') // 将路径中的\替换为/

if (ttsProvider === 'camb') {
// Use CAMB AI voice cloning
const res = await cambCloneVoice({
audioFilePath: path,
voiceName: `voice_${Date.now()}`,
language: cambConfig.defaultLanguage
})
log.debug('~ train (camb) ~ res:', res)
const cambVoiceId = res.voice_id
if (!cambVoiceId) {
return false
}
return insertCamb({ origin_audio_path: path, lang, camb_voice_id: cambVoiceId })
}

// Default: Fish Speech
const res = await preprocessAndTran({
format: path.split('.').pop(),
reference_audio: path,
Expand Down Expand Up @@ -47,6 +65,27 @@ export async function makeAudio({voiceId, text, targetDir}) {
const uuid = crypto.randomUUID()
const voice = selectByID(voiceId)

if (ttsProvider === 'camb' && voice.camb_voice_id) {
// Use CAMB AI TTS with cloned voice
try {
const audioBuffer = await cambMakeAudio({
text,
voice_id: voice.camb_voice_id,
language: cambConfig.defaultLanguage,
speech_model: cambConfig.speechModel
})
if (!fs.existsSync(targetDir)) {
fs.mkdirSync(targetDir, { recursive: true })
}
fs.writeFileSync(path.join(targetDir, `${uuid}.wav`), Buffer.from(audioBuffer), 'binary')
return `${uuid}.wav`
} catch (error) {
log.error('Error generating audio via CAMB:', error)
throw error
}
}

// Default: Fish Speech
return makeAudioApi({
speaker: uuid,
text,
Expand Down