Skip to content

Commit 499adb6

Browse files
committed
update: vad增加部分后续方便调试的变量
1 parent 90b1d35 commit 499adb6

File tree

1 file changed

+73
-11
lines changed

1 file changed

+73
-11
lines changed

src/main/java/com/xiaozhi/dialogue/service/VadService.java

Lines changed: 73 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,12 @@ private class VadState {
8989
// Audio analysis
9090
private float avgEnergy = 0;
9191
private final List<Float> probs = new ArrayList<>();
92+
93+
// History of raw (original) VAD probabilities
94+
private final List<Float> originalProbs = new ArrayList<>();
95+
96+
// Frame counter (intended for emitting output once every 10 frames)
97+
private int frameCounter = 0;
9298

9399
// Pre-buffer
94100
private final LinkedList<byte[]> preBuffer = new LinkedList<>();
@@ -166,6 +172,21 @@ public void addProb(float prob) {
166172
updateDeviceType();
167173
}
168174
}
175+
176+
// 添加原始VAD概率
177+
public void addOriginalProb(float prob) {
178+
originalProbs.add(prob);
179+
if (originalProbs.size() > 10) {
180+
originalProbs.remove(0);
181+
}
182+
183+
// 增加帧计数器
184+
frameCounter++;
185+
}
186+
187+
public float getLastOriginalProb() {
188+
return originalProbs.isEmpty() ? 0.0f : originalProbs.get(originalProbs.size() - 1);
189+
}
169190

170191
public float getLastProb() {
171192
return probs.isEmpty() ? 0.0f : probs.get(probs.size() - 1);
@@ -174,6 +195,10 @@ public float getLastProb() {
174195
public List<Float> getProbs() {
175196
return probs;
176197
}
198+
199+
public int getFrameCounter() {
200+
return frameCounter;
201+
}
177202

178203
public String getDetectedDeviceType() {
179204
return detectedDeviceType;
@@ -286,6 +311,8 @@ public void reset() {
286311
silenceTime = 0;
287312
avgEnergy = 0;
288313
probs.clear();
314+
originalProbs.clear(); // 重置原始概率列表
315+
frameCounter = 0; // 重置帧计数器
289316
preBuffer.clear();
290317
preBufferSize = 0;
291318
pcmData.clear();
@@ -386,17 +413,29 @@ public VadResult processAudio(String sessionId, byte[] opusData) {
386413
return new VadResult(VadStatus.ERROR, null);
387414
}
388415

416+
// 分析原始音频
417+
float[] originalSamples = bytesToFloats(pcmData);
418+
float originalEnergy = calcEnergy(originalSamples);
419+
420+
// 获取原始VAD概率
421+
float originalSpeechProb = detectSpeech(originalSamples);
422+
state.addOriginalProb(originalSpeechProb);
423+
389424
// ========== 应用 AGC ==========
390425
String deviceType = state.getDetectedDeviceType();
391426
byte[] originalPcm = pcmData.clone(); // 保留原始数据用于对比
392-
pcmData = agc.process(sessionId, pcmData, deviceType);
427+
428+
// 检查AGC是否可用
429+
if (agc != null) {
430+
pcmData = agc.process(sessionId, pcmData, deviceType);
431+
}
393432

394433
// 获取AGC统计信息
395-
AutomaticGainControl.AgcStats agcStats = agc.getStats(sessionId);
434+
AutomaticGainControl.AgcStats agcStats = agc != null ? agc.getStats(sessionId) : new AutomaticGainControl.AgcStats();
396435

397436
// 根据AGC增益动态调整VAD阈值
398-
speechThreshold = adjustVadThreshold(speechThreshold, agcStats);
399-
silenceThreshold = adjustVadThreshold(silenceThreshold, agcStats);
437+
float adjustedSpeechThreshold = adjustVadThreshold(speechThreshold, agcStats);
438+
float adjustedSilenceThreshold = adjustVadThreshold(silenceThreshold, agcStats);
400439

401440
// 添加到预缓冲区
402441
state.addToPreBuffer(pcmData);
@@ -415,21 +454,22 @@ public VadResult processAudio(String sessionId, byte[] opusData) {
415454
if (pcmData.length == 0) {
416455
return new VadResult(VadStatus.NO_SPEECH, null);
417456
}
457+
418458
}
419459

420-
// 分析音频
460+
// 分析AGC处理后的音频
421461
float[] samples = bytesToFloats(pcmData);
422462
float energy = calcEnergy(samples);
423463
state.updateEnergy(energy);
424464

425-
// VAD推断
465+
// VAD推断(AGC后)
426466
float speechProb = detectSpeech(samples);
427467
state.addProb(speechProb);
428468

429469
// 判断语音状态
430470
boolean hasEnergy = energy > state.getAvgEnergy() * 1.5 && energy > energyThreshold;
431-
boolean isSpeech = speechProb > speechThreshold && hasEnergy;
432-
boolean isSilence = speechProb < silenceThreshold;
471+
boolean isSpeech = speechProb > adjustedSpeechThreshold && hasEnergy;
472+
boolean isSilence = speechProb < adjustedSilenceThreshold;
433473
state.updateSilence(isSilence);
434474

435475
// 处理状态转换
@@ -443,9 +483,9 @@ public VadResult processAudio(String sessionId, byte[] opusData) {
443483
agcInfo = String.format(", AGC增益: %.2f, 设备类型: %s",
444484
agcStats.gain, state.getDetectedDeviceType());
445485

446-
logger.info("检测到语音开始 - SessionId: {}, 概率: {}, 能量: {}, " +
486+
logger.info("检测到语音开始 - SessionId: {}, 概率: {}, 原始概率: {}, 能量: {}, " +
447487
"调整后阈值: {}{}",
448-
sessionId, speechProb, energy, speechThreshold, agcInfo);
488+
sessionId, speechProb, originalSpeechProb, energy, adjustedSpeechThreshold, agcInfo);
449489

450490
// 获取预缓冲数据
451491
byte[] preBufferData = state.drainPreBuffer();
@@ -486,7 +526,7 @@ public VadResult processAudio(String sessionId, byte[] opusData) {
486526
return new VadResult(VadStatus.NO_SPEECH, null);
487527
}
488528
} catch (Exception e) {
489-
logger.error("处理音频失败: {}", sessionId, e);
529+
logger.error("处理音频失败: {}, 错误: {}", sessionId, e.getMessage(), e);
490530
return new VadResult(VadStatus.ERROR, null);
491531
}
492532
}
@@ -632,6 +672,17 @@ public float getSpeechProbability(String sessionId) {
632672
return state != null ? state.getLastProb() : 0.0f;
633673
}
634674
}
675+
676+
/**
677+
* 获取原始语音概率
678+
*/
679+
public float getOriginalSpeechProbability(String sessionId) {
680+
Object lock = getLock(sessionId);
681+
synchronized (lock) {
682+
VadState state = states.get(sessionId);
683+
return state != null ? state.getLastOriginalProb() : 0.0f;
684+
}
685+
}
635686

636687
/**
637688
* 获取音频数据
@@ -687,6 +738,17 @@ public void setDeviceType(String sessionId, String deviceType) {
687738
}
688739
}
689740

741+
/**
742+
* 获取当前帧计数
743+
*/
744+
public int getFrameCounter(String sessionId) {
745+
Object lock = getLock(sessionId);
746+
synchronized (lock) {
747+
VadState state = states.get(sessionId);
748+
return state != null ? state.getFrameCounter() : 0;
749+
}
750+
}
751+
690752
/**
691753
* VAD状态枚举
692754
*/

0 commit comments

Comments
 (0)