fix: Optimize voice interaction pipeline
1. register_speaker_node: enable AEC to match the main node, improving speaker-verification accuracy. 2. tts/dashscope: fix the ffmpeg argument order (thread_queue_size is an input option and must precede -i). 3. asr/dashscope: keep the WebSocket connection alive to reduce latency. 4. speaker_verifier: force single-threaded inference to avoid CPU contention.
This commit is contained in:
@@ -116,56 +116,45 @@ class DashScopeASR(ASRClient):
|
||||
|
||||
def stop_current_recognition(self):
|
||||
"""
|
||||
停止当前识别,触发final结果,然后重新启动
|
||||
优化:
|
||||
1. 使用事件代替 sleep,等待 final 回调完成
|
||||
2. 使用锁防止并发调用
|
||||
3. 处理 start() 失败的情况,确保 running 状态正确
|
||||
4. 添加超时机制,避免无限等待
|
||||
触发提交操作获取当前识别结果,但不关闭连接
|
||||
"""
|
||||
if not self.running or not self.conversation:
|
||||
return False
|
||||
|
||||
# 使用锁防止并发调用
|
||||
if not self._stop_lock.acquire(blocking=False):
|
||||
self._log("warning", "stop_current_recognition 正在执行,跳过本次调用")
|
||||
return False
|
||||
|
||||
|
||||
try:
|
||||
if not self.running or not self.conversation:
|
||||
return False
|
||||
|
||||
# 重置事件,准备等待 final 回调
|
||||
self._final_result_event.clear()
|
||||
self._pending_commit = True
|
||||
|
||||
|
||||
# 触发 commit,等待 final 结果
|
||||
self.conversation.commit()
|
||||
|
||||
|
||||
# 等待 final 回调完成(最多等待1秒)
|
||||
if self._final_result_event.wait(timeout=1.0):
|
||||
self._log("debug", "已收到 final 回调,准备关闭连接")
|
||||
self._log("debug", "已收到 final 回调")
|
||||
else:
|
||||
self._log("warning", "等待 final 回调超时,继续执行")
|
||||
|
||||
# 先设置running=False,防止ASR线程继续发送音频
|
||||
self.running = False
|
||||
|
||||
# 关闭当前连接
|
||||
old_conversation = self.conversation
|
||||
self.conversation = None # 立即清空,防止send_audio继续使用
|
||||
try:
|
||||
old_conversation.close()
|
||||
except Exception as e:
|
||||
self._log("warning", f"关闭连接时出错: {e}")
|
||||
|
||||
# 短暂等待,确保连接完全关闭
|
||||
time.sleep(0.1)
|
||||
|
||||
# 重新启动,如果失败则保持 running=False
|
||||
if not self.start():
|
||||
self._log("error", "ASR重启失败,running状态已重置")
|
||||
return False
|
||||
|
||||
# 启动成功,running已在start()中设置为True
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self._log("error", f"提交当前识别结果失败: {e}")
|
||||
# 出现错误时尝试重启连接
|
||||
self.running = False
|
||||
try:
|
||||
if self.conversation:
|
||||
self.conversation.close()
|
||||
except:
|
||||
pass
|
||||
self.conversation = None
|
||||
time.sleep(0.1)
|
||||
return self.start()
|
||||
|
||||
finally:
|
||||
self._pending_commit = False
|
||||
self._stop_lock.release()
|
||||
|
||||
@@ -107,11 +107,19 @@ class SpeakerVerificationClient:
|
||||
|
||||
temp_wav_path = None
|
||||
try:
|
||||
temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
|
||||
result = self.model.generate(input=temp_wav_path)
|
||||
|
||||
# 限制Torch在推理时使用单线程,避免在多任务环境下(尤其是一边录音一边识别)
|
||||
# 出现的极端CPU竞争和上下文切换开销
|
||||
import torch
|
||||
embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
|
||||
with torch.inference_mode():
|
||||
# 临时设置,虽然全局已经设置了,但在调用前再次确保
|
||||
# 注意:set_num_threads 是全局的,这里再次确认
|
||||
if torch.get_num_threads() != 1:
|
||||
torch.set_num_threads(1)
|
||||
|
||||
temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
|
||||
result = self.model.generate(input=temp_wav_path)
|
||||
|
||||
embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
|
||||
|
||||
embedding_dim = len(embedding)
|
||||
if embedding_dim == 0:
|
||||
|
||||
Reference in New Issue
Block a user