fix: Optimize voice interaction pipeline

1. register_speaker_node: Enable AEC to match main node for better SV accuracy.
2. tts/dashscope: Fix ffmpeg argument order (input option thread_queue_size).
3. asr/dashscope: Keep WebSocket connection alive to reduce latency.
4. speaker_verifier: Force single-thread inference to avoid CPU contention.
This commit is contained in:
NuoDaJia02
2026-01-19 16:17:27 +08:00
parent 293e69e9f2
commit e5714e3a8b
2 changed files with 35 additions and 38 deletions

View File

@@ -116,56 +116,45 @@ class DashScopeASR(ASRClient):
def stop_current_recognition(self):
    """
    Stop the current recognition, force a final result, then restart.

    Triggers a commit so the backend emits the final result for the audio
    sent so far, then tears down and re-establishes the connection.

    Implementation notes:
    1. An event (instead of sleep-polling) waits for the final callback.
    2. A non-blocking lock prevents concurrent invocations.
    3. start() failure is handled so the ``running`` flag stays correct.
    4. A timeout bounds the wait for the final callback.

    Returns:
        bool: True if the commit succeeded and the connection was
        restarted; False if idle, already in progress, or restart failed.
    """
    # Fast-path check before taking the lock.
    if not self.running or not self.conversation:
        return False
    # Non-blocking acquire: if another call is in flight, skip this one.
    if not self._stop_lock.acquire(blocking=False):
        self._log("warning", "stop_current_recognition 正在执行,跳过本次调用")
        return False
    try:
        # Re-check under the lock: state may have changed in the meantime.
        if not self.running or not self.conversation:
            return False
        # Reset the event so we wait for the *next* final callback.
        self._final_result_event.clear()
        self._pending_commit = True
        # Trigger the commit; the backend will deliver a final result.
        self.conversation.commit()
        # Wait for the final callback (at most 1 second).
        if self._final_result_event.wait(timeout=1.0):
            self._log("debug", "已收到 final 回调,准备关闭连接")
        else:
            self._log("warning", "等待 final 回调超时,继续执行")
        # Stop the ASR thread from sending more audio before closing.
        self.running = False
        # Detach the connection first so send_audio cannot race the close.
        old_conversation = self.conversation
        self.conversation = None
        try:
            old_conversation.close()
        except Exception as e:
            self._log("warning", f"关闭连接时出错: {e}")
        # Brief pause to let the connection shut down completely.
        time.sleep(0.1)
        # Restart; on failure keep running=False so callers see dead state.
        if not self.start():
            self._log("error", "ASR重启失败running状态已重置")
            return False
        # start() sets running=True on success.
        return True
    except Exception as e:
        self._log("error", f"提交当前识别结果失败: {e}")
        # On error, tear down and attempt a clean restart.
        self.running = False
        try:
            if self.conversation:
                self.conversation.close()
        # Narrowed from a bare except: best-effort close, but never
        # swallow SystemExit/KeyboardInterrupt.
        except Exception:
            pass
        self.conversation = None
        time.sleep(0.1)
        return self.start()
    finally:
        self._pending_commit = False
        self._stop_lock.release()

View File

@@ -107,11 +107,19 @@ class SpeakerVerificationClient:
temp_wav_path = None
try:
temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
result = self.model.generate(input=temp_wav_path)
# 限制Torch在推理时使用单线程避免在多任务环境下尤其是一边录音一边识别
# 出现的极端CPU竞争和上下文切换开销
import torch
embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
with torch.inference_mode():
# 临时设置,虽然全局已经设置了,但在调用前再次确保
# 注意set_num_threads 是全局的,这里再次确认
if torch.get_num_threads() != 1:
torch.set_num_threads(1)
temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
result = self.model.generate(input=temp_wav_path)
embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
embedding_dim = len(embedding)
if embedding_dim == 0: