diff --git a/robot_speaker/models/asr/dashscope.py b/robot_speaker/models/asr/dashscope.py
index 8c267b6..8cb402e 100644
--- a/robot_speaker/models/asr/dashscope.py
+++ b/robot_speaker/models/asr/dashscope.py
@@ -116,56 +116,45 @@ class DashScopeASR(ASRClient):
     def stop_current_recognition(self):
         """
-        停止当前识别,触发final结果,然后重新启动
-        优化:
-        1. 使用事件代替 sleep,等待 final 回调完成
-        2. 使用锁防止并发调用
-        3. 处理 start() 失败的情况,确保 running 状态正确
-        4. 添加超时机制,避免无限等待
+        触发提交操作获取当前识别结果,但不关闭连接
         """
+        if not self.running or not self.conversation:
+            return False
+
         # 使用锁防止并发调用
         if not self._stop_lock.acquire(blocking=False):
             self._log("warning", "stop_current_recognition 正在执行,跳过本次调用")
             return False
-
+
         try:
-            if not self.running or not self.conversation:
-                return False
-
             # 重置事件,准备等待 final 回调
             self._final_result_event.clear()
             self._pending_commit = True
-
+
             # 触发 commit,等待 final 结果
             self.conversation.commit()
-
+
             # 等待 final 回调完成(最多等待1秒)
             if self._final_result_event.wait(timeout=1.0):
-                self._log("debug", "已收到 final 回调,准备关闭连接")
+                self._log("debug", "已收到 final 回调")
             else:
                 self._log("warning", "等待 final 回调超时,继续执行")
-
-            # 先设置running=False,防止ASR线程继续发送音频
-            self.running = False
-
-            # 关闭当前连接
-            old_conversation = self.conversation
-            self.conversation = None # 立即清空,防止send_audio继续使用
-            try:
-                old_conversation.close()
-            except Exception as e:
-                self._log("warning", f"关闭连接时出错: {e}")
-
-            # 短暂等待,确保连接完全关闭
-            time.sleep(0.1)
-
-            # 重新启动,如果失败则保持 running=False
-            if not self.start():
-                self._log("error", "ASR重启失败,running状态已重置")
-                return False
-
-            # 启动成功,running已在start()中设置为True
+            return True
+
+        except Exception as e:
+            self._log("error", f"提交当前识别结果失败: {e}")
+            # 出现错误时尝试重启连接
+            self.running = False
+            try:
+                if self.conversation:
+                    self.conversation.close()
+            except:
+                pass
+            self.conversation = None
+            time.sleep(0.1)
+            return self.start()
+
         finally:
             self._pending_commit = False
             self._stop_lock.release()
 
 
diff --git a/robot_speaker/perception/speaker_verifier.py b/robot_speaker/perception/speaker_verifier.py
index fd7a124..7b12118 100644
--- a/robot_speaker/perception/speaker_verifier.py
+++ b/robot_speaker/perception/speaker_verifier.py
@@ -107,11 +107,19 @@ class SpeakerVerificationClient:
 
         temp_wav_path = None
         try:
-            temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
-            result = self.model.generate(input=temp_wav_path)
-
+            # 限制Torch在推理时使用单线程,避免在多任务环境下(尤其是一边录音一边识别)
+            # 出现的极端CPU竞争和上下文切换开销
             import torch
-            embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
+            with torch.inference_mode():
+                # 临时设置,虽然全局已经设置了,但在调用前再次确保
+                # 注意:set_num_threads 是全局的,这里再次确认
+                if torch.get_num_threads() != 1:
+                    torch.set_num_threads(1)
+
+                temp_wav_path = self._write_temp_wav(audio_data, sample_rate)
+                result = self.model.generate(input=temp_wav_path)
+
+                embedding = result[0]['spk_embedding'].detach().cpu().numpy()[0] # shape [1, 192] -> [192]
 
             embedding_dim = len(embedding)
             if embedding_dim == 0: