Update voice configuration and skill bridge logic

- Update voice.yaml to use default audio devices and 48kHz sample rate.
- Update voice.yaml paths for voice model and interfaces.
- Improve skill_bridge_node.py JSON parsing and skill parameter handling.
- Update audio_pipeline.py warning message for device detection.
This commit is contained in:
NuoDaJia02
2026-01-22 17:28:28 +08:00
parent e8a9821ce4
commit 856c07715c
3 changed files with 39 additions and 43 deletions

View File

@@ -18,17 +18,12 @@ dashscope:
audio:
microphone:
# device_index: 3 # 指向 iFLYTEK-M2 (hw:1,0)
# sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz,避免重采样可能导致的问题
device_index: -1 # 使用系统默认输入设备
sample_rate: 16000
sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz,避免重采样可能导致的问题
channels: 1 # 输入声道数,单声道(MONO)适合语音采集
chunk: 1024
heartbeat_interval: 2.0 # 心跳间隔(秒),用于定期输出录音状态
soundcard:
# card_index: 1 # USB Audio Device (card 1)
# device_index: 0 # USB Audio [USB Audio] (device 0)
# sample_rate: 48000 # 输出采样率48kHziFLYTEK 支持 48000
card_index: -1 # 使用默认声卡
device_index: -1 # 使用默认输出设备
sample_rate: 48000 # 输出采样率:48kHz(iFLYTEK 支持 48000)
@@ -52,11 +47,11 @@ system:
shutup_keywords: "bi zui" # 闭嘴指令关键词(拼音,逗号分隔)
interrupt_command_queue_depth: 10 # 中断命令订阅的队列深度(QoS)
sv_enabled: true # 是否启用声纹识别
# sv_model_path: "~/hivecore_robot_os1/voice_model" # 声纹模型路径
sv_model_path: "~/ros_learn/speech_campplus_sv_zh-cn_16k-common" # 声纹模型路径
sv_threshold: 0.40 # 声纹识别阈值(0.0-1.0,值越小越宽松,值越大越严格)
# sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_speaker_db_path: "~/ros_learn/hivecore_robot_voice/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_model_path: "~/hivecore_robot_os1/voice_model" # 声纹模型路径
# sv_model_path: "~/ros_learn/speech_campplus_sv_zh-cn_16k-common" # 声纹模型路径
sv_threshold: 0.45 # 声纹识别阈值(0.0-1.0,值越小越宽松,值越大越严格)
sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
# sv_speaker_db_path: "~/ros_learn/hivecore_robot_voice/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_buffer_size: 240000 # 声纹验证录音缓冲区大小(样本数,48kHz下5秒=240000)
continue_without_image: false # 多模态意图(skill_sequence/chat_camera)未获取到图片时是否继续推理
@@ -65,4 +60,5 @@ camera:
jpeg_quality: 85 # JPEG压缩质量(0-100),85是质量和大小平衡点
interfaces:
root_path: "~/ros_learn/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径
root_path: "~/hivecore_robot_os1/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径
# root_path: "~/ros_learn/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径

View File

@@ -46,41 +46,41 @@ class SkillBridgeNode(Node):
try:
data = json.loads(raw)
sequence_list = self._parse_json_sequence(data)
if sequence_list is None:
self.get_logger().error("Invalid skill sequence format; must be JSON or plain text")
return
except (json.JSONDecodeError, ValueError) as e:
self.get_logger().debug(f"JSON解析失败尝试文本解析: {e}")
# JSON解析失败,回退到文本解析
if sequence_list is None:
# 原有文本解析逻辑(保留作为回退)
sequence, invalid = self._extract_skill_sequence(raw)
if invalid:
self.get_logger().warning(f"Rejected sequence with invalid skills: {invalid}")
return
if not sequence:
self.get_logger().warning(f"Invalid skill sequence: {raw}")
return
# self._send_skill_sequence(sequence)
# 判断:如果 sequence 中包含 VisionObjectRecognition、Arm、GripperCmd0 这几个 actions,则调用 rebuild_now
if any(skill in sequence for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {sequence}")
# JSON格式处理
try:
skill_names = [item["skill"] for item in sequence_list]
if any(skill in skill_names for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {skill_names}")
self.rebuild_now("Trigger", "bt_vision_grasp_dual_arm", "")
else:
#只发送逗号分隔符的第一个action
first_skill = sequence.split(",")[0]
self.get_logger().info(f"Sending first skill in sequence: {first_skill}")
self.rebuild_now("Remote", first_skill, "")
return
# JSON格式处理
skill_names = [item["skill"] for item in sequence_list]
if any(skill in skill_names for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {skill_names}")
self.rebuild_now("Trigger", "bt_vision_grasp_dual_arm", "")
else:
first_skill = skill_names[0] if skill_names else ""
self.get_logger().info(f"Sending first skill in sequence: {first_skill}")
self.rebuild_now("Remote", first_skill, "")
skill_params = []
for item in sequence_list:
p = item.get("parameters")
params = ""
if isinstance(p, dict):
lines = []
for k, v in p.items():
lines.append(f"{k}: {v}")
if lines:
params = "\n".join(lines) + "\n"
skill_params.append(params)
self.get_logger().info(f"Sending skill sequence: {skill_names}")
self.get_logger().info(f"Sending skill parameters: {skill_params}")
# 将技能名和参数列表分别用单引号包括,并用逗号隔开
names_str = ", ".join([f"'{name}'" for name in skill_names])
params_str = ", ".join([f"'{param}'" for param in skill_params])
self.rebuild_now("Remote", names_str, params_str)
except Exception as e:
self.get_logger().error(f"Error processing skill sequence: {e}")
def _load_allowed_skills(self) -> set[str]:
try:

View File

@@ -86,7 +86,7 @@ class AudioRecorder:
self.device_index = found_index
else:
if self.logger:
self.logger.warning(f"未自动检测到 iFLYTEK 设备,将继续使用配置的索引: {self.device_index}")
self.logger.warning(f"未自动检测到 iFLYTEK 设备,请检查USB连接或执行 'arecord -l' 确认系统是否识别到录音设备,将继续使用配置的索引: {self.device_index}")
except Exception as e:
if self.logger: