Update voice configuration and skill bridge logic

- Update voice.yaml to use default audio devices and 48kHz sample rate.
- Update voice.yaml paths for voice model and interfaces.
- Improve skill_bridge_node.py JSON parsing and skill parameter handling.
- Update audio_pipeline.py warning message for device detection.
This commit is contained in:
NuoDaJia02
2026-01-22 17:28:28 +08:00
parent e8a9821ce4
commit 856c07715c
3 changed files with 39 additions and 43 deletions

View File

@@ -18,17 +18,12 @@ dashscope:
audio:
microphone:
# device_index: 3 # 指向 iFLYTEK-M2 (hw:1,0)
# sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz,避免重采样可能导致的问题
device_index: -1 # 使用系统默认输入设备
sample_rate: 16000
sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz,避免重采样可能导致的问题
channels: 1 # 输入声道数,单声道(MONO)适合语音采集
chunk: 1024
heartbeat_interval: 2.0 # 心跳间隔(秒),用于定期输出录音状态
soundcard:
# card_index: 1 # USB Audio Device (card 1)
# device_index: 0 # USB Audio [USB Audio] (device 0)
# sample_rate: 48000 # 输出采样率48kHziFLYTEK 支持 48000
card_index: -1 # 使用默认声卡
device_index: -1 # 使用默认输出设备
sample_rate: 48000 # 输出采样率:48kHz(iFLYTEK 支持 48000)
@@ -52,11 +47,11 @@ system:
shutup_keywords: "bi zui" # 闭嘴指令关键词(拼音,逗号分隔)
interrupt_command_queue_depth: 10 # 中断命令订阅的队列深度(QoS)
sv_enabled: true # 是否启用声纹识别
# sv_model_path: "~/hivecore_robot_os1/voice_model" # 声纹模型路径
sv_model_path: "~/ros_learn/speech_campplus_sv_zh-cn_16k-common" # 声纹模型路径
sv_threshold: 0.40 # 声纹识别阈值(0.0-1.0,值越小越宽松,值越大越严格)
# sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_speaker_db_path: "~/ros_learn/hivecore_robot_voice/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_model_path: "~/hivecore_robot_os1/voice_model" # 声纹模型路径
# sv_model_path: "~/ros_learn/speech_campplus_sv_zh-cn_16k-common" # 声纹模型路径
sv_threshold: 0.45 # 声纹识别阈值(0.0-1.0,值越小越宽松,值越大越严格)
sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
# sv_speaker_db_path: "~/ros_learn/hivecore_robot_voice/config/speakers.json" # 声纹数据库保存路径(JSON格式,相对于ROS2包share目录)
sv_buffer_size: 240000 # 声纹验证录音缓冲区大小(样本数,48kHz下5秒=240000)
continue_without_image: false # 多模态意图(skill_sequence/chat_camera)未获取到图片时是否继续推理
@@ -65,4 +60,5 @@ camera:
jpeg_quality: 85 # JPEG压缩质量(0-100),85是质量和大小平衡点
interfaces:
root_path: "~/ros_learn/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径
root_path: "~/hivecore_robot_os1/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径
# root_path: "~/ros_learn/hivecore_robot_interfaces/src" # 接口文件根目录,支持 ~ 展开和相对路径

View File

@@ -46,41 +46,41 @@ class SkillBridgeNode(Node):
try:
data = json.loads(raw)
sequence_list = self._parse_json_sequence(data)
if sequence_list is None:
self.get_logger().error("Invalid skill sequence format; must be JSON or plain text")
return
except (json.JSONDecodeError, ValueError) as e:
self.get_logger().debug(f"JSON解析失败尝试文本解析: {e}")
# JSON解析失败,回退到文本解析
if sequence_list is None:
# 原有文本解析逻辑(保留作为回退)
sequence, invalid = self._extract_skill_sequence(raw)
if invalid:
self.get_logger().warning(f"Rejected sequence with invalid skills: {invalid}")
return
if not sequence:
self.get_logger().warning(f"Invalid skill sequence: {raw}")
return
# self._send_skill_sequence(sequence)
# 判断:如果 sequence 中包含 VisionObjectRecognition、Arm、GripperCmd0 这几个 actions,则调用 rebuild_now
if any(skill in sequence for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {sequence}")
# JSON格式处理
try:
skill_names = [item["skill"] for item in sequence_list]
if any(skill in skill_names for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {skill_names}")
self.rebuild_now("Trigger", "bt_vision_grasp_dual_arm", "")
else:
#只发送逗号分隔符的第一个action
first_skill = sequence.split(",")[0]
self.get_logger().info(f"Sending first skill in sequence: {first_skill}")
self.rebuild_now("Remote", first_skill, "")
return
# JSON格式处理
skill_names = [item["skill"] for item in sequence_list]
if any(skill in skill_names for skill in ["VisionObjectRecognition", "Arm", "GripperCmd0"]):
self.get_logger().info(f"Skill sequence contains special skills, triggering rebuild: {skill_names}")
self.rebuild_now("Trigger", "bt_vision_grasp_dual_arm", "")
else:
first_skill = skill_names[0] if skill_names else ""
self.get_logger().info(f"Sending first skill in sequence: {first_skill}")
self.rebuild_now("Remote", first_skill, "")
skill_params = []
for item in sequence_list:
p = item.get("parameters")
params = ""
if isinstance(p, dict):
lines = []
for k, v in p.items():
lines.append(f"{k}: {v}")
if lines:
params = "\n".join(lines) + "\n"
skill_params.append(params)
self.get_logger().info(f"Sending skill sequence: {skill_names}")
self.get_logger().info(f"Sending skill parameters: {skill_params}")
# 将技能名和参数列表分别用单引号包括,并用逗号隔开
names_str = ", ".join([f"'{name}'" for name in skill_names])
params_str = ", ".join([f"'{param}'" for param in skill_params])
self.rebuild_now("Remote", names_str, params_str)
except Exception as e:
self.get_logger().error(f"Error processing skill sequence: {e}")
def _load_allowed_skills(self) -> set[str]:
try:

View File

@@ -86,7 +86,7 @@ class AudioRecorder:
self.device_index = found_index
else:
if self.logger:
self.logger.warning(f"未自动检测到 iFLYTEK 设备,将继续使用配置的索引: {self.device_index}")
self.logger.warning(f"未自动检测到 iFLYTEK 设备,请检查USB连接或执行 'arecord -l' 确认系统是否识别到录音设备,将继续使用配置的索引: {self.device_index}")
except Exception as e:
if self.logger: