添加与行为树的桥接节点

This commit is contained in:
lxy
2026-01-19 09:58:40 +08:00
parent 9fd658990c
commit 6d101b9d9e
17 changed files with 244 additions and 6 deletions

View File

@@ -32,6 +32,7 @@ audio:
channels: 2 # 输出声道数立体声2声道FL+FR
volume: 1.0 # 音量比例0.0-1.00.2表示20%音量)
echo_cancellation:
enabled: false # 是否启用回声消除true/false
max_duration_ms: 500 # 参考信号缓冲区最大时长(毫秒)
tts:
source_sample_rate: 22050 # TTS服务固定输出采样率DashScope服务固定值不可修改

View File

@@ -9,6 +9,8 @@
<depend>rclpy</depend>
<depend>std_msgs</depend>
<depend>ament_index_python</depend>
<depend>interfaces</depend>
<exec_depend>python3-pyaudio</exec_depend>
<exec_depend>python3-requests</exec_depend>

View File

@@ -0,0 +1,2 @@
# Bridge package for connecting LLM outputs to brain execution.

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
桥接LLM技能序列到小脑ExecuteBtAction并转发反馈/结果。
"""
import json
import os
import re
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from std_msgs.msg import String
from ament_index_python.packages import get_package_share_directory
from interfaces.action import ExecuteBtAction
class SkillBridgeNode(Node):
    """Bridge between the LLM planner and the cerebellum behaviour-tree executor.

    Subscribes to '/llm_skill_sequence' (comma/space/semicolon separated skill
    names), validates every name against the whitelist loaded from the brain
    package's robot_skills.yaml, then dispatches the sequence as an
    ExecuteBtAction goal. Action feedback and the final result are republished
    as JSON strings on '/skill_execution_feedback' and '/skill_execution_result'.
    """

    def __init__(self):
        super().__init__('skill_bridge_node')
        self._action_client = ActionClient(self, ExecuteBtAction, '/execute_bt_action')
        # Monotonic goal counter forwarded as ExecuteBtAction.Goal.epoch.
        self._current_epoch = 1
        self._allowed_skills = self._load_allowed_skills()
        self.skill_seq_sub = self.create_subscription(
            String, '/llm_skill_sequence', self._on_skill_sequence_received, 10
        )
        self.feedback_pub = self.create_publisher(String, '/skill_execution_feedback', 10)
        self.result_pub = self.create_publisher(String, '/skill_execution_result', 10)
        self.get_logger().info('SkillBridgeNode started')

    def _on_skill_sequence_received(self, msg: String):
        """Validate an incoming skill sequence and dispatch it if acceptable."""
        raw = (msg.data or "").strip()
        if not raw:
            return
        if not self._allowed_skills:
            # Fail closed: without a whitelist we cannot tell valid skills apart.
            self.get_logger().warning("No skill whitelist loaded; reject all sequences")
            return
        sequence, invalid = self._extract_skill_sequence(raw)
        if invalid:
            self.get_logger().warning(f"Rejected sequence with invalid skills: {invalid}")
            return
        if not sequence:
            self.get_logger().warning(f"Invalid skill sequence: {raw}")
            return
        self._send_skill_sequence(sequence)

    def _load_allowed_skills(self) -> set[str]:
        """Load the skill-name whitelist from brain/config/robot_skills.yaml.

        Returns:
            Set of skill names; empty (rejecting everything) when the file is
            missing or unreadable.
        """
        try:
            brain_share = get_package_share_directory("brain")
            skill_path = os.path.join(brain_share, "config", "robot_skills.yaml")
            if not os.path.exists(skill_path):
                return set()
            import yaml  # local import: only needed when the file actually exists
            with open(skill_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or []
            return {str(entry["name"]) for entry in data if isinstance(entry, dict) and entry.get("name")}
        except Exception as e:
            self.get_logger().warning(f"Load skills failed: {e}")
            return set()

    def _extract_skill_sequence(self, text: str) -> tuple[str, list[str]]:
        """Split *text* on commas/whitespace/semicolons into CamelCase tokens.

        Returns:
            Tuple of (comma-joined skill names, names absent from the whitelist).
            ("", []) when no CamelCase token is found at all.
        """
        # Accept CSV/space/semicolon and filter by CamelCase tokens
        tokens = re.split(r'[,\s;]+', text.strip())
        skills = [t for t in tokens if re.match(r'^[A-Z][A-Za-z0-9]*$', t)]
        if not skills:
            return "", []
        invalid = [s for s in skills if s not in self._allowed_skills]
        return ",".join(skills), invalid

    def _send_skill_sequence(self, skill_sequence: str):
        """Send the validated sequence as an ExecuteBtAction goal, non-blocking.

        BUGFIX: the previous implementation called rclpy.spin_until_future_complete()
        from inside this subscription callback. With the default single-threaded
        executor (main() spins this node) the executor is already busy running this
        very callback, so that call deadlocks / never completes. We now chain
        done-callbacks on the futures instead, so the executor keeps spinning.
        """
        # wait_for_server blocks at most 2 s; acceptable as a short availability probe.
        if not self._action_client.wait_for_server(timeout_sec=2.0):
            self.get_logger().error('ExecuteBtAction server unavailable')
            return
        goal = ExecuteBtAction.Goal()
        goal.epoch = self._current_epoch
        self._current_epoch += 1
        goal.action_name = skill_sequence
        goal.calls = []
        self.get_logger().info(f"Dispatch skill sequence: {skill_sequence}")
        send_future = self._action_client.send_goal_async(goal, feedback_callback=self._feedback_callback)
        send_future.add_done_callback(self._on_goal_response)

    def _on_goal_response(self, send_future):
        """Handle goal acceptance/rejection; chain the result callback on success."""
        goal_handle = send_future.result()
        if not goal_handle or not goal_handle.accepted:
            self.get_logger().error("Goal rejected")
            return
        result_future = goal_handle.get_result_async()
        result_future.add_done_callback(self._on_result_ready)

    def _on_result_ready(self, result_future):
        """Forward the completed action result to _handle_result."""
        self._handle_result(result_future.result())

    def _feedback_callback(self, feedback_msg):
        """Republish action feedback as a JSON-encoded String message."""
        fb = feedback_msg.feedback
        payload = {
            "stage": fb.stage,
            "current_skill": fb.current_skill,
            "progress": float(fb.progress),
            "detail": fb.detail,
            "epoch": int(fb.epoch),
        }
        msg = String()
        msg.data = json.dumps(payload, ensure_ascii=True)
        self.feedback_pub.publish(msg)

    def _handle_result(self, result_wrapper):
        """Republish the final action result as a JSON-encoded String message."""
        result = result_wrapper.result
        if not result:
            return
        payload = {
            "success": bool(result.success),
            "message": result.message,
            "total_skills": int(result.total_skills),
            "succeeded_skills": int(result.succeeded_skills),
        }
        msg = String()
        msg.data = json.dumps(payload, ensure_ascii=True)
        self.result_pub.publish(msg)
def main(args=None):
    """Entry point: init rclpy and spin the bridge node until shutdown.

    Handles Ctrl-C gracefully and always releases the node and the rclpy
    context (the previous version skipped both on KeyboardInterrupt).
    """
    rclpy.init(args=args)
    node = SkillBridgeNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal way to stop this node; not an error.
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -2,3 +2,4 @@

View File

@@ -1,5 +1,8 @@
from dataclasses import dataclass
from typing import Optional
import os
import yaml
from ament_index_python.packages import get_package_share_directory
from pypinyin import pinyin, Style
@@ -24,6 +27,24 @@ class IntentRouter:
self.kb_keywords = [
"ni shi shei", "ni de ming zi"
]
self._cached_skill_names: list[str] | None = None
    def _load_brain_skill_names(self) -> list[str]:
        """Return skill names from brain/config/robot_skills.yaml, cached after first load.

        Returns an empty list when the file is missing or unparsable; the empty
        result is cached too, so a broken config is only probed once per instance.
        """
        # Cache hit: the file was already read (successfully or not).
        if self._cached_skill_names is not None:
            return self._cached_skill_names
        skill_names: list[str] = []
        try:
            brain_share = get_package_share_directory("brain")
            skill_path = os.path.join(brain_share, "config", "robot_skills.yaml")
            with open(skill_path, "r", encoding="utf-8") as f:
                # Expected layout: a YAML list of mappings with a "name" key
                # — presumably matching robot_skills.yaml's schema; TODO confirm.
                data = yaml.safe_load(f) or []
            for entry in data:
                if isinstance(entry, dict) and entry.get("name"):
                    skill_names.append(str(entry["name"]))
        except Exception:
            # Best-effort: any failure (missing package/file, bad YAML) yields [].
            skill_names = []
        self._cached_skill_names = skill_names
        return skill_names
def to_pinyin(self, text: str) -> str:
chars = [c for c in text if '\u4e00' <= c <= '\u9fa5']
@@ -63,9 +84,19 @@ class IntentRouter:
return "head"
def build_skill_prompt(self) -> str:
skills = self._load_brain_skill_names()
skills_text = ", ".join(skills) if skills else ""
skill_guard = (
"【技能限制】只能使用以下技能名称:" + skills_text
if skills_text
else "【技能限制】技能列表不可用,请不要输出任何技能名称。"
)
return (
"你是机器人任务规划器。\n"
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照(默认头部相机),并结合当前环境信息生成简洁、可执行的技能序列。"
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照(默认头部相机),并结合当前环境信息生成简洁、可执行的技能序列。\n"
"【重要】如果对话历史中包含【执行结果】或【执行状态】,请参考上一轮技能序列的执行情况,根据成功/失败信息调整本次技能序列。\n"
"【输出格式要求】只输出逗号分隔的技能名称,不要任何解释说明。\n"
+ skill_guard
)
def build_chat_prompt(self, need_camera: bool) -> str:

View File

@@ -96,6 +96,16 @@ class RobotSpeakerNode(Node):
self.interrupt_sub = self.create_subscription(
String, 'interrupt_command', self.callbacks.handle_interrupt_command, self.system_interrupt_command_queue_depth
)
self.skill_sequence_pub = self.create_publisher(String, '/llm_skill_sequence', 10)
self.skill_feedback_sub = self.create_subscription(
String, '/skill_execution_feedback', self._on_skill_feedback, 10
)
self.skill_result_sub = self.create_subscription(
String, '/skill_execution_result', self._on_skill_result, 10
)
self.latest_skill_feedback = None
self.latest_skill_result = None
# 启动线程
self._start_threads()
@@ -129,6 +139,7 @@ class RobotSpeakerNode(Node):
self.output_sample_rate = soundcard['sample_rate']
self.output_channels = soundcard['channels']
self.output_volume = soundcard['volume']
self.audio_echo_cancellation_enabled = echo.get('enabled', True) # 默认启用
self.audio_echo_cancellation_max_duration_ms = echo['max_duration_ms']
self.audio_tts_source_sample_rate = tts_audio['source_sample_rate']
self.audio_tts_source_channels = tts_audio['source_channels']
@@ -220,7 +231,7 @@ class RobotSpeakerNode(Node):
max_duration_ms=self.audio_echo_cancellation_max_duration_ms,
sample_rate=self.sample_rate,
channels=self.output_channels
)
) if self.audio_echo_cancellation_enabled else None
# 录音器 - 直接发送音频chunk到队列
self.audio_recorder = AudioRecorder(
@@ -242,7 +253,7 @@ class RobotSpeakerNode(Node):
on_audio_chunk=self.callbacks.on_audio_chunk_for_sv if self.sv_enabled else None, # 声纹录音回调
should_put_to_queue=self.callbacks.should_put_audio_to_queue, # 检查是否应该将音频放入队列
get_silence_threshold=self.callbacks.get_silence_threshold, # 动态静音阈值回调
enable_echo_cancellation=True, # 启用回声消除
enable_echo_cancellation=self.audio_echo_cancellation_enabled, # 从配置文件读取
reference_signal_buffer=self.reference_signal_buffer, # 传递参考信号缓冲区
logger=self.get_logger()
)
@@ -451,7 +462,13 @@ class RobotSpeakerNode(Node):
self.get_logger().error(f"图像编码失败: {e}")
return ""
def _llm_process_stream_with_camera(self, user_text: str, need_camera: bool, system_prompt: str | None = None) -> str:
def _llm_process_stream_with_camera(
self,
user_text: str,
need_camera: bool,
system_prompt: str | None = None,
suppress_tts: bool = False
) -> str:
"""LLM流式处理 - 支持多模态(文本+图像)"""
if not self.llm_client or not self.history:
return ""
@@ -530,7 +547,7 @@ class RobotSpeakerNode(Node):
tts_text_to_send = ""
self.get_logger().warning("[流式TTS] reply和tts_text_buffer都为空无法发送TTS文本")
if not self.interrupt_event.is_set() and tts_text_to_send:
if not self.interrupt_event.is_set() and tts_text_to_send and not suppress_tts:
text_len = len(tts_text_to_send)
self.get_logger().info(
f"[流式TTS] 发送完整文本到TTS队列: {tts_text_to_send[:100]}... (总长度: {text_len}字符)"
@@ -538,6 +555,8 @@ class RobotSpeakerNode(Node):
if text_len > 100:
self.get_logger().debug(f"[流式TTS] 完整文本内容: {tts_text_to_send}")
self._put_tts_text(tts_text_to_send)
elif suppress_tts:
self.get_logger().info("[流式TTS] suppress_tts开启跳过TTS输出")
return reply.strip() if reply else ""
@@ -722,11 +741,17 @@ class RobotSpeakerNode(Node):
reply = self._llm_process_stream_with_camera(
text,
need_camera=need_camera,
system_prompt=system_prompt
system_prompt=system_prompt,
suppress_tts=(intent == "skill_sequence")
)
if reply:
if self.history:
self.history.commit_turn(reply)
if intent == "skill_sequence":
skill_msg = String()
skill_msg.data = reply.strip()
self.skill_sequence_pub.publish(skill_msg)
self.get_logger().info(f"[技能序列] 已发布: {skill_msg.data}")
else:
if self.history:
self.history.cancel_turn()
@@ -771,6 +796,36 @@ class RobotSpeakerNode(Node):
super().destroy_node()
    def _on_skill_feedback(self, msg: String):
        """Handle JSON feedback from the skill bridge.

        Stores the parsed dict in self.latest_skill_feedback and injects a
        human-readable summary into the conversation history as a system
        message, so the next LLM turn can react to execution progress.
        """
        try:
            feedback = json.loads(msg.data)
            self.latest_skill_feedback = feedback
            # Summarise stage / current skill / progress / detail for the LLM.
            feedback_text = (
                f"【执行状态】阶段:{feedback.get('stage','')}, "
                f"技能:{feedback.get('current_skill','')}, "
                f"进度:{feedback.get('progress', 0):.1%}, "
                f"详情:{feedback.get('detail','')}"
            )
            if self.history:
                self.history.add_message("system", feedback_text)
        except Exception as e:
            # Malformed JSON is logged and dropped; feedback is best-effort.
            self.get_logger().warning(f"[技能反馈] 解析失败: {e}")
    def _on_skill_result(self, msg: String):
        """Handle the JSON final result from the skill bridge.

        Stores the parsed dict in self.latest_skill_result and appends a
        success/failure summary to the conversation history so the next LLM
        turn can adjust its plan based on the outcome.
        """
        try:
            result = json.loads(msg.data)
            self.latest_skill_result = result
            # Summarise overall success, skill counts and the server message.
            result_text = (
                f"【执行结果】{'成功' if result.get('success') else '失败'}, "
                f"总技能数:{result.get('total_skills', 0)}, "
                f"成功数:{result.get('succeeded_skills', 0)}, "
                f"消息:{result.get('message','')}"
            )
            if self.history:
                self.history.add_message("system", result_text)
        except Exception as e:
            # Malformed JSON is logged and dropped; result reporting is best-effort.
            self.get_logger().warning(f"[技能结果] 解析失败: {e}")
def _init_ros(args):
rclpy.init(args=args)

View File

@@ -2,3 +2,4 @@

View File

@@ -2,3 +2,4 @@

View File

@@ -10,3 +10,4 @@ class ASRClient:

View File

@@ -2,3 +2,4 @@

View File

@@ -12,3 +12,4 @@ class LLMClient:

View File

@@ -2,3 +2,4 @@

View File

@@ -11,3 +11,4 @@ class TTSClient:

View File

@@ -2,3 +2,4 @@

View File

@@ -2,3 +2,4 @@

View File

@@ -30,6 +30,7 @@ setup(
'console_scripts': [
'robot_speaker_node = robot_speaker.core.robot_speaker_node:main',
'register_speaker_node = robot_speaker.core.register_speaker_node:main',
'skill_bridge_node = robot_speaker.bridge.skill_bridge_node:main',
],
},
)