refactor: 删除回声消除相关代码,支持从hivecore_robot_drivers/img_dev获取图片
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
{
|
||||
"id": "robot_identity",
|
||||
"patterns": [
|
||||
"ni shi shei"
|
||||
"ni shi shui"
|
||||
],
|
||||
"answer": "我叫二狗,是蜂核科技的机器人,很高兴为你服务"
|
||||
},
|
||||
|
||||
@@ -595,5 +595,204 @@
|
||||
"env": "near",
|
||||
"threshold": 0.55,
|
||||
"registered_at": 1768530001.2158406
|
||||
},
|
||||
"user_1768845491": {
|
||||
"embedding": [
|
||||
0.015937702730298042,
|
||||
0.001559161813929677,
|
||||
-0.049822624772787094,
|
||||
0.09989305585622787,
|
||||
0.018279563635587692,
|
||||
-0.2004263699054718,
|
||||
0.006144546903669834,
|
||||
0.005965661257505417,
|
||||
0.012017739936709404,
|
||||
0.020486492663621902,
|
||||
-0.040278736501932144,
|
||||
-0.05860017612576485,
|
||||
0.04651434347033501,
|
||||
0.1633484661579132,
|
||||
-0.03308954834938049,
|
||||
-0.022089917212724686,
|
||||
0.12966565787792206,
|
||||
-0.001390158082358539,
|
||||
-0.05404090881347656,
|
||||
-0.048333823680877686,
|
||||
0.023206135258078575,
|
||||
-0.05033773183822632,
|
||||
-0.10477420687675476,
|
||||
0.10657669603824615,
|
||||
-0.09571000933647156,
|
||||
-0.05239453166723251,
|
||||
0.03246476873755455,
|
||||
0.04507458209991455,
|
||||
0.027843689545989037,
|
||||
-0.15640732645988464,
|
||||
-0.01717425137758255,
|
||||
0.053287796676158905,
|
||||
0.07642859220504761,
|
||||
0.16121289134025574,
|
||||
-0.034773923456668854,
|
||||
0.042213018983602524,
|
||||
0.03897469863295555,
|
||||
-0.0613938644528389,
|
||||
-0.01999823749065399,
|
||||
-0.03844919428229332,
|
||||
-0.08077485114336014,
|
||||
0.11703582853078842,
|
||||
-0.01661379635334015,
|
||||
-0.11473247408866882,
|
||||
-0.021961240097880363,
|
||||
-0.150223046541214,
|
||||
0.08799541741609573,
|
||||
0.019122444093227386,
|
||||
-0.04347413778305054,
|
||||
0.054035451263189316,
|
||||
-0.05809119716286659,
|
||||
-0.05327044054865837,
|
||||
0.026238180696964264,
|
||||
-0.08530712872743607,
|
||||
-0.005645385477691889,
|
||||
0.09096740186214447,
|
||||
0.08434951305389404,
|
||||
0.17141343653202057,
|
||||
-0.005146870389580727,
|
||||
-0.08602679520845413,
|
||||
-0.07365834712982178,
|
||||
0.05543521046638489,
|
||||
-0.1374669075012207,
|
||||
-0.10697560012340546,
|
||||
-0.009140565991401672,
|
||||
0.004680712707340717,
|
||||
0.12675900757312775,
|
||||
0.0848039835691452,
|
||||
-0.04435611888766289,
|
||||
0.19222386181354523,
|
||||
0.16802501678466797,
|
||||
0.006006766576319933,
|
||||
-0.002293711993843317,
|
||||
0.031131887808442116,
|
||||
0.0426473505795002,
|
||||
-0.0454414002597332,
|
||||
0.0455852746963501,
|
||||
0.01862845942378044,
|
||||
-0.056163739413022995,
|
||||
-0.030417200177907944,
|
||||
-0.08624715358018875,
|
||||
0.01799696497619152,
|
||||
0.05537595972418785,
|
||||
0.02721775881946087,
|
||||
-0.024693293496966362,
|
||||
-0.09453072398900986,
|
||||
0.03656083345413208,
|
||||
-0.019727006554603577,
|
||||
-0.04557542875409126,
|
||||
0.026594148948788643,
|
||||
0.06597225368022919,
|
||||
-0.07168523222208023,
|
||||
-0.17211276292800903,
|
||||
-0.06902605295181274,
|
||||
-0.01896323822438717,
|
||||
-0.03277217224240303,
|
||||
0.10336172580718994,
|
||||
0.028470057994127274,
|
||||
0.10734961926937103,
|
||||
-0.09156577289104462,
|
||||
0.06228775903582573,
|
||||
0.07215031236410141,
|
||||
0.0239338967949152,
|
||||
0.158150315284729,
|
||||
-0.10234662145376205,
|
||||
-0.02522525191307068,
|
||||
-0.051316920667886734,
|
||||
0.0021482903975993395,
|
||||
0.07491917163133621,
|
||||
-0.02104146033525467,
|
||||
-0.07901310920715332,
|
||||
0.07640012353658676,
|
||||
-0.06093406304717064,
|
||||
-0.13202868402004242,
|
||||
-0.06267445534467697,
|
||||
0.08388402312994003,
|
||||
-0.05089619383215904,
|
||||
-0.04823632910847664,
|
||||
0.01031999196857214,
|
||||
-0.023580649867653847,
|
||||
-0.09613402187824249,
|
||||
0.02805117331445217,
|
||||
0.07453802227973938,
|
||||
0.12510700523853302,
|
||||
0.14248521625995636,
|
||||
-0.042960282415151596,
|
||||
0.037714891135692596,
|
||||
-0.07789590954780579,
|
||||
0.013859922997653484,
|
||||
0.059469543397426605,
|
||||
-0.06383980810642242,
|
||||
0.029322469606995583,
|
||||
0.044372908771038055,
|
||||
0.012625147588551044,
|
||||
0.015539717860519886,
|
||||
0.019373703747987747,
|
||||
0.02928899973630905,
|
||||
0.015136508271098137,
|
||||
0.018850654363632202,
|
||||
0.10889417678117752,
|
||||
-0.026799343526363373,
|
||||
-0.0837407261133194,
|
||||
0.055622730404138565,
|
||||
-0.05373093858361244,
|
||||
-0.07665497809648514,
|
||||
-0.060409802943468094,
|
||||
0.1106465607881546,
|
||||
-0.13180992007255554,
|
||||
0.05790461599826813,
|
||||
-0.006277923472225666,
|
||||
0.016103282570838928,
|
||||
-0.0385354720056057,
|
||||
-0.032628193497657776,
|
||||
-0.07809191197156906,
|
||||
0.024083232507109642,
|
||||
-0.08718746900558472,
|
||||
-0.0539533905684948,
|
||||
-0.11702725291252136,
|
||||
0.027705105021595955,
|
||||
0.06656485795974731,
|
||||
-0.05842652544379234,
|
||||
0.03137844428420067,
|
||||
0.11062013357877731,
|
||||
-0.002389072673395276,
|
||||
-0.040558233857154846,
|
||||
-0.02512812428176403,
|
||||
-0.00023564467846881598,
|
||||
0.04711990803480148,
|
||||
0.022769151255488396,
|
||||
-0.013735070824623108,
|
||||
0.07807290554046631,
|
||||
-0.047492094337940216,
|
||||
-0.04897252842783928,
|
||||
0.006663929205387831,
|
||||
-0.11178303509950638,
|
||||
-0.008013523183763027,
|
||||
0.06803164631128311,
|
||||
0.008022366091609001,
|
||||
-0.04196283593773842,
|
||||
-0.025105053558945656,
|
||||
0.0431133434176445,
|
||||
-0.07424937933683395,
|
||||
0.0432509183883667,
|
||||
0.09608350694179535,
|
||||
-0.15923553705215454,
|
||||
-0.028727376833558083,
|
||||
0.01354081928730011,
|
||||
0.01657080464065075,
|
||||
-0.02491777203977108,
|
||||
-0.008332896046340466,
|
||||
0.06449767202138901,
|
||||
0.10712931305170059
|
||||
],
|
||||
"env": "near",
|
||||
"threshold": 0.55,
|
||||
"registered_at": 1768845491.8086796
|
||||
}
|
||||
}
|
||||
@@ -20,20 +20,20 @@ audio:
|
||||
microphone:
|
||||
device_index: 3 # 指向 iFLYTEK-M2 (hw:1,0)
|
||||
sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz,避免重采样可能导致的问题
|
||||
# device_index: -1 # 使用系统默认输入设备
|
||||
# sample_rate: 16000
|
||||
channels: 1 # 输入声道数:单声道(MONO,适合语音采集)
|
||||
chunk: 1024
|
||||
heartbeat_interval: 2.0 # 心跳间隔(秒),用于定期输出录音状态
|
||||
soundcard:
|
||||
card_index: 1 # USB Audio Device (card 1)
|
||||
device_index: 0 # USB Audio [USB Audio] (device 0)
|
||||
sample_rate: 48000 # 输出采样率:48kHz(iFLYTEK 支持 48000)
|
||||
# card_index: -1 # 使用默认声卡
|
||||
# device_index: -1 # 使用默认输出设备
|
||||
sample_rate: 48000 # 输出采样率:48kHz(iFLYTEK 支持 48000)
|
||||
# sample_rate: 44100 # 输出采样率:默认 44100
|
||||
channels: 2 # 输出声道数:立体声(2声道,FL+FR)
|
||||
volume: 1.0 # 音量比例(0.0-1.0,0.2表示20%音量)
|
||||
echo_cancellation:
|
||||
enable: false # 是否启用回声消除
|
||||
max_duration_ms: 500 # 参考信号缓冲区最大时长(毫秒)
|
||||
tts:
|
||||
source_sample_rate: 22050 # TTS服务固定输出采样率(DashScope服务固定值,不可修改)
|
||||
source_channels: 1 # TTS服务固定输出声道数(DashScope服务固定值,不可修改)
|
||||
@@ -56,15 +56,7 @@ system:
|
||||
sv_threshold: 0.55 # 声纹识别阈值(0.0-1.0,值越小越宽松,值越大越严格)
|
||||
sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径(JSON格式;此处为用户主目录下的路径,并非相对于ROS2包share目录)
|
||||
sv_buffer_size: 240000 # 声纹验证录音缓冲区大小(样本数,48kHz下5秒=240000)
|
||||
sv_registration_silence_threshold_ms: 500 # 声纹注册状态下的静音阈值(毫秒)
|
||||
|
||||
camera:
|
||||
serial_number: "405622075404" # 相机序列号(Intel RealSense D435)
|
||||
rgb:
|
||||
width: 640 # 图像宽度
|
||||
height: 480 # 图像高度
|
||||
fps: 30 # 帧率(支持:6, 10, 15, 30, 60)
|
||||
format: "RGB8" # 图像格式:RGB8, BGR8
|
||||
image:
|
||||
jpeg_quality: 85 # JPEG压缩质量(0-100,85是质量和大小平衡点)
|
||||
max_size: "1280x720" # 最大尺寸
|
||||
|
||||
@@ -1,10 +1,24 @@
|
||||
from launch import LaunchDescription
|
||||
from launch_ros.actions import Node
|
||||
from launch.actions import SetEnvironmentVariable
|
||||
import os
|
||||
|
||||
|
||||
def generate_launch_description():
|
||||
"""启动语音交互节点,所有参数从 voice.yaml 读取"""
|
||||
# 获取interfaces包的install路径
|
||||
interfaces_install_path = os.path.expanduser('~/ros_learn/hivecore_robot_interfaces/install')
|
||||
|
||||
# 设置AMENT_PREFIX_PATH,确保能找到interfaces包的消息类型
|
||||
ament_prefix_path = os.environ.get('AMENT_PREFIX_PATH', '')
|
||||
if interfaces_install_path not in ament_prefix_path:
|
||||
if ament_prefix_path:
|
||||
ament_prefix_path = f'{ament_prefix_path}:{interfaces_install_path}'
|
||||
else:
|
||||
ament_prefix_path = interfaces_install_path
|
||||
|
||||
return LaunchDescription([
|
||||
SetEnvironmentVariable('AMENT_PREFIX_PATH', ament_prefix_path),
|
||||
Node(
|
||||
package='robot_speaker',
|
||||
executable='robot_speaker_node',
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
|
||||
<depend>rclpy</depend>
|
||||
<depend>std_msgs</depend>
|
||||
<depend>sensor_msgs</depend>
|
||||
<depend>cv_bridge</depend>
|
||||
<depend>ament_index_python</depend>
|
||||
<depend>interfaces</depend>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ pypinyin>=0.49.0
|
||||
rclpy>=3.0.0
|
||||
pyrealsense2>=2.54.0
|
||||
Pillow>=10.0.0
|
||||
numpy>=1.24.0
|
||||
numpy>=1.24.0,<2.0.0 # cv_bridge需要NumPy 1.x,NumPy 2.x会导致段错误
|
||||
PyYAML>=6.0
|
||||
aec-audio-processing
|
||||
modelscope>=1.33.0
|
||||
|
||||
@@ -1,2 +1,4 @@
|
||||
# Bridge package for connecting LLM outputs to brain execution.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import os
|
||||
import yaml
|
||||
import json
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
from pypinyin import pinyin, Style
|
||||
@@ -12,7 +13,7 @@ class IntentResult:
|
||||
intent: str # "skill_sequence" | "kb_qa" | "chat_text" | "chat_camera"
|
||||
text: str
|
||||
need_camera: bool
|
||||
camera_mode: Optional[str] # "head" | "left_hand" | "right_hand" | None
|
||||
camera_mode: Optional[str] # "top" | "left" | "right" | None
|
||||
system_prompt: Optional[str]
|
||||
|
||||
|
||||
@@ -21,13 +22,24 @@ class IntentRouter:
|
||||
self.camera_capture_keywords = [
|
||||
"pai zhao", "pai ge zhao", "pai zhang zhao"
|
||||
]
|
||||
self.skill_keywords = [
|
||||
"ban xiang zi"
|
||||
# 动作词列表(拼音)- 用于检测技能序列意图
|
||||
self.action_verbs = [
|
||||
"zou", "zou liang bu", "zou ji bu", # 走、走两步、走几步
|
||||
"na", "na qi", "na zhu", # 拿、拿起、拿住
|
||||
"ban", "ban yun", # 搬、搬运
|
||||
"zhua", "zhua qu", # 抓、抓取
|
||||
"tui", "tui dong", # 推、推动
|
||||
"la", "la dong", # 拉、拉动
|
||||
"yi dong", "qian jin", "hou tui", # 移动、前进、后退
|
||||
"kong zhi", "cao zuo", # 控制、操作
|
||||
"fang xia", "fang zhi", # 放下、放置
|
||||
"ju qi", "sheng qi", # 举起、升起
|
||||
]
|
||||
self.kb_keywords = [
|
||||
"ni shi shei", "ni de ming zi"
|
||||
"ni shi shui", "ni de ming zi"
|
||||
]
|
||||
self._cached_skill_names: list[str] | None = None
|
||||
self._cached_kb_data: list[dict] | None = None
|
||||
|
||||
def _load_brain_skill_names(self) -> list[str]:
|
||||
if self._cached_skill_names is not None:
|
||||
@@ -53,35 +65,36 @@ class IntentRouter:
|
||||
py_list = pinyin(''.join(chars), style=Style.NORMAL)
|
||||
return ' '.join([item[0] for item in py_list]).lower().strip()
|
||||
|
||||
def is_skill_sequence_intent(self, text: str) -> bool:
    """Return True when any configured skill keyword occurs in the pinyin of *text*.

    The utterance is converted with ``self.to_pinyin`` and each entry of
    ``self.skill_keywords`` is tested as a substring of the result.
    """
    spoken = self.to_pinyin(text)
    for keyword in self.skill_keywords:
        if keyword in spoken:
            return True
    return False
|
||||
def is_skill_sequence_intent(self, text: str, text_pinyin: str | None = None) -> bool:
|
||||
if text_pinyin is None:
|
||||
text_pinyin = self.to_pinyin(text)
|
||||
|
||||
# 检查动作词(精确匹配)
|
||||
return any(action in text_pinyin for action in self.action_verbs)
|
||||
|
||||
|
||||
def check_camera_command(self, text: str) -> tuple[bool, Optional[str]]:
|
||||
def check_camera_command(self, text: str, text_pinyin: str | None = None) -> tuple[bool, Optional[str]]:
|
||||
"""检查是否包含拍照指令,返回(是否需要相机, 相机模式)"""
|
||||
if not text:
|
||||
return False, None
|
||||
text_pinyin = self.to_pinyin(text)
|
||||
for keyword in self.camera_capture_keywords:
|
||||
if keyword in text_pinyin:
|
||||
return True, self.detect_camera_mode(text)
|
||||
if text_pinyin is None:
|
||||
text_pinyin = self.to_pinyin(text)
|
||||
# 精确匹配:关键词必须作为完整短语出现在文本拼音中
|
||||
if any(keyword in text_pinyin for keyword in self.camera_capture_keywords):
|
||||
return True, self.detect_camera_mode(text, text_pinyin)
|
||||
return False, None
|
||||
|
||||
def detect_camera_mode(self, text: str) -> str:
    """Pick the camera named in *text*: "left_hand", "right_hand" or "head".

    Keyword groups are checked in the order left, right, head against the
    pinyin of the utterance; "head" is also the fallback when nothing matches.
    """
    spoken = self.to_pinyin(text)
    keyword_table = (
        ("left_hand", ("zuo shou", "zuo bi", "zuo bian")),
        ("right_hand", ("you shou", "you bi", "you bian")),
        ("head", ("tou", "nao dai")),
    )
    for mode, keywords in keyword_table:
        if any(keyword in spoken for keyword in keywords):
            return mode
    return "head"
|
||||
def detect_camera_mode(self, text: str, text_pinyin: str | None = None) -> str:
|
||||
"""检测相机模式,返回与相机驱动匹配的position值:left/right/top"""
|
||||
if text_pinyin is None:
|
||||
text_pinyin = self.to_pinyin(text)
|
||||
if any(kw in text_pinyin for kw in ["zuo shou", "zuo bi", "zuo bian"]):
|
||||
return "left"
|
||||
if any(kw in text_pinyin for kw in ["you shou", "you bi", "you bian"]):
|
||||
return "right"
|
||||
if any(kw in text_pinyin for kw in ["tou", "nao dai"]):
|
||||
return "top"
|
||||
return "top" # 默认头部相机
|
||||
|
||||
def build_skill_prompt(self) -> str:
|
||||
skills = self._load_brain_skill_names()
|
||||
@@ -93,7 +106,7 @@ class IntentRouter:
|
||||
)
|
||||
return (
|
||||
"你是机器人任务规划器。\n"
|
||||
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照(默认头部相机),并结合当前环境信息生成简洁、可执行的技能序列。\n"
|
||||
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照,并结合当前环境信息生成简洁、可执行的技能序列。\n"
|
||||
"【重要】如果对话历史中包含【执行结果】或【执行状态】,请参考上一轮技能序列的执行情况,根据成功/失败信息调整本次技能序列。\n"
|
||||
"【输出格式要求】只输出逗号分隔的技能名称,不要任何解释说明。\n"
|
||||
+ skill_guard
|
||||
@@ -109,12 +122,38 @@ class IntentRouter:
|
||||
"你是一个智能语音助手。\n"
|
||||
"请自然、简短地与用户对话。"
|
||||
)
|
||||
|
||||
def build_kb_prompt(self) -> str:
    """Return the two-line system prompt used for knowledge-base answers."""
    persona = "你是蜂核科技的员工。"
    instruction = "请基于知识库信息回答用户问题,回答要准确简洁。"
    return f"{persona}\n{instruction}"
|
||||
|
||||
def _load_kb_data(self) -> list[dict]:
|
||||
"""加载知识库数据"""
|
||||
if self._cached_kb_data is not None:
|
||||
return self._cached_kb_data
|
||||
kb_data = []
|
||||
try:
|
||||
robot_speaker_share = get_package_share_directory("robot_speaker")
|
||||
kb_path = os.path.join(robot_speaker_share, "config", "knowledge.json")
|
||||
with open(kb_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
kb_data = data["entries"]
|
||||
except Exception as e:
|
||||
kb_data = []
|
||||
self._cached_kb_data = kb_data
|
||||
return kb_data
|
||||
|
||||
def search_kb(self, text: str) -> Optional[str]:
    """Look up *text* in the knowledge base and return the matching answer.

    The utterance is converted to pinyin and each entry's ``"patterns"`` are
    tested as substrings of it. The first entry that both matches and has a
    non-empty ``"answer"`` wins.

    Returns:
        The answer string, or None for empty input or when nothing matches.
    """
    if not text:
        return None

    spoken = self.to_pinyin(text)
    for entry in self._load_kb_data():
        matched = any(pattern in spoken for pattern in entry["patterns"])
        # An entry that matches but has an empty answer is skipped so that a
        # later entry can still answer.
        if matched and entry["answer"]:
            return entry["answer"]
    return None
|
||||
|
||||
def build_default_system_prompt(self) -> str:
|
||||
return (
|
||||
@@ -125,12 +164,11 @@ class IntentRouter:
|
||||
)
|
||||
|
||||
def route(self, text: str) -> IntentResult:
|
||||
need_camera, camera_mode = self.check_camera_command(text)
|
||||
text_pinyin = self.to_pinyin(text)
|
||||
need_camera, camera_mode = self.check_camera_command(text, text_pinyin)
|
||||
|
||||
if self.is_skill_sequence_intent(text):
|
||||
if camera_mode is None:
|
||||
camera_mode = "head"
|
||||
if self.is_skill_sequence_intent(text, text_pinyin):
|
||||
# 用户没有指定相机模式时,保持 None,使用第一个收到的消息
|
||||
return IntentResult(
|
||||
intent="skill_sequence",
|
||||
text=text,
|
||||
@@ -139,13 +177,14 @@ class IntentRouter:
|
||||
system_prompt=self.build_skill_prompt()
|
||||
)
|
||||
|
||||
if any(k in text_pinyin for k in self.kb_keywords):
|
||||
# 精确匹配:关键词必须作为完整短语出现在文本拼音中
|
||||
if any(keyword in text_pinyin for keyword in self.kb_keywords):
|
||||
return IntentResult(
|
||||
intent="kb_qa",
|
||||
text=text,
|
||||
need_camera=False,
|
||||
camera_mode=None,
|
||||
system_prompt=self.build_kb_prompt()
|
||||
system_prompt=None # kb_qa不走LLM,不需要system_prompt
|
||||
)
|
||||
|
||||
return IntentResult(
|
||||
|
||||
@@ -15,7 +15,6 @@ from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
from robot_speaker.perception.audio_pipeline import VADDetector, AudioRecorder
|
||||
from robot_speaker.perception.speaker_verifier import SpeakerVerificationClient
|
||||
from robot_speaker.perception.echo_cancellation import ReferenceSignalBuffer
|
||||
from robot_speaker.models.asr.dashscope import DashScopeASR
|
||||
from robot_speaker.models.tts.dashscope import DashScopeTTSClient
|
||||
from robot_speaker.core.types import TTSRequest
|
||||
@@ -45,13 +44,6 @@ class RegisterSpeakerNode(Node):
|
||||
sample_rate=self.sample_rate
|
||||
)
|
||||
|
||||
# 创建参考信号缓冲区(用于回声消除)
|
||||
self.reference_signal_buffer = ReferenceSignalBuffer(
|
||||
max_duration_ms=self.audio_echo_cancellation_max_duration_ms,
|
||||
sample_rate=self.sample_rate,
|
||||
channels=self.output_channels
|
||||
) if self.audio_echo_cancellation_enabled else None
|
||||
|
||||
self.audio_recorder = AudioRecorder(
|
||||
device_index=self.input_device_index,
|
||||
sample_rate=self.sample_rate,
|
||||
@@ -71,8 +63,6 @@ class RegisterSpeakerNode(Node):
|
||||
on_audio_chunk=self._on_audio_chunk,
|
||||
should_put_to_queue=self._should_put_to_queue,
|
||||
get_silence_threshold=lambda: self.silence_duration_ms,
|
||||
enable_echo_cancellation=self.audio_echo_cancellation_enabled, # 启用回声消除,保持与主程序一致
|
||||
reference_signal_buffer=self.reference_signal_buffer,
|
||||
logger=self.get_logger()
|
||||
)
|
||||
|
||||
@@ -122,7 +112,6 @@ class RegisterSpeakerNode(Node):
|
||||
tts_source_sample_rate=self.audio_tts_source_sample_rate,
|
||||
tts_source_channels=self.audio_tts_source_channels,
|
||||
tts_ffmpeg_thread_queue_size=self.audio_tts_ffmpeg_thread_queue_size,
|
||||
reference_signal_buffer=self.reference_signal_buffer,
|
||||
logger=self.get_logger()
|
||||
)
|
||||
|
||||
@@ -170,9 +159,6 @@ class RegisterSpeakerNode(Node):
|
||||
self.output_channels = soundcard['channels']
|
||||
self.output_volume = soundcard['volume']
|
||||
|
||||
echo = audio.get('echo_cancellation', {})
|
||||
self.audio_echo_cancellation_enabled = echo['enable']
|
||||
self.audio_echo_cancellation_max_duration_ms = echo.get('max_duration_ms', 200)
|
||||
|
||||
tts_audio = audio.get('tts', {})
|
||||
self.audio_tts_source_sample_rate = tts_audio.get('source_sample_rate', 22050)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -11,3 +11,5 @@ class ASRClient:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -13,3 +13,5 @@ class LLMClient:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -12,3 +12,5 @@ class TTSClient:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
"""
|
||||
音频处理模块:录音 + VAD + 回声消除
|
||||
音频处理模块:录音 + VAD
|
||||
"""
|
||||
import time
|
||||
import pyaudio
|
||||
import webrtcvad
|
||||
import struct
|
||||
import queue
|
||||
from .echo_cancellation import EchoCanceller, ReferenceSignalBuffer
|
||||
|
||||
|
||||
class VADDetector:
|
||||
@@ -35,8 +34,6 @@ class AudioRecorder:
|
||||
on_audio_chunk=None, # 音频chunk回调(用于声纹录音等,可选)
|
||||
should_put_to_queue=None, # 检查是否应该将音频放入队列(用于阻止ASR,可选)
|
||||
get_silence_threshold=None, # 获取动态静音阈值(毫秒,可选)
|
||||
enable_echo_cancellation: bool = True, # 是否启用回声消除
|
||||
reference_signal_buffer: ReferenceSignalBuffer = None, # 参考信号缓冲区(可选)
|
||||
logger=None):
|
||||
self.device_index = device_index
|
||||
self.sample_rate = sample_rate
|
||||
@@ -97,39 +94,6 @@ class AudioRecorder:
|
||||
|
||||
self.format = pyaudio.paInt16
|
||||
self._debug_counter = 0
|
||||
|
||||
# 回声消除相关
|
||||
self.enable_echo_cancellation = enable_echo_cancellation
|
||||
self.reference_signal_buffer = reference_signal_buffer
|
||||
if enable_echo_cancellation:
|
||||
# 初始化回声消除器(在录音线程中同步处理,不是单独线程)
|
||||
# frame_size设置为chunk大小,确保每次处理一个chunk
|
||||
frame_size = chunk
|
||||
try:
|
||||
# 获取参考信号声道数(从reference_signal_buffer获取,因为它是根据播放声道数创建的)
|
||||
ref_channels = self.reference_signal_buffer.channels if self.reference_signal_buffer else 1
|
||||
self.echo_canceller = EchoCanceller(
|
||||
sample_rate=sample_rate,
|
||||
frame_size=frame_size,
|
||||
channels=self.channels, # 麦克风输入:1声道
|
||||
ref_channels=ref_channels, # 参考信号:播放声道数(2声道)
|
||||
logger=logger
|
||||
)
|
||||
if self.echo_canceller.aec is not None:
|
||||
if logger:
|
||||
logger.info(f"回声消除器已启用: sample_rate={sample_rate}, frame_size={frame_size}")
|
||||
else:
|
||||
if logger:
|
||||
logger.warning("回声消除器初始化失败,将禁用回声消除功能")
|
||||
self.enable_echo_cancellation = False
|
||||
self.echo_canceller = None
|
||||
except Exception as e:
|
||||
if logger:
|
||||
logger.warning(f"回声消除器初始化失败: {e},将禁用回声消除功能")
|
||||
self.enable_echo_cancellation = False
|
||||
self.echo_canceller = None
|
||||
else:
|
||||
self.echo_canceller = None
|
||||
|
||||
def record_with_vad(self):
|
||||
"""录音线程:VAD + 能量检测"""
|
||||
@@ -163,19 +127,7 @@ class AudioRecorder:
|
||||
while not self.stop_flag():
|
||||
# exception_on_overflow=False, 宁可丢帧,也不阻塞
|
||||
data = stream.read(self.chunk, exception_on_overflow=False)
|
||||
|
||||
# 回声消除处理
|
||||
processed_data = data
|
||||
if self.enable_echo_cancellation and self.echo_canceller and self.reference_signal_buffer:
|
||||
try:
|
||||
# 获取参考信号(长度与麦克风信号匹配)
|
||||
ref_signal = self.reference_signal_buffer.get_reference(num_samples=self.chunk)
|
||||
# 执行回声消除
|
||||
processed_data = self.echo_canceller.process(data, ref_signal)
|
||||
except Exception as e:
|
||||
if self.logger:
|
||||
self.logger.warning(f"回声消除处理失败: {e},使用原始音频")
|
||||
processed_data = data
|
||||
|
||||
# 检查是否应该将音频放入队列(用于阻止ASR,例如无声纹文件时需要注册)
|
||||
if self.should_put_to_queue():
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
"""
|
||||
相机模块 - RealSense相机封装
|
||||
"""
|
||||
import numpy as np
|
||||
import contextlib
|
||||
|
||||
|
||||
class CameraClient:
|
||||
def __init__(self,
|
||||
serial_number: str | None,
|
||||
width: int,
|
||||
height: int,
|
||||
fps: int,
|
||||
format: str,
|
||||
logger=None):
|
||||
self.serial_number = serial_number
|
||||
self.width = width
|
||||
self.height = height
|
||||
self.fps = fps
|
||||
self.format = format
|
||||
self.logger = logger
|
||||
|
||||
self.pipeline = None
|
||||
self.config = None
|
||||
self._is_initialized = False
|
||||
self._rs = None
|
||||
|
||||
def _log(self, level: str, msg: str):
|
||||
if self.logger:
|
||||
getattr(self.logger, level, self.logger.info)(msg)
|
||||
else:
|
||||
print(f"[相机] {msg}")
|
||||
|
||||
def initialize(self) -> bool:
|
||||
"""
|
||||
初始化并启动相机管道
|
||||
"""
|
||||
if self._is_initialized:
|
||||
return True
|
||||
|
||||
try:
|
||||
import pyrealsense2 as rs
|
||||
self._rs = rs
|
||||
|
||||
self.pipeline = rs.pipeline()
|
||||
self.config = rs.config()
|
||||
|
||||
if self.serial_number:
|
||||
self.config.enable_device(self.serial_number)
|
||||
|
||||
self.config.enable_stream(
|
||||
rs.stream.color,
|
||||
self.width,
|
||||
self.height,
|
||||
rs.format.rgb8 if self.format == 'RGB8' else rs.format.bgr8,
|
||||
self.fps
|
||||
)
|
||||
|
||||
self.pipeline.start(self.config)
|
||||
self._is_initialized = True
|
||||
self._log("info", f"相机已启动并保持运行: {self.width}x{self.height}@{self.fps}fps")
|
||||
return True
|
||||
except Exception as e:
|
||||
self._log("error", f"相机初始化失败: {e}")
|
||||
self.cleanup()
|
||||
return False
|
||||
|
||||
def cleanup(self):
|
||||
"""停止相机管道,释放资源"""
|
||||
if self.pipeline:
|
||||
self.pipeline.stop()
|
||||
self._log("info", "相机已停止")
|
||||
self.pipeline = None
|
||||
self.config = None
|
||||
self._is_initialized = False
|
||||
|
||||
def capture_rgb(self) -> np.ndarray | None:
|
||||
"""
|
||||
从运行中的相机管道捕获一帧RGB图像
|
||||
"""
|
||||
if not self._is_initialized:
|
||||
self._log("error", "相机未初始化,无法捕获图像")
|
||||
return None
|
||||
|
||||
try:
|
||||
frames = self.pipeline.wait_for_frames()
|
||||
color_frame = frames.get_color_frame()
|
||||
|
||||
return np.asanyarray(color_frame.get_data())
|
||||
except Exception as e:
|
||||
self._log("error", f"捕获图像失败: {e}")
|
||||
return None
|
||||
|
||||
@contextlib.contextmanager
|
||||
def capture_context(self):
|
||||
"""
|
||||
上下文管理器:拍照并自动清理资源
|
||||
"""
|
||||
image_data = self.capture_rgb()
|
||||
try:
|
||||
yield image_data
|
||||
finally:
|
||||
if image_data is not None:
|
||||
del image_data
|
||||
|
||||
def capture_multiple(self, count: int = 1) -> list[np.ndarray]:
|
||||
"""
|
||||
捕获多张图像(为未来扩展准备)
|
||||
"""
|
||||
images = []
|
||||
for i in range(count):
|
||||
img = self.capture_rgb()
|
||||
if img is not None:
|
||||
images.append(img)
|
||||
else:
|
||||
self._log("warning", f"第{i+1}张图像捕获失败")
|
||||
return images
|
||||
|
||||
@contextlib.contextmanager
|
||||
def capture_multiple_context(self, count: int = 1):
|
||||
"""
|
||||
上下文管理器:捕获多张图像并自动清理资源
|
||||
"""
|
||||
images = self.capture_multiple(count)
|
||||
try:
|
||||
yield images
|
||||
finally:
|
||||
for img in images:
|
||||
del img
|
||||
images.clear()
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
import collections
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ReferenceSignalBuffer:
|
||||
"""参考信号缓冲区"""
|
||||
|
||||
def __init__(self, sample_rate: int, channels: int, max_duration_ms: int | None = None,
|
||||
buffer_seconds: float = 5.0):
|
||||
self.sample_rate = int(sample_rate)
|
||||
self.channels = int(channels)
|
||||
if max_duration_ms is not None:
|
||||
buffer_seconds = max(float(max_duration_ms) / 1000.0, 0.1)
|
||||
self.max_samples = int(self.sample_rate * buffer_seconds)
|
||||
self._buffer = collections.deque(maxlen=self.max_samples * self.channels)
|
||||
|
||||
def add_reference(self, data: bytes, source_sample_rate: int, source_channels: int):
|
||||
if source_sample_rate != self.sample_rate or source_channels != self.channels:
|
||||
return
|
||||
samples = np.frombuffer(data, dtype=np.int16)
|
||||
self._buffer.extend(samples.tolist())
|
||||
|
||||
def get_reference(self, num_samples: int) -> bytes:
|
||||
needed = int(num_samples) * self.channels
|
||||
if needed <= 0:
|
||||
return b""
|
||||
if len(self._buffer) < needed:
|
||||
data = list(self._buffer) + [0] * (needed - len(self._buffer))
|
||||
else:
|
||||
data = list(self._buffer)[-needed:]
|
||||
return np.array(data, dtype=np.int16).tobytes()
|
||||
|
||||
|
||||
class EchoCanceller:
    """Echo canceller backed by the optional ``aec-audio-processing`` package.

    If the package cannot be imported or configured, ``self.aec`` is None and
    :meth:`process` returns the microphone audio unchanged.
    """

    def __init__(self, sample_rate: int, frame_size: int, channels: int, ref_channels: int, logger=None):
        """Create and configure the underlying audio processor.

        Args:
            sample_rate: Sample rate shared by microphone and reference audio.
            frame_size: Caller's chunk size in samples; stored only, since
                processing is done in 10 ms frames.
            channels: Microphone channel count.
            ref_channels: Reference (playback) channel count.
            logger: Optional logger used for warnings.
        """
        self.sample_rate = int(sample_rate)
        self.frame_size = int(frame_size)
        self.channels = int(channels)
        self.ref_channels = int(ref_channels)
        self.logger = logger
        self.aec = None
        self._process_reverse = None
        self._frame_bytes = int(self.sample_rate / 100) * self.channels * 2  # 10ms, int16
        self._ref_frame_bytes = int(self.sample_rate / 100) * self.ref_channels * 2
        try:
            from aec_audio_processing import AudioProcessor
            self.aec = AudioProcessor(enable_aec=True, enable_ns=False, enable_agc=False)
            self.aec.set_stream_format(self.sample_rate, self.channels)
            # The optional calls are feature-probed because they may not exist
            # on every version of the processor.
            if hasattr(self.aec, "set_reverse_stream_format"):
                self.aec.set_reverse_stream_format(self.sample_rate, self.ref_channels)
            if hasattr(self.aec, "set_stream_delay"):
                self.aec.set_stream_delay(0)
            if hasattr(self.aec, "process_reverse_stream"):
                self._process_reverse = self.aec.process_reverse_stream
            elif hasattr(self.aec, "process_reverse"):
                self._process_reverse = self.aec.process_reverse
        except Exception as e:
            # Degrade to pass-through, but report why instead of hiding it.
            self.aec = None
            self._process_reverse = None
            if self.logger:
                self.logger.warning(f"回声消除器初始化失败: {e}")

    def process(self, mic_data: bytes, ref_data: bytes) -> bytes:
        """Run echo cancellation over *mic_data* in 10 ms frames.

        Args:
            mic_data: int16 microphone PCM bytes.
            ref_data: int16 reference PCM bytes; may be empty, in which case
                no reference is fed to the processor.

        Returns:
            Processed audio truncated to ``len(mic_data)``, or ``mic_data``
            unchanged when the processor is unavailable, the input is empty,
            or processing raises.
        """
        if not self.aec:
            return mic_data
        if not mic_data:
            return mic_data

        try:
            out_chunks = []
            total_len = len(mic_data)
            frame_bytes = self._frame_bytes
            ref_frame_bytes = self._ref_frame_bytes

            # Ceiling division: a trailing partial frame is zero-padded below.
            frame_count = (total_len + frame_bytes - 1) // frame_bytes
            for i in range(frame_count):
                m_start = i * frame_bytes
                mic_frame = mic_data[m_start:m_start + frame_bytes]
                if len(mic_frame) < frame_bytes:
                    mic_frame = mic_frame + b"\x00" * (frame_bytes - len(mic_frame))

                if ref_data:
                    r_start = i * ref_frame_bytes
                    ref_frame = ref_data[r_start:r_start + ref_frame_bytes]
                    if len(ref_frame) < ref_frame_bytes:
                        ref_frame = ref_frame + b"\x00" * (ref_frame_bytes - len(ref_frame))
                    if self._process_reverse:
                        self._process_reverse(ref_frame)

                processed = self.aec.process_stream(mic_frame)
                out_chunks.append(processed if processed is not None else mic_frame)

            # Drop the padding added to the last frame.
            return b"".join(out_chunks)[:total_len]
        except Exception as e:
            if self.logger:
                self.logger.warning(f"回声消除处理失败: {e},使用原始音频")
            return mic_data
|
||||
@@ -3,3 +3,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
查看相机画面的简单脚本
|
||||
按空格键保存当前帧,按'q'键退出
|
||||
"""
|
||||
import sys
|
||||
import cv2
|
||||
import numpy as np
|
||||
try:
|
||||
import pyrealsense2 as rs
|
||||
except ImportError:
|
||||
print("错误: 未安装pyrealsense2,请运行: pip install pyrealsense2")
|
||||
sys.exit(1)
|
||||
|
||||
def main():
    """Preview the RealSense colour stream; Space saves the frame, 'q' quits."""
    # Configure a 640x480 RGB8 colour stream at 30 fps and start it.
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.color, 640, 480, rs.format.rgb8, 30)
    pipeline.start(config)
    print("相机已启动,按空格键保存图片,按'q'键退出")

    saved = 0
    quit_key = ord('q')
    save_key = ord(' ')
    try:
        while True:
            color_frame = pipeline.wait_for_frames().get_color_frame()
            if not color_frame:
                continue

            # The stream delivers RGB while OpenCV expects BGR.
            rgb_image = np.asanyarray(color_frame.get_data())
            bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
            cv2.imshow('Camera View', bgr_image)

            pressed = cv2.waitKey(1) & 0xFF
            if pressed == quit_key:
                print("退出...")
                break
            if pressed == save_key:
                saved += 1
                filename = f'camera_frame_{saved:04d}.jpg'
                cv2.imwrite(filename, bgr_image)
                print(f"已保存: {filename}")
    except KeyboardInterrupt:
        print("\n中断...")
    finally:
        pipeline.stop()
        cv2.destroyAllWindows()
        print("相机已关闭")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user