refactor: 删除回声消除相关代码,支持从hivecore_robot_drivers/img_dev获取图片

This commit is contained in:
lxy
2026-01-20 09:28:57 +08:00
parent 71062701e1
commit 98c0eb5ca5
24 changed files with 925 additions and 726 deletions

View File

@@ -3,7 +3,7 @@
{
"id": "robot_identity",
"patterns": [
"ni shi shei"
"ni shi shui"
],
"answer": "我叫二狗,是蜂核科技的机器人,很高兴为你服务"
},

View File

@@ -595,5 +595,204 @@
"env": "near",
"threshold": 0.55,
"registered_at": 1768530001.2158406
},
"user_1768845491": {
"embedding": [
0.015937702730298042,
0.001559161813929677,
-0.049822624772787094,
0.09989305585622787,
0.018279563635587692,
-0.2004263699054718,
0.006144546903669834,
0.005965661257505417,
0.012017739936709404,
0.020486492663621902,
-0.040278736501932144,
-0.05860017612576485,
0.04651434347033501,
0.1633484661579132,
-0.03308954834938049,
-0.022089917212724686,
0.12966565787792206,
-0.001390158082358539,
-0.05404090881347656,
-0.048333823680877686,
0.023206135258078575,
-0.05033773183822632,
-0.10477420687675476,
0.10657669603824615,
-0.09571000933647156,
-0.05239453166723251,
0.03246476873755455,
0.04507458209991455,
0.027843689545989037,
-0.15640732645988464,
-0.01717425137758255,
0.053287796676158905,
0.07642859220504761,
0.16121289134025574,
-0.034773923456668854,
0.042213018983602524,
0.03897469863295555,
-0.0613938644528389,
-0.01999823749065399,
-0.03844919428229332,
-0.08077485114336014,
0.11703582853078842,
-0.01661379635334015,
-0.11473247408866882,
-0.021961240097880363,
-0.150223046541214,
0.08799541741609573,
0.019122444093227386,
-0.04347413778305054,
0.054035451263189316,
-0.05809119716286659,
-0.05327044054865837,
0.026238180696964264,
-0.08530712872743607,
-0.005645385477691889,
0.09096740186214447,
0.08434951305389404,
0.17141343653202057,
-0.005146870389580727,
-0.08602679520845413,
-0.07365834712982178,
0.05543521046638489,
-0.1374669075012207,
-0.10697560012340546,
-0.009140565991401672,
0.004680712707340717,
0.12675900757312775,
0.0848039835691452,
-0.04435611888766289,
0.19222386181354523,
0.16802501678466797,
0.006006766576319933,
-0.002293711993843317,
0.031131887808442116,
0.0426473505795002,
-0.0454414002597332,
0.0455852746963501,
0.01862845942378044,
-0.056163739413022995,
-0.030417200177907944,
-0.08624715358018875,
0.01799696497619152,
0.05537595972418785,
0.02721775881946087,
-0.024693293496966362,
-0.09453072398900986,
0.03656083345413208,
-0.019727006554603577,
-0.04557542875409126,
0.026594148948788643,
0.06597225368022919,
-0.07168523222208023,
-0.17211276292800903,
-0.06902605295181274,
-0.01896323822438717,
-0.03277217224240303,
0.10336172580718994,
0.028470057994127274,
0.10734961926937103,
-0.09156577289104462,
0.06228775903582573,
0.07215031236410141,
0.0239338967949152,
0.158150315284729,
-0.10234662145376205,
-0.02522525191307068,
-0.051316920667886734,
0.0021482903975993395,
0.07491917163133621,
-0.02104146033525467,
-0.07901310920715332,
0.07640012353658676,
-0.06093406304717064,
-0.13202868402004242,
-0.06267445534467697,
0.08388402312994003,
-0.05089619383215904,
-0.04823632910847664,
0.01031999196857214,
-0.023580649867653847,
-0.09613402187824249,
0.02805117331445217,
0.07453802227973938,
0.12510700523853302,
0.14248521625995636,
-0.042960282415151596,
0.037714891135692596,
-0.07789590954780579,
0.013859922997653484,
0.059469543397426605,
-0.06383980810642242,
0.029322469606995583,
0.044372908771038055,
0.012625147588551044,
0.015539717860519886,
0.019373703747987747,
0.02928899973630905,
0.015136508271098137,
0.018850654363632202,
0.10889417678117752,
-0.026799343526363373,
-0.0837407261133194,
0.055622730404138565,
-0.05373093858361244,
-0.07665497809648514,
-0.060409802943468094,
0.1106465607881546,
-0.13180992007255554,
0.05790461599826813,
-0.006277923472225666,
0.016103282570838928,
-0.0385354720056057,
-0.032628193497657776,
-0.07809191197156906,
0.024083232507109642,
-0.08718746900558472,
-0.0539533905684948,
-0.11702725291252136,
0.027705105021595955,
0.06656485795974731,
-0.05842652544379234,
0.03137844428420067,
0.11062013357877731,
-0.002389072673395276,
-0.040558233857154846,
-0.02512812428176403,
-0.00023564467846881598,
0.04711990803480148,
0.022769151255488396,
-0.013735070824623108,
0.07807290554046631,
-0.047492094337940216,
-0.04897252842783928,
0.006663929205387831,
-0.11178303509950638,
-0.008013523183763027,
0.06803164631128311,
0.008022366091609001,
-0.04196283593773842,
-0.025105053558945656,
0.0431133434176445,
-0.07424937933683395,
0.0432509183883667,
0.09608350694179535,
-0.15923553705215454,
-0.028727376833558083,
0.01354081928730011,
0.01657080464065075,
-0.02491777203977108,
-0.008332896046340466,
0.06449767202138901,
0.10712931305170059
],
"env": "near",
"threshold": 0.55,
"registered_at": 1768845491.8086796
}
}

View File

@@ -20,20 +20,20 @@ audio:
microphone:
device_index: 3 # 指向 iFLYTEK-M2 (hw:1,0)
sample_rate: 48000 # 尝试使用硬件原生采样率 48kHz避免重采样可能导致的问题
# device_index: -1 # 使用系统默认输入设备
# sample_rate: 16000
channels: 1 # 输入声道数单声道MONO适合语音采集
chunk: 1024
heartbeat_interval: 2.0 # 心跳间隔(秒),用于定期输出录音状态
soundcard:
card_index: 1 # USB Audio Device (card 1)
device_index: 0 # USB Audio [USB Audio] (device 0)
sample_rate: 48000 # 输出采样率48kHziFLYTEK 支持 48000
# card_index: -1 # 使用默认声卡
# device_index: -1 # 使用默认输出设备
sample_rate: 48000 # 输出采样率48kHziFLYTEK 支持 48000
# sample_rate: 44100 # 输出采样率:默认 44100
channels: 2 # 输出声道数立体声2声道FL+FR
volume: 1.0 # 音量比例0.0-1.00.2表示20%音量)
echo_cancellation:
enable: false # 是否启用回声消除
max_duration_ms: 500 # 参考信号缓冲区最大时长(毫秒)
tts:
source_sample_rate: 22050 # TTS服务固定输出采样率DashScope服务固定值不可修改
source_channels: 1 # TTS服务固定输出声道数DashScope服务固定值不可修改
@@ -56,15 +56,7 @@ system:
sv_threshold: 0.55 # 声纹识别阈值0.0-1.0,值越小越宽松,值越大越严格)
sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json" # 声纹数据库保存路径JSON格式相对于ROS2包share目录
sv_buffer_size: 240000 # 声纹验证录音缓冲区大小样本数48kHz下5秒=240000
sv_registration_silence_threshold_ms: 500 # 声纹注册状态下的静音阈值(毫秒)
camera:
serial_number: "405622075404" # 相机序列号Intel RealSense D435
rgb:
width: 640 # 图像宽度
height: 480 # 图像高度
fps: 30 # 帧率支持6, 10, 15, 30, 60
format: "RGB8" # 图像格式RGB8, BGR8
image:
jpeg_quality: 85 # JPEG压缩质量0-10085是质量和大小平衡点
max_size: "1280x720" # 最大尺寸

View File

@@ -1,10 +1,24 @@
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.actions import SetEnvironmentVariable
import os
def generate_launch_description():
"""启动语音交互节点,所有参数从 voice.yaml 读取"""
# 获取interfaces包的install路径
interfaces_install_path = os.path.expanduser('~/ros_learn/hivecore_robot_interfaces/install')
# 设置AMENT_PREFIX_PATH确保能找到interfaces包的消息类型
ament_prefix_path = os.environ.get('AMENT_PREFIX_PATH', '')
if interfaces_install_path not in ament_prefix_path:
if ament_prefix_path:
ament_prefix_path = f'{ament_prefix_path}:{interfaces_install_path}'
else:
ament_prefix_path = interfaces_install_path
return LaunchDescription([
SetEnvironmentVariable('AMENT_PREFIX_PATH', ament_prefix_path),
Node(
package='robot_speaker',
executable='robot_speaker_node',

View File

@@ -9,6 +9,8 @@
<depend>rclpy</depend>
<depend>std_msgs</depend>
<depend>sensor_msgs</depend>
<depend>cv_bridge</depend>
<depend>ament_index_python</depend>
<depend>interfaces</depend>

View File

@@ -6,7 +6,7 @@ pypinyin>=0.49.0
rclpy>=3.0.0
pyrealsense2>=2.54.0
Pillow>=10.0.0
numpy>=1.24.0
numpy>=1.24.0,<2.0.0 # cv_bridge需要NumPy 1.xNumPy 2.x会导致段错误
PyYAML>=6.0
aec-audio-processing
modelscope>=1.33.0

View File

@@ -1,2 +1,4 @@
# Bridge package for connecting LLM outputs to brain execution.

View File

@@ -3,3 +3,5 @@

View File

@@ -2,6 +2,7 @@ from dataclasses import dataclass
from typing import Optional
import os
import yaml
import json
from ament_index_python.packages import get_package_share_directory
from pypinyin import pinyin, Style
@@ -12,7 +13,7 @@ class IntentResult:
intent: str # "skill_sequence" | "kb_qa" | "chat_text" | "chat_camera"
text: str
need_camera: bool
camera_mode: Optional[str] # "head" | "left_hand" | "right_hand" | None
camera_mode: Optional[str] # "top" | "left" | "right" | None
system_prompt: Optional[str]
@@ -21,13 +22,24 @@ class IntentRouter:
self.camera_capture_keywords = [
"pai zhao", "pai ge zhao", "pai zhang zhao"
]
self.skill_keywords = [
"ban xiang zi"
# 动作词列表(拼音)- 用于检测技能序列意图
self.action_verbs = [
"zou", "zou liang bu", "zou ji bu", # 走、走两步、走几步
"na", "na qi", "na zhu", # 拿、拿起、拿住
"ban", "ban yun", # 搬、搬运
"zhua", "zhua qu", # 抓、抓取
"tui", "tui dong", # 推、推动
"la", "la dong", # 拉、拉动
"yi dong", "qian jin", "hou tui", # 移动、前进、后退
"kong zhi", "cao zuo", # 控制、操作
"fang xia", "fang zhi", # 放下、放置
"ju qi", "sheng qi", # 举起、升起
]
self.kb_keywords = [
"ni shi shei", "ni de ming zi"
"ni shi shui", "ni de ming zi"
]
self._cached_skill_names: list[str] | None = None
self._cached_kb_data: list[dict] | None = None
def _load_brain_skill_names(self) -> list[str]:
if self._cached_skill_names is not None:
@@ -53,35 +65,36 @@ class IntentRouter:
py_list = pinyin(''.join(chars), style=Style.NORMAL)
return ' '.join([item[0] for item in py_list]).lower().strip()
def is_skill_sequence_intent(self, text: str) -> bool:
text_pinyin = self.to_pinyin(text)
return any(k in text_pinyin for k in self.skill_keywords)
def is_skill_sequence_intent(self, text: str, text_pinyin: str | None = None) -> bool:
if text_pinyin is None:
text_pinyin = self.to_pinyin(text)
# 检查动作词(精确匹配)
return any(action in text_pinyin for action in self.action_verbs)
def check_camera_command(self, text: str) -> tuple[bool, Optional[str]]:
def check_camera_command(self, text: str, text_pinyin: str | None = None) -> tuple[bool, Optional[str]]:
"""检查是否包含拍照指令,返回(是否需要相机, 相机模式)"""
if not text:
return False, None
text_pinyin = self.to_pinyin(text)
for keyword in self.camera_capture_keywords:
if keyword in text_pinyin:
return True, self.detect_camera_mode(text)
if text_pinyin is None:
text_pinyin = self.to_pinyin(text)
# 精确匹配:关键词必须作为完整短语出现在文本拼音中
if any(keyword in text_pinyin for keyword in self.camera_capture_keywords):
return True, self.detect_camera_mode(text, text_pinyin)
return False, None
def detect_camera_mode(self, text: str) -> str:
text_pinyin = self.to_pinyin(text)
left_keys = ["zuo shou", "zuo bi", "zuo bian"]
right_keys = ["you shou", "you bi", "you bian"]
head_keys = ["tou", "nao dai"]
for kw in left_keys:
if kw in text_pinyin:
return "left_hand"
for kw in right_keys:
if kw in text_pinyin:
return "right_hand"
for kw in head_keys:
if kw in text_pinyin:
return "head"
return "head"
def detect_camera_mode(self, text: str, text_pinyin: str | None = None) -> str:
"""检测相机模式返回与相机驱动匹配的position值left/right/top"""
if text_pinyin is None:
text_pinyin = self.to_pinyin(text)
if any(kw in text_pinyin for kw in ["zuo shou", "zuo bi", "zuo bian"]):
return "left"
if any(kw in text_pinyin for kw in ["you shou", "you bi", "you bian"]):
return "right"
if any(kw in text_pinyin for kw in ["tou", "nao dai"]):
return "top"
return "top" # 默认头部相机
def build_skill_prompt(self) -> str:
skills = self._load_brain_skill_names()
@@ -93,7 +106,7 @@ class IntentRouter:
)
return (
"你是机器人任务规划器。\n"
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照(默认头部相机),并结合当前环境信息生成简洁、可执行的技能序列。\n"
"本任务必须拍照。请根据用户请求选择使用哪个相机拍照,并结合当前环境信息生成简洁、可执行的技能序列。\n"
"【重要】如果对话历史中包含【执行结果】或【执行状态】,请参考上一轮技能序列的执行情况,根据成功/失败信息调整本次技能序列。\n"
"【输出格式要求】只输出逗号分隔的技能名称,不要任何解释说明。\n"
+ skill_guard
@@ -109,12 +122,38 @@ class IntentRouter:
"你是一个智能语音助手。\n"
"请自然、简短地与用户对话。"
)
def build_kb_prompt(self) -> str:
return (
"你是蜂核科技的员工。\n"
"请基于知识库信息回答用户问题,回答要准确简洁。"
)
def _load_kb_data(self) -> list[dict]:
"""加载知识库数据"""
if self._cached_kb_data is not None:
return self._cached_kb_data
kb_data = []
try:
robot_speaker_share = get_package_share_directory("robot_speaker")
kb_path = os.path.join(robot_speaker_share, "config", "knowledge.json")
with open(kb_path, "r", encoding="utf-8") as f:
data = json.load(f)
kb_data = data["entries"]
except Exception as e:
kb_data = []
self._cached_kb_data = kb_data
return kb_data
def search_kb(self, text: str) -> Optional[str]:
"""检索知识库,返回匹配的答案"""
if not text:
return None
text_pinyin = self.to_pinyin(text)
kb_data = self._load_kb_data()
for entry in kb_data:
patterns = entry["patterns"]
for pattern in patterns:
if pattern in text_pinyin:
answer = entry["answer"]
if answer:
return answer
return None
def build_default_system_prompt(self) -> str:
return (
@@ -125,12 +164,11 @@ class IntentRouter:
)
def route(self, text: str) -> IntentResult:
need_camera, camera_mode = self.check_camera_command(text)
text_pinyin = self.to_pinyin(text)
need_camera, camera_mode = self.check_camera_command(text, text_pinyin)
if self.is_skill_sequence_intent(text):
if camera_mode is None:
camera_mode = "head"
if self.is_skill_sequence_intent(text, text_pinyin):
# 用户没有指定相机模式时,保持 None使用第一个收到的消息
return IntentResult(
intent="skill_sequence",
text=text,
@@ -139,13 +177,14 @@ class IntentRouter:
system_prompt=self.build_skill_prompt()
)
if any(k in text_pinyin for k in self.kb_keywords):
# 精确匹配:关键词必须作为完整短语出现在文本拼音中
if any(keyword in text_pinyin for keyword in self.kb_keywords):
return IntentResult(
intent="kb_qa",
text=text,
need_camera=False,
camera_mode=None,
system_prompt=self.build_kb_prompt()
system_prompt=None # kb_qa不走LLM不需要system_prompt
)
return IntentResult(

View File

@@ -15,7 +15,6 @@ from ament_index_python.packages import get_package_share_directory
from robot_speaker.perception.audio_pipeline import VADDetector, AudioRecorder
from robot_speaker.perception.speaker_verifier import SpeakerVerificationClient
from robot_speaker.perception.echo_cancellation import ReferenceSignalBuffer
from robot_speaker.models.asr.dashscope import DashScopeASR
from robot_speaker.models.tts.dashscope import DashScopeTTSClient
from robot_speaker.core.types import TTSRequest
@@ -45,13 +44,6 @@ class RegisterSpeakerNode(Node):
sample_rate=self.sample_rate
)
# 创建参考信号缓冲区(用于回声消除)
self.reference_signal_buffer = ReferenceSignalBuffer(
max_duration_ms=self.audio_echo_cancellation_max_duration_ms,
sample_rate=self.sample_rate,
channels=self.output_channels
) if self.audio_echo_cancellation_enabled else None
self.audio_recorder = AudioRecorder(
device_index=self.input_device_index,
sample_rate=self.sample_rate,
@@ -71,8 +63,6 @@ class RegisterSpeakerNode(Node):
on_audio_chunk=self._on_audio_chunk,
should_put_to_queue=self._should_put_to_queue,
get_silence_threshold=lambda: self.silence_duration_ms,
enable_echo_cancellation=self.audio_echo_cancellation_enabled, # 启用回声消除,保持与主程序一致
reference_signal_buffer=self.reference_signal_buffer,
logger=self.get_logger()
)
@@ -122,7 +112,6 @@ class RegisterSpeakerNode(Node):
tts_source_sample_rate=self.audio_tts_source_sample_rate,
tts_source_channels=self.audio_tts_source_channels,
tts_ffmpeg_thread_queue_size=self.audio_tts_ffmpeg_thread_queue_size,
reference_signal_buffer=self.reference_signal_buffer,
logger=self.get_logger()
)
@@ -170,9 +159,6 @@ class RegisterSpeakerNode(Node):
self.output_channels = soundcard['channels']
self.output_volume = soundcard['volume']
echo = audio.get('echo_cancellation', {})
self.audio_echo_cancellation_enabled = echo['enable']
self.audio_echo_cancellation_max_duration_ms = echo.get('max_duration_ms', 200)
tts_audio = audio.get('tts', {})
self.audio_tts_source_sample_rate = tts_audio.get('source_sample_rate', 22050)

File diff suppressed because it is too large Load Diff

View File

@@ -3,3 +3,5 @@

View File

@@ -3,3 +3,5 @@

View File

@@ -11,3 +11,5 @@ class ASRClient:

View File

@@ -3,3 +3,5 @@

View File

@@ -13,3 +13,5 @@ class LLMClient:

View File

@@ -3,3 +3,5 @@

View File

@@ -12,3 +12,5 @@ class TTSClient:

View File

@@ -3,3 +3,5 @@

View File

@@ -1,12 +1,11 @@
"""
音频处理模块:录音 + VAD + 回声消除
音频处理模块:录音 + VAD
"""
import time
import pyaudio
import webrtcvad
import struct
import queue
from .echo_cancellation import EchoCanceller, ReferenceSignalBuffer
class VADDetector:
@@ -35,8 +34,6 @@ class AudioRecorder:
on_audio_chunk=None, # 音频chunk回调用于声纹录音等可选
should_put_to_queue=None, # 检查是否应该将音频放入队列用于阻止ASR可选
get_silence_threshold=None, # 获取动态静音阈值(毫秒,可选)
enable_echo_cancellation: bool = True, # 是否启用回声消除
reference_signal_buffer: ReferenceSignalBuffer = None, # 参考信号缓冲区(可选)
logger=None):
self.device_index = device_index
self.sample_rate = sample_rate
@@ -97,39 +94,6 @@ class AudioRecorder:
self.format = pyaudio.paInt16
self._debug_counter = 0
# 回声消除相关
self.enable_echo_cancellation = enable_echo_cancellation
self.reference_signal_buffer = reference_signal_buffer
if enable_echo_cancellation:
# 初始化回声消除器(在录音线程中同步处理,不是单独线程)
# frame_size设置为chunk大小确保每次处理一个chunk
frame_size = chunk
try:
# 获取参考信号声道数从reference_signal_buffer获取因为它是根据播放声道数创建的
ref_channels = self.reference_signal_buffer.channels if self.reference_signal_buffer else 1
self.echo_canceller = EchoCanceller(
sample_rate=sample_rate,
frame_size=frame_size,
channels=self.channels, # 麦克风输入1声道
ref_channels=ref_channels, # 参考信号播放声道数2声道
logger=logger
)
if self.echo_canceller.aec is not None:
if logger:
logger.info(f"回声消除器已启用: sample_rate={sample_rate}, frame_size={frame_size}")
else:
if logger:
logger.warning("回声消除器初始化失败,将禁用回声消除功能")
self.enable_echo_cancellation = False
self.echo_canceller = None
except Exception as e:
if logger:
logger.warning(f"回声消除器初始化失败: {e},将禁用回声消除功能")
self.enable_echo_cancellation = False
self.echo_canceller = None
else:
self.echo_canceller = None
def record_with_vad(self):
"""录音线程VAD + 能量检测"""
@@ -163,19 +127,7 @@ class AudioRecorder:
while not self.stop_flag():
# exception_on_overflow=False, 宁可丢帧,也不阻塞
data = stream.read(self.chunk, exception_on_overflow=False)
# 回声消除处理
processed_data = data
if self.enable_echo_cancellation and self.echo_canceller and self.reference_signal_buffer:
try:
# 获取参考信号(长度与麦克风信号匹配)
ref_signal = self.reference_signal_buffer.get_reference(num_samples=self.chunk)
# 执行回声消除
processed_data = self.echo_canceller.process(data, ref_signal)
except Exception as e:
if self.logger:
self.logger.warning(f"回声消除处理失败: {e},使用原始音频")
processed_data = data
# 检查是否应该将音频放入队列用于阻止ASR例如无声纹文件时需要注册
if self.should_put_to_queue():

View File

@@ -1,131 +0,0 @@
"""
相机模块 - RealSense相机封装
"""
import numpy as np
import contextlib
class CameraClient:
def __init__(self,
serial_number: str | None,
width: int,
height: int,
fps: int,
format: str,
logger=None):
self.serial_number = serial_number
self.width = width
self.height = height
self.fps = fps
self.format = format
self.logger = logger
self.pipeline = None
self.config = None
self._is_initialized = False
self._rs = None
def _log(self, level: str, msg: str):
if self.logger:
getattr(self.logger, level, self.logger.info)(msg)
else:
print(f"[相机] {msg}")
def initialize(self) -> bool:
"""
初始化并启动相机管道
"""
if self._is_initialized:
return True
try:
import pyrealsense2 as rs
self._rs = rs
self.pipeline = rs.pipeline()
self.config = rs.config()
if self.serial_number:
self.config.enable_device(self.serial_number)
self.config.enable_stream(
rs.stream.color,
self.width,
self.height,
rs.format.rgb8 if self.format == 'RGB8' else rs.format.bgr8,
self.fps
)
self.pipeline.start(self.config)
self._is_initialized = True
self._log("info", f"相机已启动并保持运行: {self.width}x{self.height}@{self.fps}fps")
return True
except Exception as e:
self._log("error", f"相机初始化失败: {e}")
self.cleanup()
return False
def cleanup(self):
"""停止相机管道,释放资源"""
if self.pipeline:
self.pipeline.stop()
self._log("info", "相机已停止")
self.pipeline = None
self.config = None
self._is_initialized = False
def capture_rgb(self) -> np.ndarray | None:
"""
从运行中的相机管道捕获一帧RGB图像
"""
if not self._is_initialized:
self._log("error", "相机未初始化,无法捕获图像")
return None
try:
frames = self.pipeline.wait_for_frames()
color_frame = frames.get_color_frame()
return np.asanyarray(color_frame.get_data())
except Exception as e:
self._log("error", f"捕获图像失败: {e}")
return None
@contextlib.contextmanager
def capture_context(self):
"""
上下文管理器:拍照并自动清理资源
"""
image_data = self.capture_rgb()
try:
yield image_data
finally:
if image_data is not None:
del image_data
def capture_multiple(self, count: int = 1) -> list[np.ndarray]:
"""
捕获多张图像(为未来扩展准备)
"""
images = []
for i in range(count):
img = self.capture_rgb()
if img is not None:
images.append(img)
else:
self._log("warning", f"{i+1}张图像捕获失败")
return images
@contextlib.contextmanager
def capture_multiple_context(self, count: int = 1):
"""
上下文管理器:捕获多张图像并自动清理资源
"""
images = self.capture_multiple(count)
try:
yield images
finally:
for img in images:
del img
images.clear()

View File

@@ -1,98 +0,0 @@
import collections
import numpy as np
class ReferenceSignalBuffer:
"""参考信号缓冲区"""
def __init__(self, sample_rate: int, channels: int, max_duration_ms: int | None = None,
buffer_seconds: float = 5.0):
self.sample_rate = int(sample_rate)
self.channels = int(channels)
if max_duration_ms is not None:
buffer_seconds = max(float(max_duration_ms) / 1000.0, 0.1)
self.max_samples = int(self.sample_rate * buffer_seconds)
self._buffer = collections.deque(maxlen=self.max_samples * self.channels)
def add_reference(self, data: bytes, source_sample_rate: int, source_channels: int):
if source_sample_rate != self.sample_rate or source_channels != self.channels:
return
samples = np.frombuffer(data, dtype=np.int16)
self._buffer.extend(samples.tolist())
def get_reference(self, num_samples: int) -> bytes:
needed = int(num_samples) * self.channels
if needed <= 0:
return b""
if len(self._buffer) < needed:
data = list(self._buffer) + [0] * (needed - len(self._buffer))
else:
data = list(self._buffer)[-needed:]
return np.array(data, dtype=np.int16).tobytes()
class EchoCanceller:
    """Acoustic echo canceller (based on the aec-audio-processing package).

    The far-end (loudspeaker) signal is fed to the processor's reverse
    stream, then the microphone signal is filtered through
    ``process_stream`` in 10 ms frames.  If the package is missing or
    initialisation fails, ``self.aec`` stays None and :meth:`process`
    returns the microphone audio unchanged.
    """
    def __init__(self, sample_rate: int, frame_size: int, channels: int, ref_channels: int, logger=None):
        self.sample_rate = int(sample_rate)
        self.frame_size = int(frame_size)
        self.channels = int(channels)
        self.ref_channels = int(ref_channels)
        self.logger = logger
        self.aec = None                # AudioProcessor instance, or None when unavailable
        self._process_reverse = None   # bound reverse-stream method, if the API exposes one
        # Bytes per 10 ms frame of int16 samples, for mic and reference streams.
        self._frame_bytes = int(self.sample_rate / 100) * self.channels * 2  # 10ms, int16
        self._ref_frame_bytes = int(self.sample_rate / 100) * self.ref_channels * 2
        try:
            from aec_audio_processing import AudioProcessor
            self.aec = AudioProcessor(enable_aec=True, enable_ns=False, enable_agc=False)
            self.aec.set_stream_format(self.sample_rate, self.channels)
            # Feature-detect optional API methods across package versions.
            if hasattr(self.aec, "set_reverse_stream_format"):
                self.aec.set_reverse_stream_format(self.sample_rate, self.ref_channels)
            if hasattr(self.aec, "set_stream_delay"):
                self.aec.set_stream_delay(0)
            if hasattr(self.aec, "process_reverse_stream"):
                self._process_reverse = self.aec.process_reverse_stream
            elif hasattr(self.aec, "process_reverse"):
                self._process_reverse = self.aec.process_reverse
        except Exception:
            # Any import/initialisation failure disables AEC (pass-through mode).
            self.aec = None

    def process(self, mic_data: bytes, ref_data: bytes) -> bytes:
        """Run echo cancellation on *mic_data* using *ref_data* as reference.

        Both arguments are interleaved int16 PCM bytes.  Audio is processed
        in 10 ms frames; short trailing frames are zero-padded and the output
        is trimmed back to the input length.  On any failure the original
        microphone bytes are returned unchanged.
        """
        if not self.aec:
            return mic_data
        if not mic_data:
            return mic_data
        try:
            out_chunks = []
            total_len = len(mic_data)
            frame_bytes = self._frame_bytes
            ref_frame_bytes = self._ref_frame_bytes
            # Ceiling division: number of 10 ms frames covering the input.
            frame_count = (total_len + frame_bytes - 1) // frame_bytes
            for i in range(frame_count):
                m_start = i * frame_bytes
                m_end = m_start + frame_bytes
                mic_frame = mic_data[m_start:m_end]
                if len(mic_frame) < frame_bytes:
                    # Zero-pad a short trailing mic frame to a full 10 ms.
                    mic_frame = mic_frame + b"\x00" * (frame_bytes - len(mic_frame))
                if ref_data:
                    r_start = i * ref_frame_bytes
                    r_end = r_start + ref_frame_bytes
                    ref_frame = ref_data[r_start:r_end]
                    if len(ref_frame) < ref_frame_bytes:
                        ref_frame = ref_frame + b"\x00" * (ref_frame_bytes - len(ref_frame))
                    # Feed the reference (far-end) frame before filtering the mic frame.
                    if self._process_reverse:
                        self._process_reverse(ref_frame)
                processed = self.aec.process_stream(mic_frame)
                out_chunks.append(processed if processed is not None else mic_frame)
            # Trim padding so output length matches input length exactly.
            return b"".join(out_chunks)[:total_len]
        except Exception as e:
            if self.logger:
                self.logger.warning(f"回声消除处理失败: {e},使用原始音频")
            return mic_data

View File

@@ -3,3 +3,5 @@

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env python3
"""
查看相机画面的简单脚本
按空格键保存当前帧,按'q'键退出
"""
import sys
import cv2
import numpy as np
try:
import pyrealsense2 as rs
except ImportError:
print("错误: 未安装pyrealsense2请运行: pip install pyrealsense2")
sys.exit(1)
def main():
    """Open the RealSense colour stream and preview it with OpenCV.

    Space saves the current frame to a numbered JPEG; 'q' quits.  The
    pipeline is always stopped and windows destroyed on exit.
    """
    # 配置相机
    pipeline = rs.pipeline()
    config = rs.config()
    # 启用彩色流 (640x480 RGB8 @ 30 fps)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.rgb8, 30)
    # 启动管道
    pipeline.start(config)
    print("相机已启动,按空格键保存图片,按'q'键退出")
    frame_count = 0
    try:
        while True:
            # 等待一帧
            frames = pipeline.wait_for_frames()
            color_frame = frames.get_color_frame()
            if not color_frame:
                continue
            # 转换为numpy数组 (RGB格式)
            color_image = np.asanyarray(color_frame.get_data())
            # OpenCV使用BGR格式,需要转换
            bgr_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
            # 显示图像
            cv2.imshow('Camera View', bgr_image)
            # 等待按键
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("退出...")
                break
            elif key == ord(' '):  # 空格键保存
                frame_count += 1
                filename = f'camera_frame_{frame_count:04d}.jpg'
                cv2.imwrite(filename, bgr_image)
                # Fix: report the actual saved path (the literal had been
                # replaced by a "(unknown)" placeholder instead of {filename}).
                print(f"已保存: {filename}")
    except KeyboardInterrupt:
        print("\n中断...")
    finally:
        pipeline.stop()
        cv2.destroyAllWindows()
        print("相机已关闭")


if __name__ == '__main__':
    main()