# ROS voice package configuration file

# DashScope cloud service settings: credentials plus the ASR, LLM and TTS models.
dashscope:
  # NOTE(review): an API key is committed here in plain text. Rotate it and,
  # if the loader supports it, supply it from an environment variable or a
  # secret store instead of this file.
  api_key: "sk-7215a5ab7a00469db4072e1672a0661e"
  # Speech recognition (ASR): model name and its WebSocket endpoint.
  asr:
    model: "qwen3-asr-flash-realtime"
    url: "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
  # Large language model (LLM): model name, HTTP endpoint and generation limits.
  llm:
    model: "qwen3-vl-flash"
    base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
    temperature: 0.7
    max_tokens: 4096
    # NOTE(review): presumably the number of conversation entries kept and the
    # point at which history is summarized; confirm units against the consumer.
    max_history: 10
    summary_trigger: 3
  # Speech synthesis (TTS): model name and voice.
  tts:
    model: "cosyvoice-v3-flash"
    voice: "longanyang"

# Audio capture and playback settings.
audio:
  # Input (recording) device.
  microphone:
    device_index: 3  # points at the iFLYTEK-M2 (hw:1,0)
    # Try the hardware-native 48 kHz rate to avoid problems that resampling
    # might cause.
    sample_rate: 48000
    channels: 1  # input channels: mono (suited to speech capture)
    chunk: 1024
    # Heartbeat interval (seconds) for periodically reporting recording status.
    heartbeat_interval: 2.0
  # Output (playback) device.
  soundcard:
    card_index: 1  # USB Audio Device (card 1)
    device_index: 0  # USB Audio [USB Audio] (device 0)
    # card_index: -1  # use the default sound card
    # device_index: -1  # use the default output device
    sample_rate: 48000  # output sample rate: 48 kHz (iFLYTEK supports 48000)
    channels: 2  # output channels: stereo (2 channels, FL+FR)
    volume: 1.0  # volume ratio (0.0-1.0; e.g. 0.2 means 20% volume)
  echo_cancellation:
    enabled: false  # whether echo cancellation is enabled (true/false)
    max_duration_ms: 500  # maximum reference-signal buffer length (milliseconds)
  # Format of the audio stream produced by the TTS service.
  tts:
    # Fixed output sample rate of the TTS service (fixed by DashScope; do not
    # change).
    source_sample_rate: 22050
    # Fixed output channel count of the TTS service (fixed by DashScope; do not
    # change).
    source_channels: 1
    # ffmpeg input thread queue size (increase to reduce stutter).
    ffmpeg_thread_queue_size: 4096

# Voice activity detection (VAD) settings.
vad:
  vad_mode: 3  # VAD mode: 0-3, where 3 is the strictest
  silence_duration_ms: 1000  # silence duration (milliseconds)
  min_energy_threshold: 300  # minimum energy threshold

# System-level behavior: LLM usage, wake word, session handling and
# speaker (voiceprint) verification.
system:
  use_llm: true  # whether to use the LLM
  use_wake_word: true  # whether wake-word detection is enabled
  wake_word: "er gou"  # wake word (pinyin)
  session_timeout: 3.0  # session timeout (seconds)
  shutup_keywords: "bi zui"  # "shut up" command keywords (pinyin, comma-separated)
  # Queue depth (QoS) of the interrupt-command subscription.
  interrupt_command_queue_depth: 10
  sv_enabled: true  # whether speaker (voiceprint) verification is enabled
  # Speaker-verification model path.
  # NOTE(review): "~" is a literal character inside the quoted string; this
  # assumes the consumer expands it to the home directory - confirm.
  sv_model_path: "~/hivecore_robot_os1/voice_model"
  # Speaker-verification threshold (0.0-1.0; lower is more lenient, higher is
  # stricter).
  sv_threshold: 0.55
  # Where the speaker database is saved (JSON format).
  # NOTE(review): the original comment says this path is relative to the ROS 2
  # package share directory, but the value is a "~" (home-relative) path -
  # confirm which one the consumer actually uses.
  sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json"
  # Recording buffer size for speaker verification, in samples
  # (5 seconds at 48 kHz = 240000).
  sv_buffer_size: 240000
  # Silence threshold (milliseconds) while in the speaker-registration state.
  sv_registration_silence_threshold_ms: 500

# Camera capture settings and image encoding limits.
camera:
  serial_number: "405622075404"  # camera serial number (Intel RealSense D435)
  # Color stream settings.
  rgb:
    width: 640  # image width
    height: 480  # image height
    fps: 30  # frame rate (supported: 6, 10, 15, 30, 60)
    format: "RGB8"  # image format: RGB8, BGR8
  # NOTE(review): "image" is assumed to be nested under "camera" because no
  # blank line separates it from the preceding keys - confirm against the
  # consumer's expected schema.
  image:
    # JPEG compression quality (0-100; 85 balances quality and size).
    jpeg_quality: 85
    max_size: "1280x720"  # maximum size