68 lines
3.2 KiB
YAML
68 lines
3.2 KiB
YAML
# Configuration file for the ROS voice package.

# DashScope cloud service settings: credentials plus the ASR, LLM and TTS
# models used by the voice package.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm the key hierarchy against the code that loads it.
dashscope:
  # SECURITY: this is a credential stored in plain text. Do not commit it to
  # version control; rotate this key and supply it from an environment
  # variable or a secret store instead.
  api_key: "sk-7215a5ab7a00469db4072e1672a0661e"
  # Speech recognition: model name and WebSocket endpoint.
  asr:
    model: "qwen3-asr-flash-realtime"
    url: "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
  # Language model: model name, HTTP endpoint, and generation/history settings.
  llm:
    model: "qwen3-vl-flash"
    base_url: "https://dashscope.aliyuncs.com/compatible-mode/v1"
    temperature: 0.7
    max_tokens: 4096
    max_history: 10
    summary_trigger: 3
  # Speech synthesis: model name and voice.
  tts:
    model: "cosyvoice-v3-flash"
    voice: "longanyang"
# Audio capture, playback and TTS stream settings.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that `tts` is a sibling of `microphone`/`soundcard` under `audio`.
audio:
  microphone:
    # -1 selects the system default input device.
    device_index: -1
    # Try the hardware-native 48 kHz rate to avoid problems that resampling
    # may cause.
    sample_rate: 48000
    # Input channel count: mono, which suits voice capture.
    channels: 1
    chunk: 1024
    # Heartbeat interval in seconds, used to periodically report the
    # recording status.
    heartbeat_interval: 2.0
  soundcard:
    # -1 selects the default sound card.
    card_index: -1
    # -1 selects the default output device.
    device_index: -1
    # Output sample rate in Hz.
    # NOTE(review): the earlier comment named 44100 as the default while the
    # configured value is 48000 — confirm which default the code applies.
    sample_rate: 48000
    # Output channel count: stereo (2 channels, FL+FR).
    channels: 2
    # Volume ratio in the range 0.0-1.0 (for example, 0.2 means 20% volume).
    volume: 1.0
  tts:
    # Sample rate of the audio produced by the TTS service. Fixed by the
    # DashScope service; do not modify.
    source_sample_rate: 22050
    # Channel count of the audio produced by the TTS service. Fixed by the
    # DashScope service; do not modify.
    source_channels: 1
    # Size of the ffmpeg input thread queue (increase to reduce stutter).
    ffmpeg_thread_queue_size: 4096
    # Delay applied on a forced stop, in seconds.
    force_stop_delay: 0.1
    # Cleanup timeout, in seconds.
    cleanup_timeout: 30.0
    # Terminate timeout, in seconds.
    terminate_timeout: 1.0
    # Wait time on interrupt, in seconds.
    interrupt_wait: 0.1
# Voice activity detection settings.
vad:
  # VAD mode, 0-3; 3 is the strictest.
  vad_mode: 3
  # Silence duration, in milliseconds.
  silence_duration_ms: 1000
  # Minimum energy threshold.
  min_energy_threshold: 300
# System behaviour: wake word, session handling, speaker verification (sv_*)
# and multimodal fallback.
system:
  # Whether wake-word detection is enabled.
  use_wake_word: true
  # Wake word, written in pinyin.
  wake_word: "er gou"
  # Session timeout, in seconds.
  session_timeout: 3.0
  # Keywords for the "shut up" command (pinyin, comma-separated).
  shutup_keywords: "bi zui"
  # Queue depth (QoS) of the interrupt-command subscription.
  interrupt_command_queue_depth: 10
  # Whether speaker (voiceprint) verification is enabled.
  sv_enabled: false
  # Path to the speaker-verification model.
  # Alternative location:
  # sv_model_path: "~/hivecore_robot_os1/voice_model"
  sv_model_path: "~/ros_learn/speech_campplus_sv_zh-cn_16k-common"
  # Speaker-verification threshold in the range 0.0-1.0; lower values are
  # more lenient, higher values are stricter.
  sv_threshold: 0.65
  # Where the speaker database is saved (JSON format).
  # NOTE(review): the earlier comment described this path as relative to the
  # ROS 2 package share directory, but the value is a `~`-based home path —
  # confirm how the loader resolves it.
  # Alternative location:
  # sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json"
  sv_speaker_db_path: "~/ros_learn/hivecore_robot_voice/config/speakers.json"
  # Size of the speaker-verification recording buffer, in samples
  # (2 seconds at 48 kHz = 96000).
  sv_buffer_size: 96000
  # Whether to continue inference when a multimodal intent
  # (skill_sequence/chat_camera) did not obtain an image.
  continue_without_image: true
# Camera image settings.
camera:
  image:
    # JPEG compression quality, 0-100; 85 balances quality against size.
    jpeg_quality: 85
# Location of the interface definition files.
interfaces:
  # Root directory of the interface files; supports `~` expansion and
  # relative paths.
  # Alternative location:
  # root_path: "~/hivecore_robot_os1/hivecore_robot_interfaces/src"
  root_path: "~/ros_learn/hivecore_robot_interfaces/src"