Files
hivecore_robot_voice/config/voice.yaml

68 lines
3.2 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Configuration file for the ROS voice package
dashscope:
  # NOTE(review): a credential is stored here in plain text. If this file is
  # tracked in version control, rotate the key and supply it from outside the
  # repository instead.
  api_key: 'sk-7215a5ab7a00469db4072e1672a0661e'

  # Speech-recognition (ASR) model and its realtime WebSocket endpoint.
  asr:
    model: 'qwen3-asr-flash-realtime'
    url: 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime'

  # Language-model (LLM) selection and request settings.
  # NOTE(review): max_history and summary_trigger are assumed to belong to
  # this group -- confirm against the code that loads this file.
  llm:
    model: 'qwen3-vl-flash'
    base_url: 'https://dashscope.aliyuncs.com/compatible-mode/v1'
    temperature: 0.7
    max_tokens: 4096
    max_history: 10
    summary_trigger: 3

  # Speech-synthesis (TTS) model and voice name.
  tts:
    model: 'cosyvoice-v3-flash'
    voice: 'longanyang'
audio:
  # Capture (input) side.
  # NOTE(review): chunk and heartbeat_interval are assumed to belong to the
  # microphone group -- confirm against the code that loads this file.
  microphone:
    # -1 selects the system default input device.
    device_index: -1
    # 48 kHz: try the hardware's native sample rate to avoid problems that
    # resampling might cause.
    sample_rate: 48000
    # Input channel count; mono (1) suits voice capture.
    channels: 1
    chunk: 1024
    # Heartbeat interval in seconds, used to periodically report the
    # recording state.
    heartbeat_interval: 2.0

  # Playback (output) side.
  soundcard:
    # -1 selects the default sound card.
    card_index: -1
    # -1 selects the default output device.
    device_index: -1
    # Output sample rate. The original note cites 44100 as the default; this
    # file sets 48000.
    sample_rate: 48000
    # Output channel count; stereo, 2 channels (FL + FR).
    channels: 2
    # Volume ratio in the range 0.0-1.0 (for example, 0.2 means 20% volume).
    volume: 1.0

  # Handling of the audio stream produced by the TTS service.
  tts:
    # Sample rate emitted by the TTS service; noted as a fixed DashScope
    # value that must not be changed.
    source_sample_rate: 22050
    # Channel count emitted by the TTS service; noted as a fixed DashScope
    # value that must not be changed.
    source_channels: 1
    # ffmpeg input thread queue size; raised to reduce stutter.
    ffmpeg_thread_queue_size: 4096
    # Delay applied on a forced stop, in seconds.
    force_stop_delay: 0.1
    # Cleanup timeout, in seconds.
    cleanup_timeout: 30.0
    # Terminate timeout, in seconds.
    terminate_timeout: 1.0
    # Wait time on interrupt, in seconds.
    interrupt_wait: 0.1
# Voice-activity-detection settings.
vad:
  # VAD mode, 0-3; 3 is the strictest.
  vad_mode: 3
  # Silence duration, in milliseconds.
  silence_duration_ms: 1000
  # Minimum energy threshold.
  min_energy_threshold: 300
system:
  # Whether wake-word detection is enabled.
  use_wake_word: true
  # Wake word (pinyin).
  wake_word: 'er gou'
  # Session timeout, in seconds.
  session_timeout: 3.0
  # "Shut up" command keywords (pinyin, comma-separated).
  shutup_keywords: 'bi zui'
  # Queue depth (QoS) of the interrupt-command subscription.
  interrupt_command_queue_depth: 10

  # Whether speaker verification (voiceprint recognition) is enabled.
  sv_enabled: false
  # Path to the speaker-verification model.
  # sv_model_path: "~/hivecore_robot_os1/voice_model"
  sv_model_path: '~/ros_learn/speech_campplus_sv_zh-cn_16k-common'
  # Speaker-verification threshold, 0.0-1.0; a lower value is more lenient,
  # a higher value is stricter.
  sv_threshold: 0.65
  # Path where the speaker database is saved (JSON format). The original note
  # describes it as relative to the ROS 2 package share directory.
  # NOTE(review): the configured value is a "~"-based path -- confirm which
  # resolution rule actually applies.
  # sv_speaker_db_path: "~/hivecore_robot_os1/config/speakers.json"
  sv_speaker_db_path: '~/ros_learn/hivecore_robot_voice/config/speakers.json'
  # Recording buffer size for speaker verification, in samples
  # (2 seconds at 48 kHz = 96000).
  sv_buffer_size: 96000

  # Whether inference continues when no image could be obtained for a
  # multimodal intent (skill_sequence / chat_camera).
  continue_without_image: true
camera:
  image:
    # JPEG compression quality, 0-100; 85 is noted as the balance point
    # between quality and size.
    jpeg_quality: 85
interfaces:
  # Root directory of the interface files; "~" expansion and relative paths
  # are supported.
  # root_path: "~/hivecore_robot_os1/hivecore_robot_interfaces/src"
  root_path: '~/ros_learn/hivecore_robot_interfaces/src'