From 42cf1610eb45da5bdce11588cacb2a525be8a547 Mon Sep 17 00:00:00 2001 From: david Date: Fri, 6 Mar 2026 22:26:07 +0800 Subject: [PATCH] fix: build script fail-fast on colcon error + manager.py indent + FAQ multi-user - build_install_sdk.sh: colcon build output captured; exit 1 on non-zero exit code or 'FAILED' summary line; stale colcon artifacts cleaned before each build to prevent cached IndentationError surfacing - build_install_sdk.sh: prod post-install block creates /var/log/robot with setgid group (hivecore, chmod 2775) for multi-user node support - manager.py: fix ManagerConfig docstring indentation (8-space to 4-space) that caused IndentationError in stale build/install colcon copies - USER_GUIDE.md FAQ: expand permission troubleshooting with Scenario A (single-user chown) and Scenario B (3-step setgid group for multi-user) --- hivecore_logger/USER_GUIDE.md | 57 ++++++++++++++++++- .../manager/hivecore_log_manager/manager.py | 8 +-- hivecore_logger/scripts/build_install_sdk.sh | 37 +++++++++++- 3 files changed, 93 insertions(+), 9 deletions(-) diff --git a/hivecore_logger/USER_GUIDE.md b/hivecore_logger/USER_GUIDE.md index e283d4b..be2899e 100644 --- a/hivecore_logger/USER_GUIDE.md +++ b/hivecore_logger/USER_GUIDE.md @@ -469,9 +469,62 @@ sudo ln -sfn /opt/hivecore/venvs/robot-runtime-1.0.1 \ ## 9. 
常见问题 **Q: 日志写入失败,提示权限错误?** -SDK 会自动回退到 `/tmp/robot_logs/`,并在控制台输出警告。建议为 `/var/log/robot` 设置正确权限: +SDK 会自动回退到 `/tmp/robot_logs/`,并在控制台输出警告。 + +**场景 A:所有节点以同一系统用户运行(单用户)** +直接将目录属主改为该用户,无需其他操作: ```bash -sudo mkdir -p /var/log/robot && sudo chown $USER /var/log/robot +sudo mkdir -p /var/log/robot && sudo chown $(whoami) /var/log/robot +``` + +**场景 B:多个节点以不同系统用户运行(生产环境常见)** +`chown` 只授权单一用户,其他用户仍无写权限(目录默认 `755`)。需要三步操作: + +**第一步:创建共享目录(一次性,需 root)** + +```bash +# setgid(chmod 2775)使目录内新建的文件和子目录自动继承 hivecore 组, +# 无论是哪个用户的进程创建的,Manager 都能统一管理。 +sudo groupadd -f hivecore +sudo mkdir -p /var/log/robot +sudo chown root:hivecore /var/log/robot +sudo chmod 2775 /var/log/robot +``` + +验证: +```bash +ls -ld /var/log/robot +# 期望输出:drwxrwsr-x 2 root hivecore ... /var/log/robot +# ^ s 位(setgid)表示设置成功 +``` + +**第二步:将每个节点运行用户加入 hivecore 组(每增加一个用户执行一次,需 root)** + +```bash +sudo usermod -aG hivecore <节点运行用户> +``` + +> `usermod` 修改的是 `/etc/group`,而进程的附加组是由登录程序在用户**新建 session** 时读取该文件并设置的(内核本身不读取该文件),已运行的进程不会自动获得新组。执行后必须以该用户重新登录,或在当前 shell 中执行 `newgrp hivecore` 临时生效。 + +**第三步:告知 SDK 写入共享目录** + +在节点代码或启动脚本中显式指定 `log_dir`,SDK 不会自动推断该路径: + +```python +# Python 节点 +hivecore_logger.init(node_name="vision_node", log_dir="/var/log/robot") +``` + +```cpp +// C++ 节点 +hivecore::log::LoggerOptions opts; +opts.log_dir = "/var/log/robot"; +hivecore::log::Logger::init("vision_node", opts); +``` + +```bash +# 启动脚本中通过命令行参数传入(Manager) +hivecore-log-manager --log-dir /var/log/robot ...
``` **Q: 动态调级不生效?** diff --git a/hivecore_logger/manager/hivecore_log_manager/manager.py b/hivecore_logger/manager/hivecore_log_manager/manager.py index 2437354..534c147 100644 --- a/hivecore_logger/manager/hivecore_log_manager/manager.py +++ b/hivecore_logger/manager/hivecore_log_manager/manager.py @@ -21,11 +21,11 @@ from .ros2_adapter import Ros2LevelService @dataclass class ManagerConfig: - """日志管理器配置。 + """日志管理器配置。 - 该配置控制磁盘配额、水位线阈值、清理与压缩周期、HTTP 服务以及 ROS 2 - 适配器启停等运行参数,适合在守护进程启动时统一注入。 - """ + 该配置控制磁盘配额、水位线阈值、清理与压缩周期、HTTP 服务以及 ROS 2 + 适配器启停等运行参数,适合在守护进程启动时统一注入。 + """ log_dir: str = "/var/log/robot" quota_mb: int = 2048 diff --git a/hivecore_logger/scripts/build_install_sdk.sh b/hivecore_logger/scripts/build_install_sdk.sh index 6afed80..0baa869 100755 --- a/hivecore_logger/scripts/build_install_sdk.sh +++ b/hivecore_logger/scripts/build_install_sdk.sh @@ -193,7 +193,14 @@ build_ros2_interfaces() { fi echo "[5/5] Build ROS2 interfaces" - ( + + # Remove stale build/install artifacts to prevent cached syntax errors + # being reported during the colcon build even when the source is clean. + rm -rf "${WORKSPACE_DIR}/build/hivecore_log_manager" \ + "${WORKSPACE_DIR}/install/hivecore_log_manager" 2>/dev/null || true + + colcon_output=$( + set -e set +u if [[ -f "$ROS2_SETUP_FILE" ]]; then # shellcheck disable=SC1090 @@ -206,8 +213,14 @@ build_ros2_interfaces() { set -u cd "$WORKSPACE_DIR" - colcon build --packages-up-to hivecore_log_manager - ) + colcon build --packages-up-to hivecore_log_manager 2>&1 + ) || { echo "[ERROR] ROS2 colcon build failed (exit $?)." >&2; exit 1; } + + echo "$colcon_output" + if echo "$colcon_output" | grep -qE "^(Summary:.*[1-9][0-9]* package(s)? failed|.*FAILED)"; then + echo "[ERROR] ROS2 build reported package failures. Aborting installation." 
>&2 + exit 1 + fi } echo "Deploy mode: $DEPLOY_MODE" @@ -236,6 +249,24 @@ install_python_package "$ROOT_DIR/manager" build_ros2_interfaces +# 生产模式下创建共享日志目录并配置 setgid 组权限,允许多用户节点同时写入。 +# 使用 hivecore 组 + chmod 2775(setgid)确保目录内新建的文件和子目录 +# 自动继承 hivecore 组,Manager 可统一管理所有节点产生的日志。 +# 若只需单用户写入,删除 usermod 行即可,其余步骤不变。 +if [[ "$DEPLOY_MODE" == "prod" ]]; then + LOG_DIR="/var/log/robot" + LOG_DIR_OWNER="${SUDO_USER:-$(whoami)}" + echo "[Post-install] Create shared log directory: $LOG_DIR" + groupadd -f hivecore + mkdir -p "$LOG_DIR" + chown root:hivecore "$LOG_DIR" + chmod 2775 "$LOG_DIR" + # 将安装操作的实际执行用户加入 hivecore 组(其他节点用户需手动执行 usermod) + usermod -aG hivecore "$LOG_DIR_OWNER" 2>/dev/null && \ + echo "[Post-install] Added '$LOG_DIR_OWNER' to group 'hivecore'. Re-login required." || \ + echo "[Post-install] Note: could not add '$LOG_DIR_OWNER' to group 'hivecore'. Run: sudo usermod -aG hivecore " +fi + echo "Build and install completed." echo " C++ SDK install prefix: $INSTALL_PREFIX" echo " Python executable: $PYTHON_BIN"