add 预处理模块测试节点、图像质量判断功能

增加相机控制接口，图像预处理部分功能
框架调整(部分)，暂无crossboard-color-icp-e2e，添加了动作服务节点及动作客户端节点
2026-03-06 15:07:34 +08:00 · 2026-03-06 10:07:31 +08:00 · 2026-02-06 18:09:37 +08:00 · 2026-01-23 11:47:45 +08:00 · 2026-01-23 11:39:30 +08:00 · 2026-01-23 11:36:09 +08:00
206 changed files with 10456 additions and 5100 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,5 +1,5 @@
-/vision_test/ merge=ours
-/test/ merge=ours
+vision_test/ merge=ours
+test/ merge=ours

-/.gitattributes merge=ours
-/.gitignore merge=ours
+.gitattributes merge=ours
+.gitignore merge=ours
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 **/build/
 **/install/
 **/log/
+**/vision_core/**/**.so
+**/__pycache__
--- a/calibration_mat/eye_in_left_hand.json
+++ b/calibration_mat/eye_in_left_hand.json
@@ -0,0 +1,28 @@
+{
+    "T": [
+        [
+            -0.029312906184646676,
+            0.9941492398859674,
+            0.10396173510077301,
+            -0.0809046635564155
+        ],
+        [
+            -0.9993360095995784,
+            -0.02689542239474161,
+            -0.024579995358005674,
+            0.036195409684615196
+        ],
+        [
+            -0.021640088923136416,
+            -0.10461321660460321,
+            0.9942775173275502,
+            0.04913468358915477
+        ],
+        [
+            0.0,
+            0.0,
+            0.0,
+            1.0
+        ]
+    ]
+}
--- a/calibration_mat/eye_in_right_hand.json
+++ b/calibration_mat/eye_in_right_hand.json
@@ -0,0 +1,28 @@
+{
+    "T": [
+        [
+            0.02515190926712163,
+            0.9984434279780899,
+            -0.049780544267610596,
+            -0.08312977955463981
+        ],
+        [
+            -0.9996337211449028,
+            0.025617085950903107,
+            0.008728599966714646,
+            0.059044674332170574
+        ],
+        [
+            0.00999024575340213,
+            0.04954276975245833,
+            0.9987220378839358,
+            0.017378234075134728
+        ],
+        [
+            0.0,
+            0.0,
+            0.0,
+            1.0
+        ]
+    ]
+}
--- a/calibration_mat/eye_to_hand.json
+++ b/calibration_mat/eye_to_hand.json
@@ -0,0 +1,28 @@
+{
+    "T": [
+        [
+            0.01868075138315295,
+            0.8881359544145228,
+            0.45920099738999437,
+            -0.19284748723617093
+        ],
+        [
+            0.002854261818715398,
+            -0.4593266423892484,
+            0.8882628489252997,
+            0.057154511710361816
+        ],
+        [
+            0.9998214254141742,
+            -0.01528273756969839,
+            -0.011115539354645598,
+            -0.04187423675125192
+        ],
+        [
+            0.0,
+            0.0,
+            0.0,
+            1.0
+        ]
+    ]
+}
--- a/tools/add_n_txt.py
+++ b/tools/add_n_txt.py
@@ -0,0 +1,21 @@
+import os
+
+# ======== 你可以修改的参数 ========
+start_id = 1           # 起始编号
+end_id = 200           # 结束编号
+save_dir = "labels"    # 输出目录
+prefix = "neg_"        # 文件名前缀
+zero_padding = 5       # 位数（例如 00001）
+# ==================================
+
+os.makedirs(save_dir, exist_ok=True)
+
+for i in range(start_id, end_id + 1):
+    name = f"{prefix}{str(i).zfill(zero_padding)}.txt"
+    path = os.path.join(save_dir, name)
+
+    # 创建空文件
+    with open(path, "w") as f:
+        pass
+
+    print("created:", path)
--- a/tools/calibration_tools.py
+++ b/tools/calibration_tools.py
@@ -192,13 +192,13 @@ class _Calibration:
                    np.linalg.inv(
                        self._function(
                            self._hand[i], self._hand[i + 1], self._hand[i + 2],
-                            self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
+                            self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
                        )
                    )
                    if self._mode == 'eye_to_hand' else
                    self._function(
                        self._hand[i], self._hand[i + 1], self._hand[i + 2],
-                        self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
+                        self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
                    )
                )
                self._Hcs.append(
--- a/tools/cap_video.py
+++ b/tools/cap_video.py
@@ -0,0 +1,39 @@
+import cv2
+# import numpy as np
+import time
+
+cap = cv2.VideoCapture(6)
+
+cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)   # 宽
+cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)   # 高
+
+frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+fourcc = cv2.VideoWriter_fourcc(*'MP4V')  # 'XVID', 'MJPG', 'MP4V' 等
+out = cv2.VideoWriter('video/output_720p.mp4', fourcc, 20.0, (frame_width, frame_height))
+
+i = 0
+
+time.sleep(1)
+
+while True:
+    # 逐帧捕获
+    ret, frame = cap.read()
+
+    if not ret:
+        print("读取摄像头画面失败")
+        break
+
+    if i <= 5:
+        i = i+1
+        continue
+
+    cv2.imshow('camera', frame)
+    out.write(frame)
+    if cv2.waitKey(5) & 0xFF == ord('q'):
+        break
+
+cap.release()
+out.release()
+cv2.destroyAllWindows()
--- a/tools/crop_from_video.py
+++ b/tools/crop_from_video.py
@@ -0,0 +1,31 @@
+import cv2
+import os
+
+video_path = "video/video_5.mp4"
+
+save_dir = "/home/lyx/Images/hivecore_box_datasets"
+os.makedirs(save_dir, exist_ok=True)
+
+cap = cv2.VideoCapture(video_path)
+if not cap.isOpened():
+    print("无法打开视频")
+    exit()
+
+frame_interval = 15
+frame_count = 0
+saved_count = 448
+
+while True:
+    ret, frame = cap.read()
+    if not ret:
+        break
+
+    if frame_count % frame_interval == 0:
+        save_path = os.path.join(save_dir, f"frame_{saved_count:06d}.jpg")
+        cv2.imwrite(save_path, frame)
+        saved_count += 1
+
+    frame_count += 1
+
+cap.release()
+print(f"共保存 {saved_count} 张图像到 {save_dir}")
--- a/tools/error_calculation_tool.py
+++ b/tools/error_calculation_tool.py
@@ -52,7 +52,7 @@ def calculate(mode, hand_path, camera_path, hand_eye_path):
    for i in range(sum_):
        hand = [hand_all[i*7], hand_all[i*7+1], hand_all[i*7+2], hand_all[i*7+3], hand_all[i*7+4], hand_all[i*7+5], hand_all[i*7+6]]
        camera = [camera_all[i*7], camera_all[i*7+1], camera_all[i*7+2], camera_all[i*7+3], camera_all[i*7+4], camera_all[i*7+5], camera_all[i*7+6]]
-        T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[3], hand[4], hand[5], hand[6])
+        T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[6], hand[3], hand[4], hand[5])
        T_cal_in_camera = get_matrix_quat(camera[0], camera[1], camera[2], camera[3], camera[4], camera[5], camera[6])

        if mode == "eye_to_hand":
--- a/tools/get_checkpoint_name_map.py
+++ b/tools/get_checkpoint_name_map.py
@@ -0,0 +1,17 @@
+import os
+import json
+from ultralytics import YOLO
+
+
+checkpoint_path = "vision_detect/checkpoints/medical_sense-seg.pt"
+save_path = "vision_detect/manager_map/label/medical_sense.json"
+
+model = YOLO(os.path.expanduser(checkpoint_path))
+
+# 反转：name -> id
+name_to_id = {v: k for k, v in model.names.items()}
+
+print(name_to_id)
+
+with open(os.path.expanduser(save_path), "w", encoding="utf-8") as f:
+    json.dump(name_to_id, f, ensure_ascii=False, indent=2)
--- a/tools/get_file_name_list.py
+++ b/tools/get_file_name_list.py
@@ -0,0 +1,16 @@
+import os
+
+
+file_dir = "/home/lyx/Datasets/hivecore_box_datasets/"
+
+with open(os.path.join(file_dir, "train.txt"), "w") as f:
+    for file in os.listdir(os.path.join(file_dir, 'images/train/')):
+        if file.endswith(".jpg"):
+            labels_dir = os.path.join(os.path.join('./images/train/', file))
+            f.write(labels_dir + "\n")
+
+with open(os.path.join(file_dir, "val.txt"), "w") as f:
+    for file in os.listdir(os.path.join(file_dir, 'images/val/')):
+        if file.endswith(".jpg"):
+            labels_dir = os.path.join(os.path.join('./images/val/', file))
+            f.write(labels_dir + "\n")
--- a/tools/get_train_and_val.py
+++ b/tools/get_train_and_val.py
@@ -0,0 +1,37 @@
+import os
+import shutil
+
+
+def check_dir(dirname):
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+
+file_dir = "/home/lyx/Datasets/hivecore_box_datasets/data"
+imgs_dir = "/home/lyx/Datasets/hivecore_box_datasets/images"
+labels_dir = "/home/lyx/Datasets/hivecore_box_datasets/labels"
+
+check_dir(imgs_dir)
+check_dir(os.path.join(imgs_dir, "train"))
+check_dir(os.path.join(imgs_dir, "val"))
+check_dir(labels_dir)
+check_dir(os.path.join(labels_dir, "train"))
+check_dir(os.path.join(labels_dir, "val"))
+
+i = 0
+for file in os.listdir(file_dir):
+    if file.endswith(".jpg"):
+        dot_index = file.rfind(".")
+        file_name = file[:dot_index]
+        label = file_name + '.txt'
+
+        if i % 10 == 9:
+            shutil.copy(os.path.join(file_dir, file), os.path.join(imgs_dir, "val",file))
+            shutil.copy(os.path.join(file_dir, label), os.path.join(labels_dir, "val", label))
+        else:
+            shutil.copy(os.path.join(file_dir, file), os.path.join(imgs_dir, "train", file))
+            shutil.copy(os.path.join(file_dir, label), os.path.join(labels_dir, "train", label))
+
+        i = i + 1
+
+
--- a/tools/json2yolo.py
+++ b/tools/json2yolo.py
@@ -0,0 +1,56 @@
+import json
+import os
+ 
+label_to_class_id = {
+    "box": 0,  # 从0开始
+    # 其他类别...
+}
+ 
+ 
+def convert_labelme_json_to_yolo(json_file, output_dir):
+    try:
+        with open(json_file, 'r') as f:
+            labelme_data = json.load(f)
+ 
+        img_width = labelme_data["imageWidth"]
+        img_height = labelme_data["imageHeight"]
+ 
+        file_name = os.path.splitext(os.path.basename(json_file))[0]
+        txt_path = os.path.join(output_dir, f"{file_name}.txt")
+ 
+        with open(txt_path, 'w') as txt_file:
+            for shape in labelme_data['shapes']:
+                label = shape['label']
+                points = shape['points']
+ 
+                if not points:
+                    continue
+ 
+                class_id = label_to_class_id.get(label)
+                if class_id is None:
+                    print(f"Warning: 跳过未定义标签 '{label}'")
+                    continue
+ 
+                # 检查多边形是否闭合
+                if points[0] != points[-1]:
+                    points.append(points[0])
+ 
+                normalized = [(x / img_width, y / img_height) for x, y in points]
+                line = f"{class_id} " + " ".join(f"{x:.6f} {y:.6f}" for x, y in normalized)
+                txt_file.write(line + "\n")
+ 
+    except Exception as e:
+        print(f"处理文件 {json_file} 时出错: {str(e)}")
+ 
+ 
+if __name__ == "__main__":
+    json_dir = "/home/lyx/Datasets/hivecore_box_datasets/data"  # labelme标注存放的目录
+    output_dir = "/home/lyx/Datasets/hivecore_box_datasets/data"  # 输出目录
+ 
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+ 
+    for json_file in os.listdir(json_dir):
+        if json_file.endswith(".json"):
+            json_path = os.path.join(json_dir, json_file)
+            convert_labelme_json_to_yolo(json_path, output_dir)
--- a/tools/pt2vino.py
+++ b/tools/pt2vino.py
@@ -0,0 +1,24 @@
+import argparse
+
+from ultralytics import YOLO
+
+
+def main(checkpoint_path):
+    model = YOLO(checkpoint_path)
+    model.export(
+        format="openvino",
+        imgsz=(1280, 720),
+        dynamic=True,
+        simplify=True,
+        half=True,
+        workspace=0.8,
+        batch=1,
+        device="cpu"
+    )
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("checkpoint_path", type=str)
+    args = parser.parse_args()
+    main(args.checkpoint_path)
+    # main(checkpoint_path="checkpoint/medical_sense-seg.pt")
--- a/vision_control/CMakeLists.txt
+++ b/vision_control/CMakeLists.txt
@@ -0,0 +1,56 @@
+cmake_minimum_required(VERSION 3.8)
+project(vision_control)
+
+if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  add_compile_options(-Wall -Wextra -Wpedantic)
+endif()
+
+# find dependencies
+find_package(ament_cmake REQUIRED)
+
+find_package(rclcpp REQUIRED)
+find_package(std_srvs REQUIRED)
+
+find_package(interfaces REQUIRED)
+find_package(orbbec_camera_msgs REQUIRED)
+
+
+add_executable(camera_control_node src/CameraRawControlNode.cpp)
+
+target_include_directories(camera_control_node PRIVATE /usr/include/nlohmann) # json
+
+target_include_directories(camera_control_node
+  PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+ament_target_dependencies(
+  camera_control_node 
+  rclcpp std_srvs interfaces orbbec_camera_msgs
+)
+
+install(
+  DIRECTORY 
+  launch
+  configs
+  DESTINATION share/${PROJECT_NAME}
+)
+
+install(
+  TARGETS camera_control_node
+  DESTINATION lib/${PROJECT_NAME}
+)
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  # the following line skips the linter which checks for copyrights
+  # comment the line when a copyright and license is added to all source files
+  set(ament_cmake_copyright_FOUND TRUE)
+  # the following line skips cpplint (only works in a git repo)
+  # comment the line when this package is in a git repo and when
+  # a copyright and license is added to all source files
+  set(ament_cmake_cpplint_FOUND TRUE)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_package()
--- a/vision_control/configs/raw_control_configs.json
+++ b/vision_control/configs/raw_control_configs.json
@@ -0,0 +1,8 @@
+{
+    "left_name": "/camera1",
+    "left_namespace": "/camera1",
+    "right_name": "/camera2",
+    "right_namespace": "/camera2",
+    "head_name": "/camera",
+    "head_namespace": "/camera"
+}
--- a/vision_control/include/vision_control/CameraRawControlNode.hpp
+++ b/vision_control/include/vision_control/CameraRawControlNode.hpp
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <string>
+
+#include <rclcpp/rclcpp.hpp>
+#include <rclcpp/parameter_client.hpp>
+#include <std_msgs/msg/string.hpp>
+
+#include "interfaces/srv/set_camera_raw_params.hpp"
+#include "interfaces/srv/set_camera_raw_status.hpp"
+#include "orbbec_camera_msgs/srv/set_int32.hpp"
+#include "std_srvs/srv/set_bool.hpp"
+
+
+class CameraRawControlNode : public rclcpp::Node {
+    public:
+        CameraRawControlNode(std::string name);
+
+    private:
+        bool left_sign, right_sign, head_sign;
+
+        std::string left_name, right_name, head_name;
+        std::string left_namespace, right_namespace, head_namespace;
+
+        rclcpp::CallbackGroup::SharedPtr client_cb_group_;
+
+        rclcpp::Service<interfaces::srv::SetCameraRawParams>::SharedPtr raw_params_service;
+        rclcpp::Service<interfaces::srv::SetCameraRawStatus>::SharedPtr raw_status_service;
+
+        // rclcpp::Client<orbbec_camera_msgs::srv::SetInt32> ::SharedPtr 
+
+        std::shared_ptr<rclcpp::SyncParametersClient> camera_left;
+        std::shared_ptr<rclcpp::SyncParametersClient> camera_right;
+        std::shared_ptr<rclcpp::SyncParametersClient> camera_head;
+
+        rclcpp::Client<orbbec_camera_msgs::srv::SetInt32>::SharedPtr color_exposure, depth_exposure;
+        rclcpp::Client<orbbec_camera_msgs::srv::SetInt32>::SharedPtr color_gain, depth_gain;
+
+        rclcpp::Client<std_srvs::srv::SetBool>::SharedPtr color_raw_control, depth_raw_control;
+
+        void raw_params_callback(
+            const std::shared_ptr<interfaces::srv::SetCameraRawParams::Request> &request,
+            const std::shared_ptr<interfaces::srv::SetCameraRawParams::Response> &response
+        );
+        void raw_status_callback(
+            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request,
+            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> &response
+        );
+
+};
--- a/vision_pose_msgs/package.xml
+++ b/vision_pose_msgs/package.xml
@@ -1,21 +1,20 @@
 <?xml version="1.0"?>
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
-  <name>vision_pose_msgs</name>
+  <name>vision_control</name>
  <version>0.0.0</version>
  <description>TODO: Package description</description>
-  <maintainer email="lyx@todo.todo">lyx</maintainer>
-  <license>Apache-2.0</license>
+  <maintainer email="16126883+liangyuxuan123@user.noreply.gitee.com">lyx</maintainer>
+  <license>TODO: License declaration</license>

  <buildtool_depend>ament_cmake</buildtool_depend>
-  <buildtool_depend>geometry_msgs</buildtool_depend>

-  <build_depend>rosidl_default_generators</build_depend>
-	<exec_depend>rosidl_default_runtime</exec_depend>
-	<member_of_group>rosidl_interface_packages</member_of_group>
-	
-	<depend>rclpy</depend>
+  <depend>rclcpp</depend>
+  <depend>std_srvs</depend>
  
+  <depend>orbbec_camera_msgs</depend>
+  <depend>interfaces</depend>
+
  <test_depend>ament_lint_auto</test_depend>
  <test_depend>ament_lint_common</test_depend>

--- a/vision_control/src/CameraRawControlNode.cpp
+++ b/vision_control/src/CameraRawControlNode.cpp
@@ -0,0 +1,379 @@
+#include <iostream>
+#include <fstream>
+
+#include <nlohmann/json.hpp>
+
+#include <rclcpp/parameter.hpp>
+#include <rclcpp/executors/multi_threaded_executor.hpp>
+#include "ament_index_cpp/get_package_share_directory.hpp"
+
+#include "vision_control/CameraRawControlNode.hpp"
+
+
+using json = nlohmann::json;
+using string = std::string;
+using raw_params_type = orbbec_camera_msgs::srv::SetInt32;
+
+
+const string SHARED_DIR = ament_index_cpp::get_package_share_directory("vision_control");
+const string CONFIGS_PATH = "configs/raw_control_configs.json";
+
+
+void create_params_client(
+    std::shared_ptr<rclcpp::SyncParametersClient> &camera_client,
+    rclcpp::Node *node,
+    const string &params_service_name,
+    const string position,
+    bool &sign
+) {
+    int i = 0;
+    camera_client = std::make_shared<rclcpp::SyncParametersClient>(node, params_service_name);
+    while (!camera_client->wait_for_service(std::chrono::seconds(1)) && i < 3) {
+        RCLCPP_INFO(
+            node->get_logger(), 
+            ("Waiting for " + position + " camera parameter service...").c_str()
+        );
+        i++;
+    }
+    if (i == 3) {
+        RCLCPP_WARN(
+            node->get_logger(), 
+            (position + " camera params service is unavailable").c_str()
+        );
+        sign = false;
+    } else {
+        RCLCPP_INFO(
+            node->get_logger(), 
+            (position + " camera params service is alreadly available").c_str()
+        );
+        sign = true;
+    }
+}
+
+
+CameraRawControlNode::CameraRawControlNode(std::string name) : Node(name) {
+
+    // create callback group
+    client_cb_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
+
+    // open configs file
+    try {
+        std::ifstream file(SHARED_DIR + "/" + CONFIGS_PATH);
+        RCLCPP_INFO(this->get_logger(), (SHARED_DIR + "/" + CONFIGS_PATH).c_str());
+        if (!file.is_open()) {
+            throw std::runtime_error("Can't open json file");
+        } else {
+            json configs;
+            file >> configs;
+            left_name = configs["left_name"];
+            right_name = configs["right_name"];
+            head_name = configs["head_name"];
+
+            left_namespace = configs["left_namespace"];
+            right_namespace = configs["right_namespace"];
+            head_namespace = configs["head_namespace"];
+        }
+    } catch (const std::runtime_error &e) {
+        RCLCPP_ERROR(this->get_logger(), e.what());
+        left_name = "/camera1";
+        right_name = "/camera2";
+        head_name = "/camera";
+
+        left_namespace = "/camera1";
+        right_namespace = "/camera2";
+        head_namespace = "/camera";
+
+    };
+
+    // create camera parameter client
+    create_params_client(camera_left, this, left_namespace + left_name, "left", left_sign);
+    create_params_client(camera_right, this, right_namespace + right_name, "right", right_sign);
+    create_params_client(camera_head, this, head_namespace + head_name, "head", head_sign);
+
+    if (head_sign) {
+        camera_head.reset();
+        color_exposure = this->create_client<raw_params_type>(
+            head_name + "/set_color_exposure", rmw_qos_profile_services_default, client_cb_group_);
+        depth_exposure = this->create_client<raw_params_type>(
+            head_name + "/set_depth_exposure", rmw_qos_profile_services_default, client_cb_group_);
+        color_gain = this->create_client<raw_params_type>(
+            head_name + "/set_color_gain", rmw_qos_profile_services_default, client_cb_group_);
+        depth_gain = this->create_client<raw_params_type>(
+            head_name + "/set_depth_gain", rmw_qos_profile_services_default, client_cb_group_);
+
+        color_raw_control = this->create_client<std_srvs::srv::SetBool>(
+            head_name + "/toggle_color", rmw_qos_profile_services_default, client_cb_group_);
+        depth_raw_control = this->create_client<std_srvs::srv::SetBool>(
+            head_name + "/toggle_depth", rmw_qos_profile_services_default, client_cb_group_);
+    }
+    
+    // create params control service
+    raw_params_service = this->create_service<interfaces::srv::SetCameraRawParams>(
+        "set_camera_raw_params",
+        std::bind(
+            &CameraRawControlNode::raw_params_callback,
+            this,
+            std::placeholders::_1,
+            std::placeholders::_2
+        )
+    );
+
+    // create status control service
+    raw_status_service = this->create_service<interfaces::srv::SetCameraRawStatus>(
+        "set_camera_raw_status",
+        [this](
+            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request, 
+            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> &response
+        ) {
+            raw_status_callback(request, response);
+        }
+    );
+}
+
+
+void CameraRawControlNode::raw_params_callback(
+    const std::shared_ptr<interfaces::srv::SetCameraRawParams::Request> &request,
+    const std::shared_ptr<interfaces::srv::SetCameraRawParams::Response> &response
+) {
+    if (request->camera_position == "left") {
+        string prefix;
+        if (!left_sign) {
+            response->success = false;
+            response->info = "Left camera params service is unavailable";
+            return;
+            
+        } else if (request->raw == "color") {
+            prefix = "rgb_camera";
+
+        } else if (request->raw == "depth") {
+            prefix = "depth_module";
+
+        } else {
+            response->success = false;
+            response->info = "raw is wrong: " + request->raw;
+            return;
+        }
+
+        rclcpp::Parameter exposure(prefix + ".exposure", request->exposure);
+        rclcpp::Parameter gain(prefix + ".gain", request->gain);
+        auto results = camera_left->set_parameters({exposure, gain});
+        response->success = true;
+        for (auto &result : results) {
+            if (!result.successful) {
+                response->success = false;
+                response->info += result.reason + " | ";
+            }
+        }
+
+    } else if (request->camera_position == "right") {
+        string prefix;
+        if (!right_sign) {
+            response->success = false;
+            response->info = "Right camera params service is unavailable";
+            return;
+        }
+        if (request->raw == "color") {
+            prefix = "rgb_camera";
+        } else if (request->raw == "depth") {
+            prefix = "depth_module";
+        } else {
+            response->success = false;
+            response->info = "raw is wrong: " + request->raw;
+            return;
+        }
+
+        rclcpp::Parameter exposure(prefix + ".exposure", request->exposure);
+        rclcpp::Parameter gain(prefix + ".gain", request->gain);
+        auto results = camera_right->set_parameters({exposure, gain});
+        response->success = true;
+        for (auto &result : results) {
+            if (!result.successful) {
+                response->success = false;
+                response->info += result.reason + " | ";
+            }
+        }
+
+    } else if (request->camera_position == "head") {
+        string prefix;
+        if (!head_sign) {
+            response->success = false;
+            response->info = "Head camera params service is unavailable";
+            return;
+        }
+        if (request->raw == "color") {
+            // /camera/set_color_exposure
+            // /camera/set_color_gain
+
+            auto exposure_request = std::make_shared<raw_params_type::Request>();
+            auto gain_request = std::make_shared<raw_params_type::Request>();
+
+            exposure_request->data = request->exposure;
+            gain_request->data = request->gain;
+
+            auto future_exposure = color_exposure->async_send_request(exposure_request);
+            auto future_gain = color_gain->async_send_request(gain_request);
+
+            response->success = true;
+
+            auto result_exposure = future_exposure.get();
+            auto result_gain = future_gain.get();
+
+            response->success = response->success && result_exposure->success;
+            if (!response->success) {
+                response->info += result_exposure->message;
+            }
+
+            response->success = response->success && result_gain->success;
+            if (!response->success) {
+                response->info += result_gain->message;
+            }
+
+            return;
+
+        } else if (request->raw == "depth") {
+            // /camera/set_depth_exposure
+            // /camera/set_depth_gain
+
+            auto exposure_request = std::make_shared<raw_params_type::Request>();
+            auto gain_request = std::make_shared<raw_params_type::Request>();
+
+            exposure_request->data = request->exposure;
+            gain_request->data = request->gain;
+
+            auto future_exposure = depth_exposure->async_send_request(exposure_request);
+            auto future_gain = depth_gain->async_send_request(gain_request);
+
+            response->success = true;
+
+            auto result_exposure = future_exposure.get();
+            auto result_gain = future_gain.get();
+
+            response->success = response->success && result_exposure->success;
+            if (!response->success) {
+                response->info += result_exposure->message;
+            }
+
+            response->success = response->success && result_gain->success;
+            if (!response->success) {
+                response->info += result_gain->message;
+            }
+
+            return;
+
+        } else {
+            response->success = false;
+            response->info = "raw is wrong: " + request->raw;
+            return;
+        }
+
+
+    } else {
+        response->success = false;
+        response->info = ("camera position is wrong: " + request->camera_position);
+    }   
+}
+
+
+void CameraRawControlNode::raw_status_callback(
+    const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request,
+    std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> const &response
+) {
+    std::shared_ptr<rclcpp::SyncParametersClient> camera = nullptr;
+    if (request->camera_position == "left") {
+        if (left_sign) {
+            camera = camera_left;
+        } else {
+            response->success = false;
+            response->info = "Left camera params service is unavailable.";
+            return;
+        }
+
+
+    } else if (request->camera_position == "right") {
+        if (right_sign) {
+            camera = camera_right;
+        } else {
+            response->success = false;
+            response->info = "Right camera params service is unavailable.";
+            return;
+        }
+
+    } else if (request->camera_position == "head") {
+        if (head_sign) {
+            auto request_c = std::make_shared<std_srvs::srv::SetBool::Request>();
+            auto request_d = std::make_shared<std_srvs::srv::SetBool::Request>();
+
+            request_c->data = request->color_raw;
+            request_d->data = request->depth_raw;
+
+            auto future_c = color_raw_control->async_send_request(request_c);
+            auto future_d = depth_raw_control->async_send_request(request_d);
+
+            auto result_c = future_c.get();
+            auto result_d = future_d.get();
+
+            response->success = true;
+
+            response->success = response->success && result_c->success;
+            if (!response->success) {
+                response->info += result_c->message;
+            }
+
+            response->success = response->success && result_d->success;
+            if (!response->success) {
+                response->info += " | ";
+                response->info += result_d->message;
+            }
+            
+            return;
+
+        } else {
+            response->success = false;
+            response->info = "Head camera params service is unavailable.";
+            return;
+        }
+
+    } else {
+        response->success = false;
+        response->info = ("camera position is wrong: " + request->camera_position);
+        return;
+    } 
+
+    rclcpp::Parameter color_raw("enable_color", request->color_raw);
+    rclcpp::Parameter depth_raw("enable_depth", request->depth_raw);
+    rclcpp::Parameter ir_raw("enable_ir", request->ir_raw);
+
+    auto results = camera->set_parameters({color_raw, depth_raw, ir_raw});
+    response->success = false;
+    for (auto &result : results) {
+        if (!result.successful) {
+            response->success = response->success || false;
+            response->info += result.reason + " | ";
+        } else {
+            response->success = response->success || true;
+        }
+    }
+    return; 
+}
+
+
+int main(int argc, char *argv[]) {
+    rclcpp::init(argc, argv);
+    auto node = std::make_shared<CameraRawControlNode>("camera_raw_control_node");
+    auto executor = rclcpp::executors::MultiThreadedExecutor();
+    try {
+        executor.add_node(node);
+        executor.spin();
+        executor.remove_node(node);
+
+    } catch (...) {
+
+    }
+
+    node.reset();
+    if (rclcpp::ok()) {
+        rclcpp::shutdown();
+    }
+
+    return 0;
+}
--- a/vision_detect/init.py
+++ b/vision_detect/init.py
--- a/vision_detect/configs/hand_eye_mat/eye_in_left_hand.json
+++ b/vision_detect/configs/hand_eye_mat/eye_in_left_hand.json
--- a/vision_detect/configs/hand_eye_mat/eye_in_right_hand.json
+++ b/vision_detect/configs/hand_eye_mat/eye_in_right_hand.json
--- a/vision_detect/configs/hand_eye_mat/eye_to_hand.json
+++ b/vision_detect/configs/hand_eye_mat/eye_to_hand.json
--- a/vision_detect/checkpoints/medical_sense-seg.pt
+++ b/vision_detect/checkpoints/medical_sense-seg.pt
--- a/vision_detect/checkpoints/posenet.pt
+++ b/vision_detect/checkpoints/posenet.pt
--- a/vision_detect/checkpoints/yolo11s-seg.onnx
+++ b/vision_detect/checkpoints/yolo11s-seg.onnx
--- a/vision_detect/configs/flexiv_configs/default_config.json
+++ b/vision_detect/configs/flexiv_configs/default_config.json
--- a/vision_detect/configs/launch_configs/bottle_detect_service_icp.json
+++ b/vision_detect/configs/launch_configs/bottle_detect_service_icp.json
@@ -2,28 +2,31 @@
    "node_name": "bottle_detect_service",
    "output_boxes": "True",
    "output_masks": "False",
-    "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
-    "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
-    "head": "configs/hand_eye_mat/eye_to_hand.json",
+
+    "calibration": {
+        "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
+        "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
+        "head": "configs/hand_eye_mat/eye_to_hand.json"
+    },
+
    "get_camera_mode": "Service",
    "Service_configs": {
        "service_name": "/vision_object_recognition"
    },
+
    "detect_mode": "Detect",
    "Detect_configs": {
        "checkpoint_path": "checkpoints/yolo11s-seg.pt",
        "confidence": 0.50,
        "classes": [39]
    },
+
    "calculate_mode": "PCA",
    "ICP_configs": {
        "complete_model_path": "pointclouds/bottle_model.pcd",
        "depth_scale": 1000.0,
        "depth_trunc": 2.0,
-        "nb_points": 10,
-        "radius": 0.1,
-        "nb_neighbors": 20,
-        "std_ratio": 3.0,
+        "voxel_size": 0.010,
        "ransac_voxel_size": 0.005,
        "icp_voxel_radius": [0.004, 0.002, 0.001],
        "icp_max_iter": [50, 30, 14]
--- a/vision_detect/configs/launch_configs/bottle_detect_service_pca.json
+++ b/vision_detect/configs/launch_configs/bottle_detect_service_pca.json
@@ -2,27 +2,29 @@
    "node_name": "bottle_detect_service",
    "output_boxes": "True",
    "output_masks": "False",
-    "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
-    "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
-    "head": "configs/hand_eye_mat/eye_to_hand.json",
+
+    "calibration": {
+        "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
+        "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
+        "head": "configs/hand_eye_mat/eye_to_hand.json"
+    },
+
    "get_camera_mode": "Service",
    "Service_configs": {
        "service_name": "/vision_object_recognition"
    },
+
    "detect_mode": "Detect",
    "Detect_configs": {
        "checkpoint_path": "checkpoints/yolo11s-seg.pt",
        "confidence": 0.50,
        "classes": [39]
    },
+
    "calculate_mode": "PCA",
    "PCA_configs": {
        "depth_scale": 1000.0,
        "depth_trunc": 3.0,
-        "voxel_size": 0.020,
-        "nb_points": 10,
-        "radius": 0.1,
-        "nb_neighbors": 20,
-        "std_ratio": 3.0
+        "voxel_size": 0.010
    }
 }
--- a/vision_detect/configs/launch_configs/crossboard_topic_pca.json
+++ b/vision_detect/configs/launch_configs/crossboard_topic_pca.json
@@ -2,9 +2,13 @@
    "node_name": "crossboard_topic",
    "output_boxes": "True",
    "output_masks": "False",
-    "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
-    "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
-    "head": "configs/hand_eye_mat/eye_to_hand.json",
+
+    "calibration": {
+        "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
+        "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
+        "head": "configs/hand_eye_mat/eye_to_hand.json"
+    },
+
    "get_camera_mode": "Topic",
    "Topic_configs": {
        "position": "right",
@@ -12,10 +16,12 @@
        "depth_image_topic_name": "/camera/depth/image_raw",
        "camera_info_topic_name": "/camera/color/camera_info"
    },
+
    "detect_mode": "Crossboard",
    "Crossboard_configs": {
        "pattern_size": [8, 5]
    },
+
    "calculate_mode": "PCA",
    "PCA_configs": {
        "depth_scale": 1000.0,
--- a/vision_detect/configs/launch/default_action_config.json
+++ b/vision_detect/configs/launch/default_action_config.json
@@ -0,0 +1,101 @@
+{
+    "node_name": "default_config_detect_service",
+    "output_boxes": false,
+    "output_masks": false,
+    "save_image": true,
+    "image_save_dir": "~/images",
+
+    "calibration": {
+        "left_hand": "calibration/eye_in_left_hand.json",
+        "right_hand": "calibration/eye_in_right_hand.json",
+        "head": "calibration/eye_to_hand.json"
+    },
+
+    "node_mode": "ACTION",
+    "service_node_configs": {
+        "service_name": "/vision_object_recognition"
+    },
+    "publisher_node_configs": {
+        "publish_time": 0.1,
+        "position": "right",
+        "publisher_name": "/detect/pose"
+    },
+    "action_node_configs": {
+        "action_name": "/vision_object_recognition"
+    },
+
+    "image_source": "DRIVER",
+    "preprocess_configs": {
+        "distortion": false,
+        "denoising": false,
+        "enhancement": false,
+        "quality": false,
+        "quality_threshold": 100.0
+    },
+    "driver_source_configs": {
+        "subscription_name": "/img_msg"
+    },
+    "direct_source_configs": {
+        "position": "right",
+        "color_image_topic_name": "/camera/color/image_raw",
+        "depth_image_topic_name": "/camera/depth/image_raw",
+        "camera_info_topic_name": "/camera/color/camera_info"
+    },
+    "topic_source_configs": {
+        "left": [
+            "/camera1/camera1/color/image_raw",
+            "/camera1/camera1/aligned_depth_to_color/image_raw",
+            "/camera1/camera1/color/camera_info"
+        ],
+        "right": [
+            "/camera2/camera2/color/image_raw",
+            "/camera2/camera2/aligned_depth_to_color/image_raw",
+            "/camera2/camera2/color/camera_info"
+        ],
+        "head": [
+            "/camera/color/image_raw",
+            "/camera/depth/image_raw",
+            "/camera/color/camera_info"
+        ]
+    },
+
+    "detect_mode": "OBJECT",
+    "object_detector_configs": {
+        "checkpoint_path": "checkpoints/yolo11s-seg.pt",
+        "confidence": 0.70,
+        "label_map_path": "map/label/medical_sense.json",
+        "classes": []
+    },
+    "color_detector_configs": {
+        "distance": 1500,
+        "color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
+    },
+    "crossboard_detector_configs": {
+        "pattern_size": [8, 5]
+    },
+
+    "estimate_mode": "PCA",
+    "pca_estimator_configs": {
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.004
+    },
+    "icp_estimator_configs": {
+        "complete_model_path": "pointclouds/bottle_model.pcd",
+        "depth_scale": 1000.0,
+        "depth_trunc": 2.0,
+        "voxel_size": 0.002,
+        "ransac_voxel_size": 0.005,
+        "icp_voxel_radius": [0.004, 0.002, 0.001],
+        "icp_max_iter": [50, 30, 14]
+    },
+    "gsnet_estimator_configs": {
+        "checkpoint_path": "checkpoints/posenet.pt",
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.010,
+        "collision_thresh": 0.00
+    },
+
+    "refine_mode": "NO"
+}
--- a/vision_detect/configs/launch/default_service_config.json
+++ b/vision_detect/configs/launch/default_service_config.json
@@ -0,0 +1,84 @@
+{
+    "node_name": "default_config_detect_service",
+    "output_boxes": false,
+    "output_masks": false,
+    "save_image": true,
+    "image_save_dir": "~/images",
+
+    "calibration": {
+        "left_hand": "calibration/eye_in_left_hand.json",
+        "right_hand": "calibration/eye_in_right_hand.json",
+        "head": "calibration/eye_to_hand.json"
+    },
+
+    "node_mode": "SERVICE",
+    "service_node_configs": {
+        "service_name": "/vision_object_recognition"
+    },
+    "publisher_node_configs": {
+        "publish_time": 0.1,
+        "position": "right",
+        "publisher_name": "/detect/pose"
+    },
+    "action_node_configs": {
+        "action_name": "/vision_object_recognition"
+    },
+
+    "image_source": "DRIVER",
+    "preprocess_configs": {
+        "distortion": false,
+        "denoising": false,
+        "enhancement": false,
+        "quality": false,
+        "quality_threshold": 100.0
+    },
+    "driver_source_configs": {
+        "subscription_name": "/img_msg"
+    },
+    "direct_source_configs": {
+        "position": "right",
+        "color_image_topic_name": "/camera/color/image_raw",
+        "depth_image_topic_name": "/camera/depth/image_raw",
+        "camera_info_topic_name": "/camera/color/camera_info"
+    },
+
+    "detect_mode": "OBJECT",
+    "object_detector_configs": {
+        "checkpoint_path": "checkpoints/yolo11s-seg.onnx",
+        "confidence": 0.70,
+        "label_map_path": "map/label/medical_sense.json",
+        "classes": []
+    },
+    "color_detector_configs": {
+        "distance": 1500,
+        "color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
+    },
+    "crossboard_detector_configs": {
+        "pattern_size": [8, 5]
+    },
+
+    "estimate_mode": "PCA",
+    "pca_estimator_configs": {
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.004
+    },
+    "icp_estimator_configs": {
+        "complete_model_path": "pointclouds/bottle_model.pcd",
+        "depth_scale": 1000.0,
+        "depth_trunc": 2.0,
+        "voxel_size": 0.002,
+        "ransac_voxel_size": 0.005,
+        "icp_voxel_radius": [0.004, 0.002, 0.001],
+        "icp_max_iter": [50, 30, 14]
+    },
+    "gsnet_estimator_configs": {
+        "checkpoint_path": "checkpoints/posenet.pt",
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.010,
+        "collision_thresh": 0.00
+    },
+
+    "refine_mode": "FIXED"
+}
--- a/vision_detect/configs/launch_configs/detect_service_pca.json
+++ b/vision_detect/configs/launch_configs/detect_service_pca.json
@@ -2,27 +2,29 @@
    "node_name": "detect_service",
    "output_boxes": "True",
    "output_masks": "False",
-    "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
-    "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
-    "head": "configs/hand_eye_mat/eye_to_hand.json",
+
+    "calibration": {
+        "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
+        "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
+        "head": "configs/hand_eye_mat/eye_to_hand.json"
+    },
+
    "get_camera_mode": "Service",
    "Service_configs": {
        "service_name": "/vision_object_recognition"
    },
+
    "detect_mode": "Detect",
    "Detect_configs": {
        "checkpoint_path": "checkpoints/yolo11s-seg.pt",
        "confidence": 0.50,
        "classes": []
    },
+
    "calculate_mode": "PCA",
    "PCA_configs": {
        "depth_scale": 1000.0,
        "depth_trunc": 3.0,
-        "voxel_size": 0.020,
-        "nb_points": 10,
-        "radius": 0.1,
-        "nb_neighbors": 20,
-        "std_ratio": 3.0
+        "voxel_size": 0.010
    }
 }
--- a/vision_detect/configs/launch/medical_sense.json
+++ b/vision_detect/configs/launch/medical_sense.json
@@ -0,0 +1,37 @@
+{
+    "node_name": "medical_sense_service",
+    "output_boxes": "True",
+    "output_masks": "False",
+
+    "calibration": {
+        "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
+        "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
+        "head": "configs/hand_eye_mat/eye_to_hand.json"
+    },
+
+    "get_camera_mode": "Service",
+    "Service_configs": {
+        "service_name": "/vision_object_recognition"
+    },
+
+    "detect_mode": "Detect",
+    "Detect_configs": {
+        "checkpoint_path": "checkpoints/medical_sense-seg.pt",
+        "confidence": 0.75,
+        "classes": []
+    },
+
+    "calculate_mode": "PCA",
+    "PCA_configs": {
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.004
+    },
+    "E2E_configs": {
+        "checkpoint_path": "checkpoints/posenet.pt",
+        "depth_scale": 1000.0,
+        "depth_trunc": 3.0,
+        "voxel_size": 0.0005,
+        "collision_thresh": 0.01
+    }
+}
--- a/vision_detect/configs/launch/source_test_config.json
+++ b/vision_detect/configs/launch/source_test_config.json
@@ -0,0 +1,47 @@
+{
+    "node_name": "source_test_node",
+    "output_boxes": false,
+    "output_masks": false,
+    "save_image": false,
+    "image_save_dir": "~/images",
+
+    "calibration": {
+        "left_hand": "calibration/eye_in_left_hand.json",
+        "right_hand": "calibration/eye_in_right_hand.json",
+        "head": "calibration/eye_to_hand.json"
+    },
+
+    "node_mode": "SERVICE",
+    "service_node_configs": {
+        "service_name": "/vision_object_recognition"
+    },
+    "publisher_node_configs": {
+        "publish_time": 0.1,
+        "position": "right",
+        "publisher_name": "/source_test/processed_image"
+    },
+    "action_node_configs": {
+        "action_name": "/vision_object_recognition"
+    },
+
+    "image_source": "DIRECT",
+    "direct_source_configs": {
+        "position": "right",
+        "color_image_topic_name": "/camera/color/image_raw",
+        "depth_image_topic_name": "/camera/depth/image_raw",
+        "camera_info_topic_name": "/camera/color/camera_info"
+    },
+    "preprocess_configs": {},
+
+    "detect_mode": "OBJECT",
+    "object_detector_configs": {},
+    "color_detector_configs": {},
+    "crossboard_detector_configs": {},
+
+    "estimate_mode": "PCA",
+    "pca_estimator_configs": {},
+    "icp_estimator_configs": {},
+    "gsnet_estimator_configs": {},
+
+    "refine_mode": "FIXED"
+}
--- a/vision_detect/configs/launch_configs/default_config.json
+++ b/vision_detect/configs/launch_configs/default_config.json
@@ -1,53 +0,0 @@
-{
-    "node_name": "default_config_detect_service",
-    "output_boxes": "True",
-    "output_masks": "False",
-    "left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
-    "right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
-    "head": "configs/hand_eye_mat/eye_to_hand.json",
-    "get_camera_mode": "Service",
-    "Service_configs": {
-        "service_name": "/vision_object_recognition"
-    },
-    "Topic_configs": {
-        "position": "right",
-        "color_image_topic_name": "/camera/color/image_raw",
-        "depth_image_topic_name": "/camera/depth/image_raw",
-        "camera_info_topic_name": "/camera/color/camera_info"
-    },
-    "detect_mode": "Detect",
-    "Detect_configs": {
-        "checkpoint_path": "checkpoints/yolo11s-seg.pt",
-        "confidence": 0.50,
-        "classes": []
-    },
-    "Color_configs": {
-        "distance": 1500,
-        "color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
-    },
-    "Crossboard_configs": {
-        "pattern_size": [8, 5]
-    },
-    "calculate_mode": "PCA",
-    "PCA_configs": {
-        "depth_scale": 1000.0,
-        "depth_trunc": 3.0,
-        "voxel_size": 0.020,
-        "nb_points": 10,
-        "radius": 0.1,
-        "nb_neighbors": 20,
-        "std_ratio": 3.0
-    },
-    "ICP_configs": {
-        "complete_model_path": "pointclouds/bottle_model.pcd",
-        "depth_scale": 1000.0,
-        "depth_trunc": 2.0,
-        "nb_points": 10,
-        "radius": 0.1,
-        "nb_neighbors": 20,
-        "std_ratio": 3.0,
-        "ransac_voxel_size": 0.005,
-        "icp_voxel_radius": [0.004, 0.002, 0.001],
-        "icp_max_iter": [50, 30, 14]
-    }
-}
--- a/vision_detect/launch/bottle_detect_service_icp.launch.py
+++ b/vision_detect/launch/bottle_detect_service_icp.launch.py
@@ -9,41 +9,17 @@ from ament_index_python.packages import get_package_share_directory


 share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/bottle_detect_service_icp.json')
+config_dir = os.path.join(share_dir, 'configs/launch/bottle_detect_service_icp.json')
 with open(config_dir, "r") as f:
    configs = json.load(f)

 def generate_launch_description():
    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('left_hand', default_value=configs['left_hand']),
-        DeclareLaunchArgument('right_hand', default_value=configs['right_hand']),
-        DeclareLaunchArgument('head', default_value=configs['eye_to_handhead_mat_path']),
-
-        DeclareLaunchArgument('get_camera_mode', default_value=configs['get_camera_mode']),
-        DeclareLaunchArgument('detect_mode', default_value=configs['detect_mode']),
-        DeclareLaunchArgument('calculate_mode', default_value=configs['calculate_mode']),
-
-        DeclareLaunchArgument("Service_configs", default_value=json.dumps(configs['Service_configs'])),
-        DeclareLaunchArgument("Detect_configs", default_value=json.dumps(configs['Detect_configs'])),
-        DeclareLaunchArgument("ICP_configs", default_value=json.dumps(configs['ICP_configs'])),
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
    ]

    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        left_hand = LaunchConfiguration('left_hand').perform(context)
-        right_hand = LaunchConfiguration('right_hand').perform(context)
-        head = LaunchConfiguration('head').perform(context)
-
-        get_camera_mode = LaunchConfiguration('get_camera_mode').perform(context)
-        detect_mode = LaunchConfiguration('detect_mode').perform(context)
-        calculate_mode = LaunchConfiguration('calculate_mode').perform(context)
-
-        Service_configs = LaunchConfiguration('Service_configs').perform(context)
-        Detect_configs = LaunchConfiguration('Detect_configs').perform(context)
-        ICP_configs = LaunchConfiguration('ICP_configs').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)

        return [
            Node(
@@ -51,19 +27,7 @@ def generate_launch_description():
                executable='detect_node',
                name=configs['node_name'],
                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'left_hand': left_hand,
-                    'right_hand': right_hand,
-                    'head': head,
-
-                    'get_camera_mode': get_camera_mode,
-                    'detect_mode': detect_mode,
-                    'calculate_mode': calculate_mode,
-
-                    'Service_configs': Service_configs,
-                    'Detect_configs': Detect_configs,
-                    'ICP_configs': ICP_configs,
+                    'configs_path': configs_path,
                }]
            )
        ]
--- a/vision_detect/launch/bottle_detect_service_pca.launch.py
+++ b/vision_detect/launch/bottle_detect_service_pca.launch.py
@@ -9,41 +9,17 @@ from ament_index_python.packages import get_package_share_directory


 share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/bottle_detect_service_pca.json')
+config_dir = os.path.join(share_dir, 'configs/launch/bottle_detect_service_pca.json')
 with open(config_dir, "r") as f:
    configs = json.load(f)

 def generate_launch_description():
    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('left_hand', default_value=configs['left_hand']),
-        DeclareLaunchArgument('right_hand', default_value=configs['right_hand']),
-        DeclareLaunchArgument('head', default_value=configs['head']),
-
-        DeclareLaunchArgument('get_camera_mode', default_value=configs['get_camera_mode']),
-        DeclareLaunchArgument('detect_mode', default_value=configs['detect_mode']),
-        DeclareLaunchArgument('calculate_mode', default_value=configs['calculate_mode']),
-
-        DeclareLaunchArgument("Service_configs", default_value=json.dumps(configs['Service_configs'])),
-        DeclareLaunchArgument("Detect_configs", default_value=json.dumps(configs['Detect_configs'])),
-        DeclareLaunchArgument("PCA_configs", default_value=json.dumps(configs['PCA_configs'])),
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
    ]

    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        left_hand = LaunchConfiguration('left_hand').perform(context)
-        right_hand = LaunchConfiguration('right_hand').perform(context)
-        head = LaunchConfiguration('head').perform(context)
-
-        get_camera_mode = LaunchConfiguration('get_camera_mode').perform(context)
-        detect_mode = LaunchConfiguration('detect_mode').perform(context)
-        calculate_mode = LaunchConfiguration('calculate_mode').perform(context)
-
-        Service_configs = LaunchConfiguration('Service_configs').perform(context)
-        Detect_configs = LaunchConfiguration('Detect_configs').perform(context)
-        PCA_configs = LaunchConfiguration('PCA_configs').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)

        return [
            Node(
@@ -51,19 +27,7 @@ def generate_launch_description():
                executable='detect_node',
                name=configs['node_name'],
                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'left_hand': left_hand,
-                    'right_hand': right_hand,
-                    'head': head,
-
-                    'get_camera_mode': get_camera_mode,
-                    'detect_mode': detect_mode,
-                    'calculate_mode': calculate_mode,
-
-                    'Service_configs': Service_configs,
-                    'Detect_configs': Detect_configs,
-                    'PCA_configs': PCA_configs,
+                    'configs_path': configs_path,
                }]
            )
        ]
--- a/vision_detect/launch/crossboard.launch.py
+++ b/vision_detect/launch/crossboard.launch.py
@@ -1,47 +0,0 @@
-from launch import LaunchDescription
-from launch_ros.actions import Node
-from launch.actions import DeclareLaunchArgument, OpaqueFunction
-from launch.substitutions import LaunchConfiguration
-
-import os
-import ast
-import json
-from ament_index_python.packages import get_package_share_directory
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def generate_launch_description():
-    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value='True'),
-        DeclareLaunchArgument('output_masks', default_value='False'),
-        DeclareLaunchArgument('color_image_topic', default_value='/camera/color/image_raw'),
-        DeclareLaunchArgument('depth_image_topic', default_value='/camera/depth/image_raw'),
-        DeclareLaunchArgument('camera_info_topic', default_value='/camera/color/camera_info'),
-    ]
-
-    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
-        depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
-        camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
-
-        return [
-            Node(
-                package='vision_detect',
-                executable='crossboard_detect_node',
-                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'color_image_topic': color_image_topic,
-                    'depth_image_topic': depth_image_topic,
-                    'camera_info_topic': camera_info_topic,
-                }]
-            )
-        ]
-
-    return LaunchDescription(args_detect + [
-        OpaqueFunction(function=create_detect_node),
-    ])
--- a/vision_detect/launch/crossboard_topic_pca.launch.py
+++ b/vision_detect/launch/crossboard_topic_pca.launch.py
@@ -9,42 +9,17 @@ from ament_index_python.packages import get_package_share_directory


 share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/crossboard_topic_pca.json')
+config_dir = os.path.join(share_dir, 'configs/launch/crossboard_topic_pca.json')
 with open(config_dir, "r") as f:
    configs = json.load(f)

 def generate_launch_description():
    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('left_hand', default_value=configs['left_hand']),
-        DeclareLaunchArgument('right_hand', default_value=configs['right_hand']),
-        DeclareLaunchArgument('head', default_value=configs['head']),
-
-
-        DeclareLaunchArgument('get_camera_mode', default_value=configs['get_camera_mode']),
-        DeclareLaunchArgument('detect_mode', default_value=configs['detect_mode']),
-        DeclareLaunchArgument('calculate_mode', default_value=configs['calculate_mode']),
-
-        DeclareLaunchArgument("Topic_configs", default_value=json.dumps(configs['Topic_configs'])),
-        DeclareLaunchArgument("Crossboard_configs", default_value=json.dumps(configs['Crossboard_configs'])),
-        DeclareLaunchArgument("PCA_configs", default_value=json.dumps(configs['PCA_configs'])),
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
    ]

    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        left_hand = LaunchConfiguration('left_hand').perform(context)
-        right_hand = LaunchConfiguration('right_hand').perform(context)
-        head = LaunchConfiguration('head').perform(context)
-
-        get_camera_mode = LaunchConfiguration('get_camera_mode').perform(context)
-        detect_mode = LaunchConfiguration('detect_mode').perform(context)
-        calculate_mode = LaunchConfiguration('calculate_mode').perform(context)
-
-        Topic_configs = LaunchConfiguration('Topic_configs').perform(context)
-        Crossboard_configs = LaunchConfiguration('Crossboard_configs').perform(context)
-        PCA_configs = LaunchConfiguration('PCA_configs').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)

        return [
            Node(
@@ -52,19 +27,7 @@ def generate_launch_description():
                executable='detect_node',
                name=configs['node_name'],
                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'left_hand': left_hand,
-                    'right_hand': right_hand,
-                    'head': head,
-
-                    'get_camera_mode': get_camera_mode,
-                    'detect_mode': detect_mode,
-                    'calculate_mode': calculate_mode,
-
-                    'Topic_configs': Topic_configs,
-                    'Crossboard_configs': Crossboard_configs,
-                    'PCA_configs': PCA_configs,
+                    'configs_path': configs_path,
                }]
            )
        ]
--- a/vision_detect/launch/detect_service.launch.py
+++ b/vision_detect/launch/detect_service.launch.py
@@ -1,49 +0,0 @@
-from launch import LaunchDescription
-from launch_ros.actions import Node
-from launch.actions import DeclareLaunchArgument, OpaqueFunction
-from launch.substitutions import LaunchConfiguration
-
-import os
-import ast
-import json
-from ament_index_python.packages import get_package_share_directory
-
-
-share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/detect_service.json')
-with open(config_dir, "r") as f:
-    configs = json.load(f)
-
-def generate_launch_description():
-    args_detect = [
-        DeclareLaunchArgument('checkpoint_name', default_value=configs['checkpoint_name']),
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('set_confidence', default_value=configs['set_confidence']),
-        DeclareLaunchArgument('classes', default_value=configs['classes']),
-    ]
-
-    def create_detect_node(context):
-        checkpoint = LaunchConfiguration('checkpoint_name').perform(context)
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        conf = LaunchConfiguration('set_confidence').perform(context)
-        classes = LaunchConfiguration('classes').perform(context)
-
-        return [
-            Node(
-                package='vision_detect',
-                executable='detect_service_node',
-                parameters=[{
-                    'checkpoint_name': checkpoint,
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'set_confidence': float(conf),
-                    'classes': classes,
-                }]
-            )
-        ]
-
-    return LaunchDescription(args_detect + [
-        OpaqueFunction(function=create_detect_node),
-    ])
--- a/vision_detect/launch/detect_service_pca.launch.py
+++ b/vision_detect/launch/detect_service_pca.launch.py
@@ -9,41 +9,17 @@ from ament_index_python.packages import get_package_share_directory


 share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/detect_service_pca.json')
+config_dir = os.path.join(share_dir, 'configs/launch/detect_service_pca.json')
 with open(config_dir, "r") as f:
    configs = json.load(f)

 def generate_launch_description():
    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('left_hand', default_value=configs['left_hand']),
-        DeclareLaunchArgument('right_hand', default_value=configs['right_hand']),
-        DeclareLaunchArgument('head', default_value=configs['head']),
-
-        DeclareLaunchArgument('get_camera_mode', default_value=configs['get_camera_mode']),
-        DeclareLaunchArgument('detect_mode', default_value=configs['detect_mode']),
-        DeclareLaunchArgument('calculate_mode', default_value=configs['calculate_mode']),
-
-        DeclareLaunchArgument("Service_configs", default_value=json.dumps(configs['Service_configs'])),
-        DeclareLaunchArgument("Detect_configs", default_value=json.dumps(configs['Detect_configs'])),
-        DeclareLaunchArgument("PCA_configs", default_value=json.dumps(configs['PCA_configs'])),
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
    ]

    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        left_hand = LaunchConfiguration('left_hand').perform(context)
-        right_hand = LaunchConfiguration('right_hand').perform(context)
-        head = LaunchConfiguration('head').perform(context)
-
-        get_camera_mode = LaunchConfiguration('get_camera_mode').perform(context)
-        detect_mode = LaunchConfiguration('detect_mode').perform(context)
-        calculate_mode = LaunchConfiguration('calculate_mode').perform(context)
-
-        Service_configs = LaunchConfiguration('Service_configs').perform(context)
-        Detect_configs = LaunchConfiguration('Detect_configs').perform(context)
-        PCA_configs = LaunchConfiguration('PCA_configs').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)

        return [
            Node(
@@ -51,19 +27,7 @@ def generate_launch_description():
                executable='detect_node',
                name=configs['node_name'],
                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'left_hand': left_hand,
-                    'right_hand': right_hand,
-                    'head': head,
-
-                    'get_camera_mode': get_camera_mode,
-                    'detect_mode': detect_mode,
-                    'calculate_mode': calculate_mode,
-
-                    'Service_configs': Service_configs,
-                    'Detect_configs': Detect_configs,
-                    'PCA_configs': PCA_configs,
+                    'configs_path': configs_path,
                }]
            )
        ]
--- a/vision_detect/launch/example_configs.launch.py
+++ b/vision_detect/launch/example_configs.launch.py
@@ -9,78 +9,18 @@ from ament_index_python.packages import get_package_share_directory


 share_dir = get_package_share_directory('vision_detect')
-config_dir = os.path.join(share_dir, 'configs/launch_configs/default_config.json')
+config_dir = os.path.join(share_dir, 'configs/launch/default_service_config.json')
 with open(config_dir, "r") as f:
    configs = json.load(f)

-# def get_param_configs():
-#     param_configs = {}
-#     if configs["get_camera_mode"] == "Service":
-#         param_configs.update(configs["Service_configs"])
-#     elif configs["get_camera_mode"] == "Topic":
-#         param_configs.update(configs["Topic_configs"])
-#     else:
-#         param_configs.update(configs["Service_configs"])
-#
-#     if configs["detect_mode"] == "Detect":
-#         param_configs.update(configs["Detect_configs"])
-#     elif configs["detect_mode"] == "Color":
-#         param_configs.update(configs["Color_configs"])
-#     elif configs["detect_mode"] == "Crossboard":
-#         param_configs.update(configs["Crossboard_configs"])
-#     else:
-#         param_configs.update(configs["Detect_configs"])
-#
-#     if configs["calculate_mode"] == "PCA":
-#         param_configs.update(configs["PCA_configs"])
-#     elif configs["calculate_mode"] == "ICP":
-#         param_configs.update(configs["ICP_configs"])
-#     else:
-#         param_configs.update(configs["PCA_configs"])
-#     return param_configs
-

 def generate_launch_description():
    args_detect = [
-        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
-        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
-        DeclareLaunchArgument('left_hand', default_value=configs['left_hand']),
-        DeclareLaunchArgument('right_hand', default_value=configs['right_hand']),
-        DeclareLaunchArgument('head', default_value=configs['head']),
-
-        DeclareLaunchArgument('get_camera_mode', default_value=configs['get_camera_mode']),
-        DeclareLaunchArgument('detect_mode', default_value=configs['detect_mode']),
-        DeclareLaunchArgument('calculate_mode', default_value=configs['calculate_mode']),
-
-        # DeclareLaunchArgument("param_configs", default_value=json.dumps(get_param_configs())),
-        DeclareLaunchArgument("Service_configs", default_value=json.dumps(configs['Service_configs'])),
-        DeclareLaunchArgument("Topic_configs", default_value=json.dumps(configs['Topic_configs'])),
-        DeclareLaunchArgument("Detect_configs", default_value=json.dumps(configs['Detect_configs'])),
-        DeclareLaunchArgument("Color_configs", default_value=json.dumps(configs['Color_configs'])),
-        DeclareLaunchArgument("Crossboard_configs", default_value=json.dumps(configs['Crossboard_configs'])),
-        DeclareLaunchArgument("PCA_configs", default_value=json.dumps(configs['PCA_configs'])),
-        DeclareLaunchArgument("ICP_configs", default_value=json.dumps(configs['ICP_configs'])),
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
    ]

    def create_detect_node(context):
-        output_boxes = LaunchConfiguration('output_boxes').perform(context)
-        output_masks = LaunchConfiguration('output_masks').perform(context)
-        left_hand = LaunchConfiguration('left_hand').perform(context)
-        right_hand = LaunchConfiguration('right_hand').perform(context)
-        head = LaunchConfiguration('head').perform(context)
-
-        get_camera_mode = LaunchConfiguration('get_camera_mode').perform(context)
-        detect_mode = LaunchConfiguration('detect_mode').perform(context)
-        calculate_mode = LaunchConfiguration('calculate_mode').perform(context)
-
-        # param_configs = LaunchConfiguration('param_configs').perform(context)
-        Service_configs = LaunchConfiguration('Service_configs').perform(context)
-        Topic_configs = LaunchConfiguration('Topic_configs').perform(context)
-        Detect_configs = LaunchConfiguration('Detect_configs').perform(context)
-        Color_configs = LaunchConfiguration('Color_configs').perform(context)
-        Crossboard_configs = LaunchConfiguration('Crossboard_configs').perform(context)
-        PCA_configs = LaunchConfiguration('PCA_configs').perform(context)
-        ICP_configs = LaunchConfiguration('ICP_configs').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)

        return [
            Node(
@@ -88,24 +28,7 @@ def generate_launch_description():
                executable='detect_node',
                name=configs['node_name'],
                parameters=[{
-                    'output_boxes': output_boxes.lower() == 'true',
-                    'output_masks': output_masks.lower() == 'true',
-                    'left_hand': left_hand,
-                    'right_hand': right_hand,
-                    'head': head,
-
-                    'get_camera_mode': get_camera_mode,
-                    'detect_mode': detect_mode,
-                    'calculate_mode': calculate_mode,
-
-                    # 'param_configs': param_configs,
-                    'Service_configs': Service_configs,
-                    'Topic_configs': Topic_configs,
-                    'Detect_configs': Detect_configs,
-                    'Color_configs': Color_configs,
-                    'Crossboard_configs': Crossboard_configs,
-                    'PCA_configs': PCA_configs,
-                    'ICP_configs': ICP_configs,
+                    'configs_path': configs_path,
                }]
            )
        ]
--- a/vision_detect/launch/medical_sense.launch.py
+++ b/vision_detect/launch/medical_sense.launch.py
@@ -0,0 +1,38 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+from launch.actions import DeclareLaunchArgument, OpaqueFunction
+from launch.substitutions import LaunchConfiguration
+
+import os
+import json
+from ament_index_python.packages import get_package_share_directory
+
+
+share_dir = get_package_share_directory('vision_detect')
+config_dir = os.path.join(share_dir, 'configs/launch/medical_sense.json')
+with open(config_dir, "r") as f:
+    configs = json.load(f)
+
+def generate_launch_description():
+    args_detect = [
+        DeclareLaunchArgument('configs_path', default_value=config_dir),
+    ]
+
+    def create_detect_node(context):
+        configs_path = LaunchConfiguration('configs_path').perform(context)
+
+        return [
+            Node(
+                package='vision_detect',
+                executable='detect_node',
+                name=configs['node_name'],
+                output="screen",
+                parameters=[{
+                    'configs_path': configs_path,
+                }]
+            )
+        ]
+
+    return LaunchDescription(args_detect + [
+        OpaqueFunction(function=create_detect_node),
+    ])
--- a/vision_detect/launch/test_action.launch.py
+++ b/vision_detect/launch/test_action.launch.py
@@ -0,0 +1,18 @@
+from launch import LaunchDescription
+from launch.actions import ExecuteProcess, TimerAction
+
+
+def cmd(_cmd, period=None, output="screen"):
+    command = ExecuteProcess(cmd=_cmd, output=output, sigterm_timeout='5', sigkill_timeout='5')
+    if period is None:
+        return command
+    else:
+        return TimerAction(period=float(period), actions=[command])
+
+
+def generate_launch_description():
+    return LaunchDescription([
+        cmd(['ros2', 'launch', 'img_dev', 'img_dev.launch.py']),
+        cmd(['ros2', 'launch', 'vision_detect', 'test_action_medical_sense.launch.py'], period=2.0),
+        cmd(['ros2', 'run', 'vision_detect', 'test_action_client'], period=2.0)
+    ])
--- a/vision_detect/launch/test_action_medical_sense.launch.py
+++ b/vision_detect/launch/test_action_medical_sense.launch.py
@@ -0,0 +1,47 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+from launch.actions import DeclareLaunchArgument, OpaqueFunction
+from launch.substitutions import LaunchConfiguration
+
+import os
+import json
+from ament_index_python.packages import get_package_share_directory
+
+
+SHARED_DIR = get_package_share_directory('vision_detect')
+CONFIGS_PATH = os.path.join(SHARED_DIR, 'configs/launch/default_action_config.json')
+LOGGING_MAP_PATH = os.path.join(SHARED_DIR, 'map/logging/report_logging_define.json')
+
+
+def get_name(path):
+    with open(path, "r") as f:
+        name = json.load(f)["node_name"]
+    return name
+
+
+def generate_launch_description():
+    args_detect = [
+        DeclareLaunchArgument('logging_map_path', default_value=LOGGING_MAP_PATH),
+        DeclareLaunchArgument('configs_path', default_value=CONFIGS_PATH)
+    ]
+
+    def create_detect_node(context):
+        logging_map_path = LaunchConfiguration('logging_map_path').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)
+
+        return [
+            Node(
+                package='vision_detect',
+                executable='detect_node_test',
+                name=get_name(CONFIGS_PATH),
+                output="screen",
+                parameters=[{
+                    'logging_map_path': logging_map_path,
+                    'configs_path': configs_path
+                }]
+            )
+        ]
+
+    return LaunchDescription(args_detect + [
+        OpaqueFunction(function=create_detect_node),
+    ])
--- a/vision_detect/launch/test_service.launch.py
+++ b/vision_detect/launch/test_service.launch.py
@@ -0,0 +1,18 @@
+from launch import LaunchDescription
+from launch.actions import ExecuteProcess, TimerAction
+
+
+def cmd(_cmd, period=None, output="screen"):
+    command = ExecuteProcess(cmd=_cmd, output=output, sigterm_timeout='5', sigkill_timeout='5')
+    if period is None:
+        return command
+    else:
+        return TimerAction(period=float(period), actions=[command])
+
+
+def generate_launch_description():
+    return LaunchDescription([
+        cmd(['ros2', 'launch', 'img_dev', 'img_dev.launch.py']),
+        cmd(['ros2', 'launch', 'vision_detect', 'test_service_medical_sense.launch.py'], period=2.0),
+        cmd(['ros2', 'run', 'vision_detect', 'service_client_node'], period=2.0)
+    ])
--- a/vision_detect/launch/test_service_medical_sense.launch.py
+++ b/vision_detect/launch/test_service_medical_sense.launch.py
@@ -0,0 +1,47 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+from launch.actions import DeclareLaunchArgument, OpaqueFunction
+from launch.substitutions import LaunchConfiguration
+
+import os
+import json
+from ament_index_python.packages import get_package_share_directory
+
+
+SHARED_DIR = get_package_share_directory('vision_detect')
+CONFIGS_PATH = os.path.join(SHARED_DIR, 'configs/launch/default_service_config.json')
+LOGGING_MAP_PATH = os.path.join(SHARED_DIR, 'map/logging/report_logging_define.json')
+
+
+def get_name(path):
+    with open(path, "r") as f:
+        name = json.load(f)["node_name"]
+    return name
+
+
+def generate_launch_description():
+    args_detect = [
+        DeclareLaunchArgument('logging_map_path', default_value=LOGGING_MAP_PATH),
+        DeclareLaunchArgument('configs_path', default_value=CONFIGS_PATH)
+    ]
+
+    def create_detect_node(context):
+        logging_map_path = LaunchConfiguration('logging_map_path').perform(context)
+        configs_path = LaunchConfiguration('configs_path').perform(context)
+
+        return [
+            Node(
+                package='vision_detect',
+                executable='detect_node_test',
+                name=get_name(CONFIGS_PATH),
+                output="screen",
+                parameters=[{
+                    'logging_map_path': logging_map_path,
+                    'configs_path': configs_path
+                }]
+            )
+        ]
+
+    return LaunchDescription(args_detect + [
+        OpaqueFunction(function=create_detect_node),
+    ])
--- a/vision_detect/map/label/medical_sense.json
+++ b/vision_detect/map/label/medical_sense.json
@@ -0,0 +1,5 @@
+{
+  "bottle_plate": 0,
+  "medicine_box": 1,
+  "paper": 2
+}
--- a/vision_detect/map/logging/report_logging_define.json
+++ b/vision_detect/map/logging/report_logging_define.json
@@ -0,0 +1,56 @@
+{
+  "info": {},
+
+  "warning": {
+    "0000": "Success",
+
+    "0100": "Fail to load config file: File is not open or JSON parse error",
+    "0101": "config file have no attribute 'node_name'",
+    "0102": "config file have no attribute 'output_boxe' or 'output_mask'",
+    "0103": "Fail to load source part",
+    "0104": "Fail to load detect part",
+    "0105": "Fail to load estimate part",
+    "0110": "Cannot load calibration file, use E(4, 4)",
+    "0111": "Calibration file have not KEY: 'T', use E(4, 4)",
+    "0112": "Calibration file has wrong shape of mat, use E(4, 4)",
+
+    "0200": "Have not receive any camera data",
+    "0201": "Receive wrong position, or this position have no camera data",
+    "0202": "All input position have no camera data",
+
+    "0210": "The image is too blurry.",
+
+    "0300": "Worker thread is not alive",
+    "0301": "Can't submit task, task executor is already stop",
+    "0302": "Task is aborted",
+    "0303": "Worker time out",
+
+    "0400": "task executor internal error",
+
+    "1000": "Detected object count is 0",
+    "1001": "Depth crop is None",
+    "1003": "Failed to detect a valid pose",
+    "1010": "No specified color within the designated distance.",
+    "1020": "Didn't detect Crossboard",
+
+    "1100": "Object point cloud contains excessive noise",
+    "1101": "The point cloud is empty",
+    "1102": "Points is too closely",
+
+    "1200": "The number of points is insufficient to compute an OBB",
+    "1201": "PCA output vector is None",
+    "1202": "This pose cannot be grab, and position refine fail",
+    "1203": "All pose refine failed",
+    "1210": "No object can be estimate",
+
+    "1300": "E2E model input data 'coors' are fewer than 128",
+    "1301": "E2E model input data 'point_clouds' are fewer than 128",
+    "1302": "The 'true num' of points is 0; No graspable points are available",
+    "1303": "The model returned no predictions",
+    "1304": "All rotation vector processing failed; no valid rotation matrix was generated"
+  },
+
+  "error": {},
+
+  "fatal": {}
+}
--- a/vision_detect/setup.py
+++ b/vision_detect/setup.py
@@ -1,28 +1,54 @@
+import os
 from glob import glob
 from setuptools import find_packages, setup

 package_name = 'vision_detect'

+openvino_files = []
+for model_dir in glob("checkpoints/*_openvino_model"):
+    model_name = os.path.basename(model_dir)
+    model_files = glob(os.path.join(model_dir, '*'))
+    openvino_files.append(
+        ('share/' + package_name + '/checkpoints/' + model_name, model_files)
+    )
+
+data_files = [
+    ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
+    ('share/' + package_name, ['package.xml']),
+    ('share/' + package_name + '/launch', glob('launch/*.launch.py')),
+    ('share/' + package_name + '/configs', glob('configs/*.json')),
+    ('share/' + package_name + '/configs/flexiv', glob('configs/flexiv/*.json')),
+    ('share/' + package_name + '/configs/launch', glob('configs/launch/*.json')),
+
+    ('share/' + package_name + '/calibration', glob('calibration/*.json')),
+    ('share/' + package_name + '/map/logging', glob('map/logging/*.json')),
+    ('share/' + package_name + '/map/label', glob('map/label/*.json')),
+
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
+    ('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd'))
+]
+
+data_files.extend(openvino_files)
+
 setup(
    name=package_name,
    version='0.0.0',
    packages=find_packages(exclude=['test']),
-    data_files=[
-        ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
-        ('share/' + package_name, ['package.xml']),
-        ('share/' + package_name + '/launch', glob('launch/*.launch.py')),
-        ('share/' + package_name + '/configs', glob('configs/*.json')),
-        ('share/' + package_name + '/configs/flexiv_configs', glob('configs/flexiv_configs/*.json')),
-        ('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
-        ('share/' + package_name + '/configs/launch_configs', glob('configs/launch_configs/*.json')),
-
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
-        ('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
+    package_data={
+        'vision_detect': [
+            'vision_core/model/pointnet2/pointnet2/*.so',
+            'vision_core/model/knn/knn_pytorch/*.so'
+        ]
+    },
+    data_files=data_files,
+    install_requires=[
+        'setuptools',
+        'numpy >= 1.23.0, < 2.0.0',
+        'opencv-python > 4.0.0, < 4.12.0',
    ],
-    install_requires=['setuptools'],
-    zip_safe=True,
+    zip_safe=False,
    include_package_data=True,
    maintainer='lyx',
    maintainer_email='lyx@todo.todo',
@@ -31,24 +57,22 @@ setup(
    tests_require=['pytest'],
    entry_points={
        'console_scripts': [
-            'detect_service_node = vision_detect.detect_service:main',
-            'detect_topic_node = vision_detect.detect_topic:main',
-
-            'box_detect_service_node = vision_detect.detect_box_service:main',
-            'red_detect_topic_node = vision_detect.detect_red_topic:main',
-            'red_detect_service_node = vision_detect.detect_red_service:main',
+            # 'red_detect_topic_node = vision_detect.detect_red_topic:main',
+            # 'red_detect_service_node = vision_detect.detect_red_service:main',

            'flexiv_detect_topic_node = vision_detect.flexivaidk_detect_topic:main',
            'flexiv_detect_service_node = vision_detect.flexivaidk_detect_service:main',

            'sub_pose_node = vision_detect.sub_pose:main',
-            'calibration_node = vision_detect.hand_eye_calibration:main',
-            'crossboard_detect_node = vision_detect.crossboard_detect:main',
-            'service_client_node = vision_detect.service_client:main',
            'get_camera_pose_node = vision_detect.get_camera_pose:main',
-            'calculate_node = vision_detect.calculate:main',
            
            'detect_node = vision_detect.detect_node:main',
+            'source_test_node = vision_detect.source_test_node:main',
+
+            'service_client_node = vision_detect.service_client:main',
+            'test_action_client = vision_detect.action_client_node:main',
+            'once_test_action_client = vision_detect.action_client_once:main',
+            'concurrent_test_action_client = vision_detect.action_client_once_concurrent:main'
        ],
    },
 )
--- a/vision_detect/test/test_flake8.py
+++ b/vision_detect/test/test_flake8.py
@@ -21,5 +21,5 @@ import pytest
 def test_flake8():
    rc, errors = main_with_errors(argv=[])
    assert rc == 0, \
-        'Found %d code style errors / warnings:\n' % len(errors) + \
+        'Found %d enum style errors / warnings:\n' % len(errors) + \
        '\n'.join(errors)
--- a/vision_detect/test/test_pep257.py
+++ b/vision_detect/test/test_pep257.py
@@ -20,4 +20,4 @@ import pytest
@pytest.mark.pep257
 def test_pep257():
    rc = main(argv=['.', 'test'])
-    assert rc == 0, 'Found code style errors / warnings'
+    assert rc == 0, 'Found enum style errors / warnings'
--- a/vision_detect/vision_detect/VisionDetect/init.py
+++ b/vision_detect/vision_detect/VisionDetect/init.py
@@ -1,3 +0,0 @@
-
-
-__all__ = []
--- a/vision_detect/vision_detect/VisionDetect/node/init.py
+++ b/vision_detect/vision_detect/VisionDetect/node/init.py
@@ -1,3 +0,0 @@
-from .detect_node import DetectNode
-
-__all__ = ['detect_node']
--- a/vision_detect/vision_detect/VisionDetect/node/detect_node.py
+++ b/vision_detect/vision_detect/VisionDetect/node/detect_node.py
@@ -1,656 +0,0 @@
-"""Vision Detect Node"""
-import os
-import time
-import threading
-import json
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import torch
-import numpy as np
-import open3d as o3d
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-from rclpy.task import Future
-from cv_bridge import CvBridge
-from message_filters import Subscriber, ApproximateTimeSynchronizer
-from sensor_msgs.msg import Image, CameraInfo
-from geometry_msgs.msg import Pose, Point, Quaternion
-
-from interfaces.msg import PoseClassAndID, PoseArrayClassAndID, ImgMsg
-from interfaces.srv import VisionObjectRecognition
-
-from ..utils import *
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-class DetectNode(Node):
-    """Detect Node"""
-    def __init__(self, name):
-        super().__init__(name)
-        self.device = None
-        self.checkpoint_path = None
-        self.output_boxes = None
-        self.output_masks = None
-
-        self.function = None
-        self.source = None
-
-        self.server = None
-        self.model = None
-
-        self.k = self.d = None
-        self.eye_in_left_hand_mat = None
-        self.eye_in_right_hand_mat = None
-        self.eye_to_hand_mat = None
-        self.hand_eye_mat = None
-
-        self.eye_in_left_hand_path = None
-        self.eye_in_right_hand_path = None
-        self.eye_to_hand_path = None
-
-        self.get_camera_mode = None
-        self.detect_mode = None
-        self.calculate_mode = None
-
-        self.camera_size = None
-
-        self.map1 = self.map2 = None
-
-        self.calculate_function = calculate_pose_pca
-        self.fx = self.fy = 0.5
-        self.camera_data = {}
-        self.future = Future()
-        self.cv_bridge = CvBridge()
-        self.lock = threading.Lock()
-        self.distance = 1500
-        self.color_range = [
-            [[0, 120, 70], [10, 255, 255]],
-            [[170, 120, 70], [180, 255, 255]]
-        ]
-
-        '''init'''
-        self._init_param()
-        self._init_publisher()
-        self._init_subscriber()
-
-        self.get_logger().info("Initialize done")
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('output_boxes', True)
-        self.declare_parameter('output_masks', False)
-
-        self.output_boxes = self.get_parameter('output_boxes').value
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('get_camera_mode', 'service')
-        self.declare_parameter('detect_mode', 'detect')
-        self.declare_parameter('calculate_mode', 'pca')
-
-        self.get_camera_mode = self.get_parameter('get_camera_mode').value
-        self.detect_mode = self.get_parameter('detect_mode').value
-        self.calculate_mode = self.get_parameter("calculate_mode").value
-
-        self.declare_parameter('left_hand', 'configs/hand_eye_mat/hand_in_left_hand.json')
-        self.declare_parameter('right_hand', 'configs/hand_eye_mat/hand_in_right_hand.json')
-        self.declare_parameter('head', 'configs/hand_eye_mat/hand_to_hand.json')
-
-        self.declare_parameter('Service_configs', '{}')
-        self.declare_parameter('Topic_configs', '{}')
-
-        self.declare_parameter('Detect_configs', '{}')
-        self.declare_parameter('Color_configs', '{}')
-        self.declare_parameter('Crossboard_configs', '{}')
-
-        self.declare_parameter('PCA_configs', '{}')
-        self.declare_parameter('ICA_configs', '{}')
-
-        self._init_json_file()
-        if self.get_camera_mode == "Service":
-            self.service_name = json.loads(self.get_parameter('Service_configs').value)["service_name"]
-            self._init_service()
-        elif self.get_camera_mode == "Topic":
-            self.declare_parameter('Topic_configs', '{}')
-            self.color_image_topic_name = json.loads(self.get_parameter('Topic_configs').value)["color_image_topic_name"]
-            self.depth_image_topic_name = json.loads(self.get_parameter('Topic_configs').value)["depth_image_topic_name"]
-            self.camera_info_topic_name = json.loads(self.get_parameter('Topic_configs').value)["camera_info_topic_name"]
-            self.position = json.loads(self.get_parameter('Topic_configs').value)["position"]
-            if self.position == 'left':
-                self.hand_eye_mat = self.eye_in_left_hand_mat
-            elif self.position == 'right':
-                self.hand_eye_mat = self.eye_in_right_hand_mat
-            else:
-                self.hand_eye_mat = self.eye_to_hand_mat
-        else:
-            self.service_name = json.loads(self.get_parameter('Service_configs').value)["service_name"]
-
-        if self.detect_mode == 'Detect':
-            self.function = self._seg_image
-            self.confidence = json.loads(self.get_parameter('Detect_configs').value)["confidence"]
-            self.classes = json.loads(self.get_parameter('Detect_configs').value)["classes"]
-            if not self.classes:
-                self.classes = None
-            self.checkpoint_path = os.path.join(share_dir, json.loads(self.get_parameter('Detect_configs').value)["checkpoint_path"])
-            self._init_model()
-        elif self.detect_mode == 'Color':
-            self.function = self._seg_color
-            self.color_range = json.loads(self.get_parameter('Color_configs').value)["color_range"]
-            self.color_range = [[np.array(lower), np.array(upper)] for lower, upper in self.color_range]
-        elif self.detect_mode == 'Crossboard':
-            self.function = self._seg_crossboard
-            self.pattern_size = json.loads(self.get_parameter('Crossboard_configs').value)["pattern_size"]
-        else:
-            self.get_logger().warning("Unknown mode, use detect")
-            self.function = self._seg_image
-            self.confidence = json.loads(self.get_parameter('Detect_configs').value)["confidence"]
-            self.classes = json.loads(self.get_parameter('Detect_configs').value)["classes"]
-            if not self.classes:
-                self.classes = None
-            self.checkpoint_path = json.loads(self.get_parameter('Detect_configs').value)["checkpoint_path"]
-            self._init_model()
-
-        if self.calculate_mode == "PCA":
-            self.configs = json.loads(self.get_parameter('PCA_configs').value)
-            self.calculate_function = calculate_pose_pca
-        elif self.calculate_mode == "ICP":
-            self.configs = json.loads(self.get_parameter('ICA_configs').value)
-            source = o3d.io.read_point_cloud(
-                os.path.join(share_dir, self.configs['complete_model_path'])
-                )
-            self.configs["source"] = source
-            self.calculate_function = calculate_pose_icp
-
-        else:
-            self.get_logger().warning("Unknown calculate_mode, use PCA")
-            self.configs = json.loads(self.get_parameter('PCA_configs').value)
-            self.calculate_function = calculate_pose_pca
-
-        self.get_logger().info("Initialize parameters done")
-
-    def _init_json_file(self):
-        self.eye_in_left_hand_path = os.path.join(
-            share_dir, self.get_parameter('left_hand').value
-            )
-        self.eye_in_right_hand_path = os.path.join(
-            share_dir, self.get_parameter('right_hand').value
-            )
-        self.eye_to_hand_path = os.path.join(
-            share_dir, self.get_parameter('head').value
-            )
-
-        self.eye_in_left_hand_mat, info, sign = read_calibration_mat(self.eye_in_left_hand_path)
-        if not sign:
-            self.get_logger().warning(f"left_mat: {info}")
-
-        self.eye_in_right_hand_mat, info, sign = read_calibration_mat(self.eye_in_right_hand_path)
-        if not sign:
-            self.get_logger().warning(f"right_mat: {info}")
-
-        self.eye_to_hand_mat, info, sign = read_calibration_mat(self.eye_to_hand_path)
-        if not sign:
-            self.get_logger().warning(f"head_mat: {info}")
-
-        self.get_logger().info("Initialize read json file done")
-
-    def _init_model(self):
-        """init model"""
-        if self.checkpoint_path.endswith('-seg.pt'):
-            device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-            try:
-                self.model = YOLO(self.checkpoint_path).to(device_model)
-            except Exception as e:
-                self.get_logger().error(f'Failed to load YOLO model: {e}')
-                raise 
-            self.function = self._seg_image
-
-        elif self.checkpoint_path.endswith('.onnx') or self.checkpoint_path.endswith('.engine'):
-            self.model = YOLO(self.checkpoint_path)
-            self.function = self._seg_image
-
-        else:
-            self.function = None
-            self.get_logger().fatal(f'Unknown checkpoint: {self.checkpoint_path}')
-            raise ValueError(f'Unknown checkpoint: {self.checkpoint_path}')
-
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        self.get_logger().info("Initialize model done")
-
-    def _init_publisher(self):
-        """init publisher"""
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-        if self.get_camera_mode == "Topic":
-            self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
-
-        self.get_logger().info("Initialize publisher done")
-
-    def _init_service(self):
-        """init service server"""
-        self.server = self.create_service(
-            VisionObjectRecognition,
-            self.service_name,
-            self._service_callback
-        )
-        self.get_logger().info("Initialize service done")
-
-    def _init_subscriber(self):
-        """init subscriber"""
-        if self.get_camera_mode == "Service":
-            self.sub_img = self.create_subscription(
-                ImgMsg,
-                "/img_msg",
-                self._service_sub_callback,
-                10
-            )
-        elif self.get_camera_mode == "Topic":
-            self.sub_camera_info = self.create_subscription(
-                CameraInfo,
-                self.camera_info_topic_name,
-                self._camera_info_callback,
-                10
-            )
-
-            self.get_logger().info("Waiting for camera info...")
-            rclpy.spin_until_future_complete(self, self.future)
-            self.get_logger().info("Camera info received.")
-
-            # sync get color and depth img
-            self.sub_color_image = Subscriber(self, Image, self.color_image_topic_name)
-            self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic_name)
-
-            self.sync_subscriber = ApproximateTimeSynchronizer(
-                [self.sub_color_image, self.sub_depth_image],
-                queue_size=10,
-                slop=0.1
-            )
-            self.sync_subscriber.registerCallback(self._sync_sub_callback)
-        else:
-            self.get_logger().warning("get_camera_mode is wrong, use service")
-            self.sub_img = self.create_subscription(
-                ImgMsg,
-                "/img_msg",
-                self._service_sub_callback,
-                10
-            )
-
-        self.get_logger().info("Initialize subscriber done")
-
-    def _camera_info_callback(self, msg: CameraInfo):
-        """Get camera info"""
-        self.k = msg.k
-        self.d = msg.d
-
-        self.camera_size = [msg.width, msg.height]
-
-        if self.k is not None and len(self.k) > 0 and self.d is not None and len(self.d) > 0:
-            self.map1, self.map2, self.k = get_map(msg.k, msg.d, self.camera_size)
-            if not self.future.done(): 
-                self.future.set_result(True)
-            self.destroy_subscription(self.sub_camera_info)
-        else:
-            self.get_logger().warning("K and d are not defined")
-
-    def _service_sub_callback(self, msgs):
-        """同步回调函数"""
-        with self.lock:
-            # self.get_logger().info("get msgs")
-            self.camera_data[msgs.position] = [
-                msgs.image_color,
-                msgs.image_depth,
-                msgs.karr,
-                msgs.darr
-            ]
-
-    def _sync_sub_callback(self, color_img_ros, depth_img_ros):
-        """同步回调函数"""
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
-
-        img, pose_list = self.function(color_img_cv, depth_img_cv)
-
-        # masks为空，结束这一帧
-        if img is None:
-            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image.publish(img)
-
-        if pose_list:
-            pose_list_all = PoseArrayClassAndID()
-            for item in pose_list:
-                pose_list_all.objects.append(
-                    PoseClassAndID(
-                        class_name = item["class_name"],
-                        class_id = item["class_id"],
-                        pose = item["pose"],
-                        grab_width = item["grab_width"]
-                    )
-                )
-            pose_list_all.header.stamp = self.get_clock().now().to_msg()
-            pose_list_all.header.frame_id = "pose_list"
-            self.pub_pose_list.publish(pose_list_all)
-
-    def _service_callback(self, request, response):
-        # self.get_logger().info("service callback start")
-        response.header.stamp = self.get_clock().now().to_msg()
-        response.header.frame_id = "camera_detect"
-
-        with self.lock:
-            if request.camera_position in self.camera_data:
-                color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position]
-            else:
-                if len(self.camera_data) == 0:
-                    response.success = False
-                    response.info = "Camera data have not objects"
-                    response.objects = []
-
-                    # self.get_logger().info("service callback done")
-                    return response
-
-                response.success = False
-                response.info = f"Name is wrong: {request.camera_position}"
-                response.objects = []
-
-                # self.get_logger().info("service callback done")
-                return response
-
-        if request.camera_position == 'left':
-            self.hand_eye_mat = self.eye_in_left_hand_mat
-        elif request.camera_position == 'right':
-            self.hand_eye_mat = self.eye_in_right_hand_mat
-        else:
-            self.hand_eye_mat = self.eye_to_hand_mat
-
-        self.camera_size = [color_img_ros.width, color_img_ros.height]
-
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        map1, map2, self.k = get_map(self.k, d, self.camera_size)
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
-
-        img, pose_list = self.function(color_img_cv, depth_img_cv)
-
-        # masks为空，结束这一帧
-        if self.output_boxes or self.output_masks:
-            if img is None:
-                img = color_img_ros
-            self.pub_detect_image.publish(img)
-
-        if pose_list:
-            response.info = "Success get pose"
-            response.success = True
-            for item in pose_list:
-                response.objects.append(
-                    PoseClassAndID(
-                        class_name = item["class_name"],
-                        class_id = item["class_id"],
-                        pose = item["pose"],
-                        grab_width = item["grab_width"]
-                    )
-                )
-        else:
-            response.info = "pose dict is empty"
-            response.success = False
-            response.objects = []
-
-        # self.get_logger().info("service callback done")
-        return response
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
-        """Use segmentation model"""
-        pose_list = []
-
-        # Get Predict Results
-        time1 = time.time()
-        results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
-        time2 = time.time()
-        result = results[0]
-
-        # Get masks
-        if result.masks is None or len(result.masks) == 0:
-            self.get_logger().info(f"Detect object num: 0")
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-
-        # Get boxes
-        boxes = result.boxes.xywh.cpu().numpy()
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        time3 = time.time()
-
-        self.get_logger().info(f"Detect object num: {len(masks)}")
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_img, mask], box, True)
-            depth_crop, mask_crop = imgs_crop
-
-            if depth_crop is None:
-                continue
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.k[0],
-                self.k[4],
-                self.k[2] - x_min,
-                self.k[5] - y_min
-            )
-
-            rmat = self.calculate_function(
-                mask_crop,
-                depth_crop,
-                intrinsics,
-                **self.configs
-            )
-            if rmat is None:
-                self.get_logger().warning("Object point cloud have too many noise")
-                continue
-
-            grab_width = calculate_grav_width(mask, self.k, rmat[2, 3])
-            rmat[2, 3] = rmat[2, 3] + grab_width * 0.38
-
-            rmat = self.hand_eye_mat @ rmat
-
-            x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_list.append(
-                    {
-                        "class_id": int(class_ids[i]),
-                        "class_name": labels[class_ids[i]],
-                        "pose": pose,
-                        "grab_width": grab_width * 1.05
-                    }
-                )
-
-        time4 = time.time()
-
-        self.get_logger().info('start')
-        self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
-        # self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
-        self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
-        self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
-        self.get_logger().info('end')
-
-        # mask_img and box_img is or not output
-        if self.output_boxes and not self.output_masks:
-            home = os.path.expanduser("~")
-            save_path = os.path.join(home, "detect_image.png")
-            draw_box(rgb_img, result, save_path=save_path)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        elif self.output_boxes and self.output_masks:
-            draw_box(rgb_img, result)
-            draw_mask(rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        elif not self.output_boxes and self.output_masks:
-            draw_mask(rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        else:
-            return None, pose_list
-
-    def _seg_color(self, rgb_img: np.ndarray, depth_img: np.ndarray):
-        """Use segmentation model"""
-        pose_list = []
-
-        hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2HSV)
-
-        depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
-        depth_filter_mask[(depth_img > 0) & (depth_img < self.distance)] = 1
-
-        hsv_img[depth_filter_mask == 0] = 0
-
-        mask_part_list = [cv2.inRange(hsv_img, color[0], color[1]) for color in self.color_range]
-        mask = sum(mask_part_list[1:], mask_part_list[0])
-        mask = mask // 255
-
-        rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([255, 0, 0]) * 0.5
-
-        imgs_crop, mask_crop, (x_min, y_min) = crop_imgs_mask([depth_img], mask, True)
-        depth_crop = imgs_crop[0]
-
-        if depth_crop is None:
-            self.get_logger().warning("depth_crop is None")
-            return None, None
-
-        intrinsics = o3d.camera.PinholeCameraIntrinsic(
-            int(self.camera_size[0]),
-            int(self.camera_size[1]),
-            self.k[0],
-            self.k[4],
-            self.k[2] - x_min,
-            self.k[5] - y_min
-        )
-
-        rmat = self.calculate_function(
-            mask_crop,
-            depth_crop,
-            intrinsics,
-            **self.configs
-        )
-
-        rmat = self.hand_eye_mat @ rmat
-
-        x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
-
-        if (x, y, z) != (0.0, 0.0, 0.0):
-            pose = Pose()
-            pose.position = Point(x=x, y=y, z=z)
-            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-            pose_list.append(
-                {
-                    "class_id": int(98),
-                    "class_name": "red",
-                    "pose": pose,
-                    "grab_width": 0.0
-                }
-            )
-
-        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-
-    def _seg_crossboard(self, rgb_img, depth_img):
-        pose_list = []
-        rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
-        ret, corners = cv2.findChessboardCorners(rgb_img_gray, self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
-        if ret:
-            # 角点亚像素精确化（提高标定精度）
-            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
-            corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
-
-            corners_subpix = corners_subpix.reshape(self.pattern_size[1], self.pattern_size[0], 2)
-            mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
-
-            for i in range(0, self.pattern_size[1] - 1):
-                for j in range(0, self.pattern_size[0] - 1):
-                    pts = np.array([
-                        corners_subpix[i, j],
-                        corners_subpix[i, j + 1],
-                        corners_subpix[i + 1, j + 1],
-                        corners_subpix[i + 1, j]
-                    ], dtype=np.int32)
-                    cv2.fillConvexPoly(mask, pts, 1)
-
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-
-            img_crop, mask_crop, (x_min, y_min) = crop_imgs_mask([depth_img], mask)
-            depth_crop = img_crop[0]
-
-            if depth_crop is None:
-                return None, None
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.k[0],
-                self.k[4],
-                self.k[2] - x_min,
-                self.k[5] - y_min
-            )
-
-            rmat = self.calculate_function(
-                mask_crop,
-                depth_crop,
-                intrinsics,
-                **self.configs
-            )
-
-            x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
-
-            self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_list.append(
-                    {
-                        "class_id": int(99),
-                        "class_name": 'crossboard',
-                        "pose": pose,
-                        "grab_width": 0.0
-                    }
-                )
-
-            cv2.putText(
-                rgb_img,
-                f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
-                (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX,
-                1,
-                (255, 255, 0),
-                2
-            )
-            cv2.putText(
-                rgb_img,
-                f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
-                (0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX,
-                1,
-                (255, 255, 0),
-                2
-            )
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-            
-        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
-
-
-if __name__ == '__main__':
-    rclpy.init(args=None)
-    node = DetectNode('detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
--- a/vision_detect/vision_detect/VisionDetect/utils/init.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/init.py
@@ -1,4 +0,0 @@
-from .image_tools import *
-from .draw_tools import *
-from .calculate_tools import *
-from .file_tools import *
--- a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
@@ -1,244 +0,0 @@
-"""计算工具"""
-import logging
-
-import cv2
-import numpy as np
-import open3d as o3d
-import transforms3d as tfs
-
-from .object_icp import object_icp
-
-__all__ = [
-    "calculate_pose_pca", "calculate_pose_icp", "calculate_grav_width",
-    "rmat2quat", "quat2rmat",
-]
-
-def pca(data: np.ndarray, sort=True):
-    """主成分分析 """
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-
-def calculate_pose_pca(
-        mask,
-        depth_img: np.ndarray,
-        intrinsics,
-        **kwargs
-):
-    """点云主成分分析法计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=kwargs.get("depth_scale", 1000.0),
-        depth_trunc=kwargs.get("depth_trunc", 3.0),
-    )
-
-    point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
-    if point_cloud is None:
-        return None
-
-    if len(point_cloud.points) == 0:
-        logging.warning("clean_pcd is empty")
-        return np.eye(4)
-    center = point_cloud.get_center()
-    x, y, z = center
-
-    w, v = pca(np.asarray(point_cloud.points))
-
-    if w is None or v is None:
-        logging.warning("PCA output w or v is None")
-        return np.eye(4)
-    
-    vx, vy, vz = v[:,0], v[:,1], v[:,2]
-
-    if vx[0] < 0:
-        vx = -vx
-    if vy[1] < 0:
-        vy = -vy
-    if not np.allclose(np.cross(vx, vy), vz):
-        vz = -vz
-
-    R = np.column_stack((vx, vy, vz))
-    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
-
-    # draw(point_cloud, rmat)
-    # draw(point_cloud_1, rmat)
-
-    return rmat
-
-
-def calculate_pose_icp(
-        mask,
-        depth_img: np.ndarray,
-        intrinsics,
-        **kwargs
-):
-    """点云配准法计算位姿"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=kwargs.get("depth_scale", 1000.0),
-        depth_trunc=kwargs.get("depth_trunc", 3.0)
-    )
-
-    point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
-    if point_cloud is None:
-        return None
-
-    if len(point_cloud.points) == 0:
-        logging.warning("clean_pcd is empty")
-        return np.eye(4)
-
-    rmat = object_icp(
-        kwargs.get("source"),
-        point_cloud,
-        ransac_voxel_size=kwargs.get("ransac_voxel_size", 0.005),
-        icp_voxel_radius=kwargs.get("icp_voxel_radius", [0.004, 0.002, 0.001]),
-        icp_max_iter=kwargs.get("icp_max_iter", [50, 30, 14])
-    )
-
-    return rmat
-
-
-def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.010):
-    """点云去噪"""
-    point_cloud = point_cloud.remove_non_finite_points()
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
-
-    # 半径滤波
-    clean_pcd, _ = down_pcd.remove_radius_outlier(
-        nb_points=10,
-        radius=voxel_size * 5
-        )
-
-    # 统计滤波
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(
-        nb_neighbors=10,
-        std_ratio=2.0
-        )
-
-    # 过滤过近的点
-    points = np.asarray(clean_pcd.points)
-    clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
-
-    # # 使用数量最大簇判定噪声强度
-    # _, counts = np.unique(labels[labels >= 0], return_counts=True)
-    # largest_cluster_ratio = counts.max() / len(labels)
-    # if largest_cluster_ratio < 0.5:
-    #     return None
-
-    labels = np.array(clean_pcd.cluster_dbscan(eps=voxel_size * 5, min_points=10))
-    if len(labels[labels >= 0]) == 0:
-        return clean_pcd
-
-    # 使用距离最近簇作为物体
-    points = np.asarray(clean_pcd.points)
-    cluster_label = set(labels)
-    point_cloud_clusters = []
-    for label in cluster_label:
-        if label == -1:
-            continue
-        idx = np.where(labels == label)[0]
-        point_cloud_cluster = clean_pcd.select_by_index(idx)
-        points_cluster_z = points[idx, 2]
-        z_avg = np.mean(points_cluster_z)
-        if z_avg < 0.2:
-            continue
-        point_cloud_clusters.append((point_cloud_cluster, z_avg))
-
-    if len(point_cloud_clusters) == 0:
-        return clean_pcd
-
-    point_cloud_clusters.sort(key=lambda x: x[1])
-    clean_pcd = point_cloud_clusters[0][0]
-
-    # 使用最近簇判断噪音强度
-    largest_cluster_ratio = len(clean_pcd.points) / len(points)
-    if largest_cluster_ratio < 0.2:
-        return None
-
-    return clean_pcd
-
-
-def calculate_grav_width(mask, k, depth):
-    """计算重心宽度"""
-    mask = mask.astype(np.uint8) * 255
-    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    
-    if not contours:
-        return 0.0
-    
-    c = max(contours, key=cv2.contourArea)
-    box = cv2.boxPoints(cv2.minAreaRect(c))
-    if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
-        point_diff = box[1] - box[0]
-    else:
-        point_diff = box[1] - box[2]
-
-    grab_width = depth * np.sqrt(
-        point_diff[0]**2 / k[0]**2 + point_diff[1]**2 / k[4]**2
-        )
-    return grab_width
-    
-
-def rmat2quat(rmat):
-    """Convert rotation matrix to quaternion."""
-    x, y, z = rmat[0:3, 3:4].flatten()
-    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-    quat = [x, y, z, rw, rx, ry, rz]
-    return quat
-
-
-def quat2rmat(quat):
-    """Convert quaternion to rotation matrix."""
-    x, y, z, rw, rx, ry, rz = quat
-    r = tfs.quaternions.quat2mat([rw, rx, ry, rz])
-    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), r, [1, 1, 1])
-    return rmat
-
-# def draw(pcd, mat):
-#     R = mat[0:3, 0:3]
-#     point = mat[0:3, 3:4].flatten()
-#     x, y, z = R[:, 0], R[:, 1], R[:, 2]
-#
-#     points = [
-#         [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
-#         point, point + x, point + y, point + z
-#
-#     ]  # 画点：原点、第一主成分、第二主成分
-#     lines = [
-#         [0, 1], [0, 2], [0, 3],
-#         [4, 5], [4, 6], [4, 7]
-#     ]  # 画出三点之间两两连线
-#     colors = [
-#         [1, 0, 0], [0, 1, 0], [0, 0, 1],
-#         [1, 0, 0], [0, 1, 0], [0, 0, 1]
-#     ]
-#     line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
-#     line_set.colors = o3d.utility.Vector3dVector(colors)
-#
-#     o3d.visualization.draw_geometries([pcd, line_set])
--- a/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py
@@ -1,90 +0,0 @@
-import os
-import logging
-from typing import Union
-
-import cv2
-import numpy as np
-import open3d as o3d
-
-
-
-__all__ = [
-    "draw_box", "draw_mask", "draw_pointcloud",
-]
-
-def draw_box(
-        rgb_img: np.ndarray,
-        segmentation_result,
-        put_text: bool = True,
-        save_path: Union[bool, str] = False
-):
-    """
-    绘制目标检测边界框
-    """
-    boxes = segmentation_result.boxes.xywh.cpu().numpy()
-    confidences = segmentation_result.boxes.conf.cpu().numpy()
-    class_ids = segmentation_result.boxes.cls.cpu().numpy()
-    labels = segmentation_result.names
-
-    for i, box in enumerate(boxes):
-        x_center, y_center, width, height = box[:4]
-
-        p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
-        p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
-        cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
-
-        if put_text:
-            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}',
-                        (p1[0], p1[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
-                        (255, 255, 0), 2)
-
-            cv2.putText(rgb_img, f"{i}", (p1[0] + 5, p1[1] + 15),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-
-    if save_path:
-        if isinstance(save_path, str):
-            dir_path = os.path.dirname(save_path)
-            if dir_path:
-                os.makedirs(dir_path, exist_ok=True)
-        else:
-            home_path = os.path.expanduser("~")
-            save_path = os.path.join(home_path, "detect_color_img.png")
-        cv2.imwrite(save_path, rgb_img)
-
-
-def draw_mask(rgb_img: np.ndarray, segmentation_result):
-    """
-    绘制实例分割mask
-    """
-    masks = segmentation_result.masks.data.cpu().numpy()
-    orig_shape = segmentation_result.masks.orig_shape
-
-    for i, mask in enumerate(masks):
-        mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-        rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-
-
-def draw_pointcloud(pcd, axis: bool = True):
-    """绘制点云"""
-    if not pcd:
-        logging.warning("pcd is empty")
-    if axis:
-        axis_point = [
-        [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        ]  # 画点：原点、第一主成分、第二主成分
-        axis = [
-            [0, 1], [0, 2], [0, 3]
-        ]  # 画出三点之间两两连线
-        axis_colors = [
-            [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        ]
-        # 构造open3d中的LineSet对象，用于主成分显示
-        axis_set = o3d.geometry.LineSet(
-            points=o3d.utility.Vector3dVector(axis_point),
-            lines=o3d.utility.Vector2iVector(axis)
-        )
-
-        axis_set.colors = o3d.utility.Vector3dVector(axis_colors)
-
-        pcd.append(axis_set)
-    o3d.visualization.draw_geometries(pcd)
--- a/vision_detect/vision_detect/VisionDetect/utils/file_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/file_tools.py
@@ -1,26 +0,0 @@
-"""Utility functions for file operations."""
-import os
-import json
-
-import numpy as np
-
-__all__ = [
-    "read_calibration_mat",
-]
-
-def read_calibration_mat(mat_path):
-    """Read calibration matrix from a json file."""
-    sign = True
-    info = "Success"
-    if not os.path.exists(mat_path):
-        info = f"{mat_path} not found, use E(4, 4)"
-        sign = False
-        mat = np.eye(4)
-    else:
-        with open(mat_path, "r", encoding="utf-8") as f:
-            mat = np.array(json.load(f)["T"])
-            if mat.shape != (4, 4):
-                info = "The shape is wrong, use E(4, 4)"
-                sign = False
-                mat = np.eye(4)
-    return mat, info, sign
--- a/vision_detect/vision_detect/VisionDetect/utils/object_icp.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/object_icp.py
@@ -1,140 +0,0 @@
-import numpy as np
-import open3d as o3d
-from typing import Union, List, Tuple
-
-
-__all__ = [
-    "object_icp",
-]
-
-def _draw_pointcloud(pcd, T):
-
-    R = T[0:3, 0:3]
-    point = T[0:3, 3:4].flatten()
-    x, y, z = R[:, 0], R[:, 1], R[:, 2]
-
-    points = [
-        [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
-        point, point + x, point + y, point + z
-
-    ]  # 画点：原点、第一主成分、第二主成分
-    lines = [
-        [0, 1], [0, 2], [0, 3],
-        [4, 5], [4, 6], [4, 7]
-    ]  # 画出三点之间两两连线
-    colors = [
-        [1, 0, 0], [0, 1, 0], [0, 0, 1],
-        [1, 0, 0], [0, 1, 0], [0, 0, 1]
-    ]
-
-    line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
-    line_set.colors = o3d.utility.Vector3dVector(colors)
-
-    pcd.append(line_set)
-    o3d.visualization.draw_geometries(pcd)
-
-
-def _preprocess_point_cloud(pcd, voxel_size):
-
-    pcd_down = pcd.voxel_down_sample(voxel_size)
-    radius_normal = voxel_size * 2
-
-    pcd_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30))
-
-    radius_feature = voxel_size * 5
-
-    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
-        pcd_down,
-        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
-    )
-
-    return pcd_down, pcd_fpfh
-
-def _prepare_dataset(source, target, voxel_size):
-
-    trans_init = np.identity(4)
-    source.transform(trans_init)
-
-    source_down, source_fpfh = _preprocess_point_cloud(source, voxel_size)
-    target_down, target_fpfh = _preprocess_point_cloud(target, voxel_size)
-
-    return source_down, target_down, source_fpfh, target_fpfh
-
-
-def _execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
-
-    distance_threshold = voxel_size * 1.5
-    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
-        source_down,
-        target_down,
-        source_fpfh,
-        target_fpfh,
-        True,
-        distance_threshold,
-        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
-        3,
-        [
-            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
-            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
-        ],
-        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))
-
-    return result.transformation
-
-
-def object_icp(
-        target: o3d.geometry.PointCloud,
-        source: Union[o3d.geometry.PointCloud, str],
-        ransac_voxel_size: float = 0.005,
-        icp_voxel_radius: Union[List[float], Tuple[float, ...], None] = (0.004, 0.002, 0.001),
-        icp_max_iter: Union[List[int], Tuple[int, ...], None] = (50, 30, 14),
-):
-    if isinstance(source, str):
-        source = o3d.io.read_point_cloud(source)
-    elif isinstance(source, o3d.geometry.PointCloud):
-        pass
-    else:
-        raise TypeError(f"Unsupported Type {type(source)}")
-
-    voxel_size = 0.005  # means 5mm for this dataset
-    source_down, target_down, source_fpfh, target_fpfh = _prepare_dataset(source, target, voxel_size)
-
-    T = _execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, ransac_voxel_size)
-
-    for scale in range(3):
-        iter = icp_max_iter[scale]
-        radius = icp_voxel_radius[scale]
-        # print([iter, radius, scale])
-
-        source_down = source.voxel_down_sample(radius)
-        target_down = target.voxel_down_sample(radius)
-
-        source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
-        target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
-
-        result_icp = o3d.pipelines.registration.registration_icp(
-            source_down,
-            target_down,
-            radius * 5,
-            T,
-            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
-            o3d.pipelines.registration.ICPConvergenceCriteria(
-                relative_fitness=1e-6,
-                relative_rmse=1e-6,
-                max_iteration=iter
-            )
-        )
-
-        T = result_icp.transformation
-
-    _draw_pointcloud([source.transform(T), target], T)
-    
-    return T
-
-
-if __name__ == "__main__":
-    target = o3d.io.read_point_cloud("pointcloud/pointcloud_0.pcd")
-    source = o3d.io.read_point_cloud("pointcloud/bottle_model.pcd")
-    Rmat = object_icp(target, source)
-
-
--- a/vision_detect/vision_detect/action_client_node.py
+++ b/vision_detect/vision_detect/action_client_node.py
@@ -0,0 +1,72 @@
+import rclpy
+from rclpy.node import Node
+from rclpy.action import ActionClient
+from interfaces.action import VisionObjectRecognition  # 修改为你的包名
+
+class VisionObjectRecognitionClient(Node):
+    def __init__(self):
+        super().__init__('vision_object_recognition_client')
+        self._action_client = ActionClient(
+            self,
+            VisionObjectRecognition,
+            '/vision_object_recognition'
+        )
+
+        # 示例目标参数
+        self.camera_position = ["left"]
+        self.classes = ["medical_box"]
+
+        # 创建 1 秒定时器
+        self.timer = self.create_timer(1.0, self.timer_callback)
+
+    def timer_callback(self):
+        if not self._action_client.server_is_ready():
+            self.get_logger().info("Waiting for action server...")
+            return
+
+        # 发送目标
+        goal_msg = VisionObjectRecognition.Goal()
+        goal_msg.camera_position = self.camera_position
+        goal_msg.classes = self.classes
+
+        self.get_logger().info(f"Sending goal: camera_position={self.camera_position}, classes={self.classes}")
+        future = self._action_client.send_goal_async(
+            goal_msg,
+            feedback_callback=self.feedback_callback
+        )
+        future.add_done_callback(self.goal_response_callback)
+
+    def feedback_callback(self, feedback_msg):
+        feedback = feedback_msg.feedback
+        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")
+
+    def goal_response_callback(self, future_response):
+        goal_handle = future_response.result()
+        if not goal_handle.accepted:
+            self.get_logger().warn("Goal rejected by server")
+            return
+
+        self.get_logger().info("Goal accepted by server")
+        result_future = goal_handle.get_result_async()
+        result_future.add_done_callback(self.result_callback)
+
+    def result_callback(self, future_result):
+        result = future_result.result().result
+        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
+        for obj in result.objects:
+            self.get_logger().info(
+                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
+            )
+
+
+def main(args=None):
+    rclpy.init(args=args)
+    client = VisionObjectRecognitionClient()
+    rclpy.spin(client)
+    client.destroy_node()
+    if rclpy.ok():
+        rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
--- a/vision_detect/vision_detect/action_client_once.py
+++ b/vision_detect/vision_detect/action_client_once.py
@@ -0,0 +1,76 @@
+import rclpy
+from rclpy.node import Node
+from rclpy.action import ActionClient
+from interfaces.action import VisionObjectRecognition
+
+
+class VisionObjectRecognitionClient(Node):
+    def __init__(self):
+        super().__init__('vision_object_recognition_client')
+        self._action_client = ActionClient(
+            self,
+            VisionObjectRecognition,
+            '/vision_object_recognition'
+        )
+
+        # 示例目标参数
+        self.camera_position = ["left"]
+        self.classes = ["medicine_box"]
+        # self.classes = []
+
+        # 创建 1 秒定时器
+        self.timer = self.create_timer(5.0, self.timer_callback)
+
+    def timer_callback(self):
+        if not self._action_client.server_is_ready():
+            self.get_logger().info("Waiting for action server...")
+            return
+
+        self.timer.cancel()
+
+        # 发送目标
+        goal_msg = VisionObjectRecognition.Goal()
+        goal_msg.camera_position = self.camera_position
+        goal_msg.classes = self.classes
+
+        self.get_logger().info(f"Sending goal: camera_position={self.camera_position}, classes={self.classes}")
+        future = self._action_client.send_goal_async(
+            goal_msg,
+            feedback_callback=self.feedback_callback
+        )
+        future.add_done_callback(self.goal_response_callback)
+
+    def feedback_callback(self, feedback_msg):
+        feedback = feedback_msg.feedback
+        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")
+
+    def goal_response_callback(self, future_response):
+        goal_handle = future_response.result()
+        if not goal_handle.accepted:
+            self.get_logger().warn("Goal rejected by server")
+            return
+
+        self.get_logger().info("Goal accepted by server")
+        result_future = goal_handle.get_result_async()
+        result_future.add_done_callback(self.result_callback)
+
+    def result_callback(self, future_result):
+        result = future_result.result().result
+        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
+        for obj in result.objects:
+            self.get_logger().info(
+                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
+            )
+
+
+def main(args=None):
+    rclpy.init(args=args)
+    client = VisionObjectRecognitionClient()
+    rclpy.spin(client)
+    client.destroy_node()
+    if rclpy.ok():
+        rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
--- a/vision_detect/vision_detect/action_client_once_concurrent.py
+++ b/vision_detect/vision_detect/action_client_once_concurrent.py
@@ -0,0 +1,82 @@
+import rclpy
+from rclpy.node import Node
+from rclpy.action import ActionClient
+from interfaces.action import VisionObjectRecognition
+
+
+class VisionObjectRecognitionClient(Node):
+    def __init__(self):
+        super().__init__('vision_object_recognition_client')
+        self._action_client = ActionClient(
+            self,
+            VisionObjectRecognition,
+            '/vision_object_recognition'
+        )
+
+        # 示例目标参数
+        self.camera_position = ["left"]
+        self.classes = ["medicine_box"]
+        # self.classes = []
+
+        # 创建 1 秒定时器
+        self.timer = self.create_timer(5.0, self.timer_callback)
+
+    def timer_callback(self):
+        if not self._action_client.server_is_ready():
+            self.get_logger().info("Waiting for action server...")
+            return
+
+        self.timer.cancel()
+
+        # 发送目标
+        for i in range(2):
+            goal_msg = VisionObjectRecognition.Goal()
+            goal_msg.camera_position = self.camera_position
+            goal_msg.classes = self.classes
+
+            self.get_logger().info(
+                f"Sending goal {i}: "
+                f"camera_position={self.camera_position}, "
+                f"classes={self.classes}"
+            )
+
+            future = self._action_client.send_goal_async(
+                goal_msg,
+                feedback_callback=self.feedback_callback
+            )
+            future.add_done_callback(self.goal_response_callback)
+
+    def feedback_callback(self, feedback_msg):
+        feedback = feedback_msg.feedback
+        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")
+
+    def goal_response_callback(self, future_response):
+        goal_handle = future_response.result()
+        if not goal_handle.accepted:
+            self.get_logger().warn("Goal rejected by server")
+            return
+
+        self.get_logger().info("Goal accepted by server")
+        result_future = goal_handle.get_result_async()
+        result_future.add_done_callback(self.result_callback)
+
+    def result_callback(self, future_result):
+        result = future_result.result().result
+        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
+        for obj in result.objects:
+            self.get_logger().info(
+                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
+            )
+
+
+def main(args=None):
+    rclpy.init(args=args)
+    client = VisionObjectRecognitionClient()
+    rclpy.spin(client)
+    client.destroy_node()
+    if rclpy.ok():
+        rclpy.shutdown()
+
+
+if __name__ == '__main__':
+    main()
--- a/vision_detect/vision_detect/calculate.py
+++ b/vision_detect/vision_detect/calculate.py
@@ -1,260 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-import transforms3d as tfs
-import numpy as np
-import math
-import json
-
-import rclpy
-from rclpy.node import Node
-
-
-def get_matrix_quat(x, y, z, rw, rx, ry, rz):
-    """从单位四元数构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    q = [rw, rx, ry, rz]
-    rmat = tfs.quaternions.quat2mat(q)
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def get_matrix_eular_radu(x, y, z, rx, ry, rz):
-    """从欧拉角构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    rmat = tfs.euler.euler2mat(
-        # math.radians(rx), math.radians(ry), math.radians(rz)
-        rx, ry, rz
-    )
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def get_matrix_rotvector(x, y, z, rx, ry, rz):
-    """从旋转向量构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    rvec = np.array([rx, ry, rz])
-    theta = np.linalg.norm(rvec)
-    if theta < 1e-8:
-        rmat = np.eye(3)  # 小角度直接返回单位矩阵
-    else:
-        axis = rvec / theta
-        rmat = tfs.axangles.axangle2mat(axis, theta)
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def skew(v):
-    return np.array([[0, -v[2], v[1]],
-                     [v[2], 0, -v[0]],
-                     [-v[1], v[0], 0]])
-
-
-def R2P(T):
-    """旋转矩阵 --> 修正罗德里格斯向量"""
-    axis, angle= tfs.axangles.mat2axangle(T[0:3, 0:3])
-    P = 2 * math.sin(angle / 2) * axis
-    return P
-
-
-def P2R(P):
-    """修正罗德里格斯向量 --> 旋转矩阵"""
-    P2 = np.dot(P.T, P)
-    part_1 = (1 - P2 / 2) * np.eye(3)
-    part_2 = np.add(np.dot(P, P.T), np.sqrt(4- P2) * skew(P.flatten().T))
-    R = np.add(part_1, np.divide(part_2, 2))
-    return R
-
-
-def calculate(Hgs, Hcs):
-    """计算标定矩阵"""
-    # H代表矩阵、h代表标量
-    Hgijs = []
-    Hcijs = []
-    A = []
-    B = []
-    size = 0
-    for i in range(len(Hgs)):
-        for j in range(i + 1, len(Hgs)):
-            size += 1
-            Hgij = np.dot(np.linalg.inv(Hgs[j]), Hgs[i])
-            Hgijs.append(Hgij)
-            Pgij = np.dot(2, R2P(Hgij))
-
-            Hcij = np.dot(Hcs[j], np.linalg.inv(Hcs[i]))
-            Hcijs.append(Hcij)
-            Pcij = np.dot(2, R2P(Hcij))
-
-            A.append(skew(np.add(Pgij, Pcij)))
-            B.append(np.subtract(Pcij, Pgij).reshape(3, 1))
-
-    MA = np.vstack(A)
-    MB = np.vstack(B)
-    Pcg_ = np.dot(np.linalg.pinv(MA), MB)
-    pcg = np.sqrt(np.add(1, np.dot(Pcg_.T, Pcg_)))
-    Pcg = np.dot(np.dot(2, Pcg_), np.linalg.inv(pcg))
-    Rcg = P2R(Pcg).reshape(3, 3)
-
-    A = []
-    B = []
-    id = 0
-    for i in range(len(Hgs)):
-        for j in range(i + 1, len(Hgs)):
-            Hgij = Hgijs[id]
-            Hcij = Hcijs[id]
-            A.append(np.subtract(Hgij[0:3, 0:3], np.eye(3, 3)))
-            B.append(np.subtract(np.dot(Rcg, Hcij[0:3, 3:4]), Hgij[0:3, 3:4]))
-            id += 1
-
-    MA = np.asarray(A).reshape(size * 3, 3)
-    MB = np.asarray(B).reshape(size * 3, 1)
-    Tcg = np.dot(np.linalg.pinv(MA), MB).reshape(3, )
-
-    return tfs.affines.compose(Tcg, np.squeeze(Rcg), [1, 1, 1])
-
-
-class Calibration(Node):
-    def __init__(self, name):
-        super(Calibration, self).__init__(name)
-        self.sync_subscriber = None
-        self.sub_camera_pose = None
-        self.sub_hand_pose = None
-        self.Hgs, self.Hcs = [], []
-        self.hand, self.camera = [], []
-        self.calibration_matrix = None
-
-        self.declare_parameter('matrix_name', 'eye_to_hand')
-        self.matrix_name = self.get_parameter('matrix_name').value
-
-        self.declare_parameter('mode', 'eye_to_hand')
-        self.mode = self.get_parameter('mode').value.lower()
-
-        if self.mode not in ['eye_in_hand', 'eye_to_hand']:
-            raise ValueError("mode must be 'eye_in_hand' or 'eye_to_hand'")
-
-        self.declare_parameter('input', 'quat')
-        self.input = self.get_parameter('input').value.lower()
-        if self.input == 'eular':
-            self.function = get_matrix_eular_radu
-        elif self.input == 'rotvertor':
-            self.function = get_matrix_rotvector
-        elif self.input == 'quat':
-            self.function = get_matrix_quat
-        else:
-            raise ValueError("INPUT ERROR")
-        
-        self.declare_parameter('camera_pose_path', 'camera_pose_data.json')
-        self.declare_parameter('hand_pose_path', 'hand_pose_data.json')
-        self.camera_pose_path = self.get_parameter('camera_pose_path').value
-        self.hand_pose_path = self.get_parameter('hand_pose_path').value
-
-        self.get_pose()
-        
-        self.done = False
-
-    def get_pose(self):
-        with open(f'{self.camera_pose_path}', 'r', encoding='utf-8') as f:
-            self.camera = json.load(f)
-
-        with open(f'{self.hand_pose_path}', 'r', encoding='utf-8') as f:
-            self.hand = json.load(f)
-
-        self.calculate_calibration()
-
-        print(self.hand)
-        print(self.camera)
-
-        self.get_logger().info(f"{self.calibration_matrix}")
-        hand_eye_result = {
-            "T": self.calibration_matrix.tolist()
-        }
-        with open(f"{self.matrix_name}_matrix.json", "w") as f:
-            json.dump(hand_eye_result, f, indent=4)
-        self.get_logger().info(f"Save as {self.matrix_name}_matrix.json")
-
-        self.done = True
-
-    def calculate_data(self):
-        if self.input == 'quat':
-            for i in range(0, len(self.hand), 7):
-                self.Hgs.append(
-                    np.linalg.inv(
-                        self.function(
-                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
-                        )
-                    )
-                    if self.mode == 'eye_to_hand' else
-                    self.function(
-                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
-                    )
-                )
-                self.Hcs.append(
-                    self.function(
-                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
-                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5], self.camera[i + 6]
-                    )
-                )
-
-        else:
-            for i in range(0, len(self.hand), 6):
-                self.Hgs.append(
-                    np.linalg.inv(
-                        self.function(
-                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
-                        )
-                    )
-                    if self.mode == 'eye_to_hand' else
-                    self.function(
-                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
-                    )
-                )
-                self.Hcs.append(
-                    self.function(
-                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
-                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5]
-                    )
-                )
-
-
-    def calculate_calibration(self):
-        self.calculate_data()
-        self.calibration_matrix = calculate(self.Hgs, self.Hcs)
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = Calibration('calibration')
-    try:
-        while rclpy.ok() and not node.done:
-            rclpy.spin_once(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
-
--- a/vision_detect/vision_detect/crossboard_detect.py
+++ b/vision_detect/vision_detect/crossboard_detect.py
@@ -1,323 +0,0 @@
-import os
-from collections import defaultdict
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import open3d as o3d
-import numpy as np
-import transforms3d as tfs
-from cv_bridge import CvBridge
-
-import rclpy
-from rclpy.node import Node
-from message_filters import ApproximateTimeSynchronizer, Subscriber
-
-from sensor_msgs.msg import Image, CameraInfo
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def pca(data, sort=True):
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):
-    """计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=1000.0,
-        depth_trunc=2,
-    )
-
-    point_cloud = point_cloud.remove_non_finite_points()
-
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
-    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
-
-    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    center = clean_pcd.get_center()
-    x, y, z = center
-
-    w, v = pca(np.asarray(clean_pcd.points))
-
-    if w is not None and v is not None:
-        vx, vy, vz = v[:,0], v[:,1], v[:,2]
-
-        if vx[0] < 0:
-            vx = -vx
-        if vy[1] < 0:
-            vy = -vy
-        if not np.allclose(np.cross(vx, vy), vz):
-            vz = -vz
-
-        R = np.column_stack((vx, vy, vz))
-        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
-
-        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-
-        # point = [
-        #     [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
-        #     [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        #     ]  # 画点：原点、第一主成分、第二主成分
-        # lines = [
-        #     [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
-        # ]  # 画出三点之间两两连线
-        # colors = [
-        #     [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        # ]
-        # # 构造open3d中的LineSet对象，用于主成分显示
-        # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
-        # line_set.colors = o3d.utility.Vector3dVector(colors)
-        # o3d.visualization.draw_geometries([point_cloud, line_set])
-        # o3d.visualization.draw_geometries([clean_pcd, line_set])
-
-        return x, y, z, rw, rx, ry, rz
-
-    else:
-        return 0.0, 0.0, 0.0, None, None, None, None
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(depth_img, mask):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    ys, xs = np.where(mask > 0)
-
-    x_min, x_max = int(round(xs.min())), int(round(xs.max()))
-    y_min, y_max = int(round(ys.min())), int(round(ys.max()))
-
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-
-class DetectNode(Node):
-    def __init__(self, name):
-        super().__init__(name)
-        self.output_boxes = None
-        self.output_masks = None
-        self.K = self.D = None
-        self.map1 = self.map2 = None
-        self.intrinsics = None
-
-        self.function = self._test_image
-        self.calculate_function = calculate_pose_cpu
-        self.cv_bridge = CvBridge()
-
-        '''init'''
-        self._init_param()
-        self._init_publisher()
-        self._init_subscriber()
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', False)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('color_image_topic', '/camera/color/image_raw')
-        self.color_image_topic = self.get_parameter('color_image_topic').value
-
-        self.declare_parameter('depth_image_topic', '/camera/depth/image_raw')
-        self.depth_image_topic = self.get_parameter('depth_image_topic').value
-
-        self.declare_parameter('camera_info_topic', '/camera/color/camera_info')
-        self.camera_info_topic = self.get_parameter('camera_info_topic').value
-
-    def _init_publisher(self):
-        """init_publisher"""
-        self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_subscriber(self):
-        """init_subscriber"""
-        self.sub_camera_info = self.create_subscription(
-            CameraInfo,
-            self.camera_info_topic,
-            self._camera_info_callback,
-            10
-        )
-
-        '''sync get color and depth img'''
-        self.sub_color_image = Subscriber(self, Image, self.color_image_topic)
-        self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic)
-
-        self.sync_subscriber = ApproximateTimeSynchronizer(
-            [self.sub_color_image, self.sub_depth_image],
-            queue_size=10,
-            slop=0.1
-        )
-        self.sync_subscriber.registerCallback(self._sync_callback)
-
-    def _camera_info_callback(self, msg: CameraInfo):
-        """Get camera info"""
-        self.K = msg.k
-        self.D = msg.d
-
-        self.camera_size = [msg.width, msg.height]
-
-        if self.K is not None and self.D is not None:
-            self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
-
-            if len(self.D) != 0:
-                self.destroy_subscription(self.sub_camera_info)
-            else:
-                self.D = [0, 0, 0, 0, 0]
-                self.destroy_subscription(self.sub_camera_info)
-        else:
-            raise "K and D are not defined" 
-
-
-    def _sync_callback(self, color_img_ros, depth_img_ros):
-        """同步回调函数"""
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
-
-        img, pose_dict = self.function(color_img_cv, depth_img_cv)
-
-        """masks为空，结束这一帧"""
-        if img is None:
-            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image.publish(img)
-
-        if pose_dict:
-            pose_list_all = PoseArrayClassAndID()
-            for (class_id, class_name), pose_list in pose_dict.items():
-                pose_list_all.objects.append(
-                    PoseClassAndID(
-                        class_name = class_name,
-                        class_id = class_id,
-                        pose_list = pose_list
-                    )
-                )
-            pose_list_all.header.stamp = self.get_clock().now().to_msg()
-            pose_list_all.header.frame_id = "pose_list"
-            self.pub_pose_list.publish(pose_list_all)
-
-    def _test_image(self, rgb_img, depth_img):
-        pose_dict = defaultdict(list)
-        rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
-        pattern_size = (8, 5)
-        # pattern_size = (15, 7)
-        ret, corners = cv2.findChessboardCorners(rgb_img_gray, pattern_size, cv2.CALIB_CB_FAST_CHECK)
-        if ret:
-            # 角点亚像素精确化（提高标定精度）
-            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
-            corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
-
-            corners_subpix = corners_subpix.reshape(pattern_size[1], pattern_size[0], 2)
-            mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
-
-            for i in range(0, pattern_size[1] - 1):
-                for j in range(0, pattern_size[0] - 1):
-                    pts = np.array([
-                        corners_subpix[i, j],
-                        corners_subpix[i, j + 1],
-                        corners_subpix[i + 1, j + 1],
-                        corners_subpix[i + 1, j]
-                    ], dtype=np.int32)
-                    cv2.fillConvexPoly(mask, pts, 1)
-
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-            orig_shape = rgb_img_gray.shape
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask)
-
-            if depth_crop is None:
-                return None, None
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.K[0],
-                self.K[4],
-                self.K[2] - x_min,
-                self.K[5] - y_min
-            )
-
-            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics)
-            self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_dict[int(99), 'crossboard'].append(pose)
-
-            cv2.putText(rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}', (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-            cv2.putText(rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}', (0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        else:
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/detect_bottle_service.py
+++ b/vision_detect/vision_detect/detect_bottle_service.py
@@ -1,575 +0,0 @@
-import os
-import time
-import threading
-import ast
-from collections import defaultdict
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import open3d as o3d
-import numpy as np
-import transforms3d as tfs
-from cv_bridge import CvBridge
-
-import torch
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-
-from sensor_msgs.msg import Image
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, ImgMsg
-from interfaces.srv import VisionObjectRecognition
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def draw_pointcloud(pcd, T):
-
-    R = T[0:3, 0:3]
-    point = T[0:3, 3:4].flatten()
-    x, y, z = R[:, 0], R[:, 1], R[:, 2]
-
-    points = [
-        [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
-        point, point + x, point + y, point + z
-
-    ]  # 画点：原点、第一主成分、第二主成分
-    lines = [
-        [0, 1], [0, 2], [0, 3],
-        [4, 5], [4, 6], [4, 7]
-    ]  # 画出三点之间两两连线
-    colors = [
-        [1, 0, 0], [0, 1, 0], [0, 0, 1],
-        [1, 0, 0], [0, 1, 0], [0, 0, 1]
-    ]
-
-    line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
-    line_set.colors = o3d.utility.Vector3dVector(colors)
-
-    pcd.append(line_set)
-    o3d.visualization.draw_geometries(pcd)
-
-
-def preprocess_point_cloud(pcd, voxel_size):
-
-    pcd_down = pcd.voxel_down_sample(voxel_size)
-    radius_normal = voxel_size * 2
-
-    pcd_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30))
-
-    radius_feature = voxel_size * 5
-
-    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
-        pcd_down,
-        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
-    )
-
-    return pcd_down, pcd_fpfh
-
-def prepare_dataset(source, target, voxel_size):
-
-    trans_init = np.identity(4)
-    source.transform(trans_init)
-
-    source_down, source_fpfh = preprocess_point_cloud(source, voxel_size)
-    target_down, target_fpfh = preprocess_point_cloud(target, voxel_size)
-
-    return source_down, target_down, source_fpfh, target_fpfh
-
-
-def execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
-
-    distance_threshold = voxel_size * 1.5
-    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
-        source_down,
-        target_down,
-        source_fpfh,
-        target_fpfh,
-        True,
-        distance_threshold,
-        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
-        3,
-        [
-            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
-            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
-        ],
-        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))
-
-    return result.transformation
-
-
-def object_icp(
-        target: o3d.geometry.PointCloud,
-        source: o3d.geometry.PointCloud | str,
-        ransac_voxel_size: float = 0.005,
-        icp_voxel_radius: list[float] | None = None,
-        icp_max_iter: list[int] | None = None,
-):
-
-    if icp_voxel_radius is None:
-        icp_voxel_radius = [0.004, 0.002, 0.001]
-
-    if icp_max_iter is None:
-        icp_max_iter = [50, 30, 14]
-
-    if isinstance(source, str):
-        source = o3d.io.read_point_cloud(source)
-    elif isinstance(source, o3d.geometry.PointCloud):
-        pass
-    else:
-        raise TypeError(f"Unsupported Type {type(source)}")
-
-    voxel_size = 0.005  # means 5mm for this dataset
-    source_down, target_down, source_fpfh, target_fpfh = prepare_dataset(source, target, voxel_size)
-
-    T = execute_global_registration(
-        source_down,
-        target_down,
-        source_fpfh,
-        target_fpfh,
-        ransac_voxel_size
-    )
-
-    for scale in range(3):
-        iter = icp_max_iter[scale]
-        radius = icp_voxel_radius[scale]
-        # print([iter, radius, scale])
-
-        source_down = source.voxel_down_sample(radius)
-        target_down = target.voxel_down_sample(radius)
-
-        source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
-        target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
-
-        result_icp = o3d.pipelines.registration.registration_icp(
-            source_down,
-            target_down,
-            radius * 5,
-            T,
-            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
-            o3d.pipelines.registration.ICPConvergenceCriteria(
-                relative_fitness=1e-6,
-                relative_rmse=1e-6,
-                max_iteration=iter
-            )
-        )
-
-        T = result_icp.transformation
-
-    draw_pointcloud([source.transform(T), target], T)
-    
-    return T
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def pca(data, sort=True):
-    """主成分分析 """
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, model_pcd):
-    """计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=1000.0,
-        depth_trunc=2.0,
-    )
-
-    point_cloud = point_cloud.remove_non_finite_points()
-
-    clean_pcd, _ = point_cloud.remove_radius_outlier(nb_points=10, radius=0.1)
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
-
-    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    
-    rmat = object_icp(model_pcd, clean_pcd)
-
-    x, y, z = rmat[0:3, 3:4].flatten()
-
-    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-
-    return x, y, z, rw, rx, ry, rz
-
-
-def draw_box(set_confidence, rgb_img, result):
-    """绘制目标检测边界框"""
-    boxes = result.boxes.xywh.cpu().numpy()
-    confidences = result.boxes.conf.cpu().numpy()
-    class_ids = result.boxes.cls.cpu().numpy()
-    labels = result.names
-
-    for i, box in enumerate(boxes):
-        if confidences[i] >= set_confidence:
-            x_center, y_center, width, height = box[:4]
-
-            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
-            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
-            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
-
-            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-
-
-def draw_mask(set_confidence, rgb_img, result):
-    """绘制实例分割mask"""
-    masks = result.masks.data.cpu().numpy()
-    orig_shape = result.masks.orig_shape
-    confidences = result.boxes.conf.cpu().numpy()
-
-    for i, mask in enumerate(masks):
-        if confidences[i] >= set_confidence:
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-        else:
-            continue
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    """畸变矫正"""
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(depth_img, mask, box):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    x_center, y_center, w, h = box[:4]
-
-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
-
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-
-def calculate_grav_width(mask, K, depth):
-    """计算重心宽度"""
-    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    if contours:
-        box = cv2.boxPoints(cv2.minAreaRect(contours[0]))
-        if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
-            point_diff = box[1] - box[0]
-        else:
-            point_diff = box[1] - box[2]
-
-        grab_width = depth * np.sqrt(point_diff[0]**2 / K[0]**2 + point_diff[1]**2 / K[4]**2)
-        return grab_width
-    else:
-        return 0.0
-
-
-class DetectNode(Node):
-    def __init__(self, name, mode):
-        super().__init__(name)
-        self.mode = mode
-        self.device = None
-        self.checkpoint_path = None
-        self.checkpoint_name = None
-        self.output_boxes = None
-        self.output_masks = None
-        self.function = None
-        self.calculate_function = None
-
-        self.K = None
-
-        self.fx = self.fy = 0.5
-        self.camera_data = {}
-
-        self.cv_bridge = CvBridge()
-        self.lock = threading.Lock()
-        
-        '''init'''
-        self._init_param()
-
-        self.source_model = o3d.io.read_point_cloud(self.source_model_path)
-
-        if mode == 'detect':
-            self._init_model()
-        else:
-            self.function = None
-            self.get_logger().error('Error: Mode Unknown')
-
-        if self.device == 'cpu':
-            self.calculate_function = calculate_pose_cpu
-        elif self.device == 'gpu':
-            raise NotImplementedError('Function: calculate_pose_gpu not implemented')
-        else:
-            raise ValueError(f"device must be cpu or gpu, now {self.device}")
-
-        self._init_publisher()
-        self._init_subscriber()
-        self._init_service()
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
-        self.checkpoint_name = self.get_parameter('checkpoint_name').value
-        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
-
-        self.source_model_path = os.path.join(share_dir, 'pointclouds/bottle_model.pcd')
-
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', False)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('set_confidence', 0.25)
-        self.set_confidence = self.get_parameter('set_confidence').value
-
-        self.declare_parameter('device', 'cpu')
-        self.device = self.get_parameter('device').value
-
-        self.declare_parameter('classes', 'None')
-        self.classes = ast.literal_eval(self.get_parameter('classes').value)
-
-    def _init_model(self):
-        """init model"""
-        device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        try:
-            self.model = YOLO(self.checkpoint_path).to(device_model)
-        except Exception as e:
-            self.get_logger().error(f'Failed to load YOLO model: {e}')
-            raise
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        if self.checkpoint_name.endswith('-seg.pt'):
-            self.function = self._seg_image
-        else:
-            self.function = None
-            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
-
-    def _init_publisher(self):
-        """init publisher"""
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_service(self):
-        """init service server"""
-        self.server = self.create_service(
-            VisionObjectRecognition, 
-            "/vision_object_recognition", 
-            self._service_callback
-        )
-
-    def _init_subscriber(self):
-        """init subscriber"""
-        self.sub_img = self.create_subscription(
-            ImgMsg,
-            "/img_msg",
-            self._sub_callback, 
-            10
-        )
-
-    def _sub_callback(self, msg):
-        """同步回调函数"""
-        with self.lock:
-            self.camera_data[msg.position] = [
-                msg.image_color, 
-                msg.image_depth, 
-                msg.karr, 
-                msg.darr
-            ]
-
-    def _service_callback(self, request, response):
-        response.header.stamp = self.get_clock().now().to_msg()
-        response.header.frame_id = "camera_detect"
-
-        with self.lock:
-            if request.camera_position in self.camera_data:
-                color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
-            else:
-                response.success = False
-                response.info = f"{request.camera_position} Camera data is empty or name is wrong"
-                response.objects = []
-
-                return response
-        
-        self.camera_size = [color_img_ros.width, color_img_ros.height]
-        
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        map1, map2, self.K = get_map(self.K, D, self.camera_size)
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
-
-        img, pose_dict = self.function(color_img_cv, depth_img_cv)
-
-        """masks为空，结束这一帧"""
-        if self.output_boxes or self.output_masks:
-            if img is None:
-                img = color_img_ros
-            self.pub_detect_image.publish(img)
-
-        if pose_dict:
-            response.info = "Success get pose"
-            response.success = True
-            # self.get_logger().info('get_pose')
-            for (class_id, class_name), pose_list in pose_dict.items():
-                response.objects.append(
-                    PoseClassAndID(
-                        class_name = class_name,
-                        class_id = class_id,
-                        pose_list = pose_list
-                    )
-                )
-        
-        else:
-            response.info = "pose dict is empty"
-            response.success = False
-            response.objects = []
-
-        return response
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
-        """Use segmentation model"""
-        pose_dict = defaultdict(list)
-
-        '''Get Predict Results'''
-        # time1 = time.time()
-        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[39])
-        # time2 = time.time()
-        result = results[0]
-
-        '''Get masks'''
-        if result.masks is None or len(result.masks) == 0:
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-        orig_shape = result.masks.orig_shape
-
-        '''Get boxes'''
-        boxes = result.boxes.data.cpu().numpy()
-
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        # time3 = time.time()
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
-            
-            if depth_crop is None:
-                continue
-
-            if mask.shape[0] >= (orig_shape[0] * 0.5) and mask.shape[1] >= (orig_shape[1] * 0.5):
-                mask_crop = cv2.resize(mask_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
-                depth_crop = cv2.resize(depth_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
-                intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0] * self.fx),
-                int(self.camera_size[1] * self.fy),
-                self.K[0] * self.fx,
-                self.K[4] * self.fy,
-                (self.K[2] - x_min) * self.fx,
-                (self.K[5] - y_min) * self.fy
-            )
-            else:
-                intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                    int(self.camera_size[0]),
-                    int(self.camera_size[1]),
-                    self.K[0],
-                    self.K[4],
-                    self.K[2] - x_min,
-                    self.K[5] - y_min
-                )
-
-            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, self.source_model)
-
-            grab_width = calculate_grav_width(mask, self.K, z)
-            z = z + grab_width * 0.45
-
-            if (x, y, z) == (None, None, None):
-                self.get_logger().error("have wrong pose")
-                continue
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
-
-        # time4 = time.time()
-
-
-        # self.get_logger().info(f'start')
-        # self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
-        # self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
-        # self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
-        # self.get_logger().info(f'{(time4 - time1)*1000} ms, completing a picture entire process')
-        # self.get_logger().info(f'end')
-
-        '''mask_img and box_img is or not output'''
-        if self.output_boxes and not self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif self.output_boxes and self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif not self.output_boxes and self.output_masks:
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        else:
-            return None, pose_dict
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect', 'detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/detect_box_service.py
+++ b/vision_detect/vision_detect/detect_box_service.py
@@ -1,446 +0,0 @@
-import os
-import time
-import threading
-import ast
-from collections import defaultdict
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import open3d as o3d
-import numpy as np
-import transforms3d as tfs
-from cv_bridge import CvBridge
-
-import torch
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-
-from sensor_msgs.msg import Image
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, ImgMsg
-from interfaces.srv import VisionObjectRecognition
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def pca(data, sort=True):
-    """主成分分析 """
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
-    """计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=1000.0,
-        depth_trunc=8.0,
-    )
-
-    point_cloud = point_cloud.remove_non_finite_points()
-
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
-    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
-
-    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    center = clean_pcd.get_center()
-    x, y, z = center
-
-    w, v = pca(np.asarray(clean_pcd.points))
-
-    if w is not None and v is not None:
-        vx, vy, vz = v[:,0], v[:,1], v[:,2]
-
-        if vx[0] < 0:
-            vx = -vx
-        if vy[1] < 0:
-            vy = -vy
-        if not np.allclose(np.cross(vx, vy), vz):
-            vz = -vz
-
-        R = np.column_stack((vx, vy, vz))
-        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
-        rmat = hand_eye_mat @ rmat
-
-        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-        x, y, z = rmat[0:3, 3].flatten()
-
-        # point = [
-        #     [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
-        #     [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        #     ]  # 画点：原点、第一主成分、第二主成分
-        # lines = [
-        #     [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
-        # ]  # 画出三点之间两两连线
-        # colors = [
-        #     [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        # ]
-        # # 构造open3d中的LineSet对象，用于主成分显示
-        # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
-        # line_set.colors = o3d.utility.Vector3dVector(colors)
-        # o3d.visualization.draw_geometries([point_cloud, line_set])
-        # o3d.visualization.draw_geometries([clean_pcd, line_set])
-
-        return x, y, z, rw, rx, ry, rz
-
-
-def draw_box(set_confidence, rgb_img, result):
-    """绘制目标检测边界框"""
-    boxes = result.boxes.xywh.cpu().numpy()
-    confidences = result.boxes.conf.cpu().numpy()
-    class_ids = result.boxes.cls.cpu().numpy()
-    labels = result.names
-
-    for i, box in enumerate(boxes):
-        if confidences[i] >= set_confidence:
-            x_center, y_center, width, height = box[:4]
-
-            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
-            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
-            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
-
-            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-
-
-def draw_mask(set_confidence, rgb_img, result):
-    """绘制实例分割mask"""
-    masks = result.masks.data.cpu().numpy()
-    orig_shape = result.masks.orig_shape
-    confidences = result.boxes.conf.cpu().numpy()
-
-    for i, mask in enumerate(masks):
-        if confidences[i] >= set_confidence:
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-        else:
-            continue
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    """畸变矫正"""
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(depth_img, mask, box):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    x_center, y_center, w, h = box[:4]
-
-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
-
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-
-
-class DetectNode(Node):
-    def __init__(self, name, mode):
-        super().__init__(name)
-        self.mode = mode
-        self.device = None
-        self.checkpoint_path = None
-        self.checkpoint_name = None
-        self.output_boxes = None
-        self.output_masks = None
-        self.function = None
-        self.calculate_function = None
-        self.eye_in_hand_mat = None
-        self.eye_to_hand_mat = None
-
-        self.K = None
-
-        self.fx = self.fy = 0.5
-        self.camera_data = {}
-
-        self.cv_bridge = CvBridge()
-        self.lock = threading.Lock()
-        
-        '''init'''
-        self._init_param()
-
-
-        if mode == 'detect':
-            self._init_model()
-        else:
-            self.function = None
-            self.get_logger().error('Error: Mode Unknown')
-
-        if self.device == 'cpu':
-            self.calculate_function = calculate_pose_cpu
-        elif self.device == 'gpu':
-            raise NotImplementedError('Function: calculate_pose_gpu not implemented')
-        else:
-            raise ValueError(f"device must be cpu or gpu, now {self.device}")
-
-        self._init_publisher()
-        self._init_subscriber()
-        self._init_service()
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('checkpoint_name', 'hivecorebox-seg.pt')
-        self.checkpoint_name = self.get_parameter('checkpoint_name').value
-        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
-
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', True)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('set_confidence', 0.60)
-        self.set_confidence = self.get_parameter('set_confidence').value
-
-        self.declare_parameter('device', 'cpu')
-        self.device = self.get_parameter('device').value
-
-        self.declare_parameter('classes', 'None')
-        self.classes = ast.literal_eval(self.get_parameter('classes').value)
-
-        self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
-                                              0.0, 1.0, 0.0, 0.0,
-                                              0.0, 0.0, 1.0, 0.0,
-                                              0.0, 0.0, 0.0, 1.0])
-        self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)
-
-        self.declare_parameter('eye_to_hand', [1.0, 0.0, 0.0, 0.0,
-                                              0.0, 1.0, 0.0, 0.0,
-                                              0.0, 0.0, 1.0, 0.0,
-                                              0.0, 0.0, 0.0, 1.0])
-        self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
-
-    def _init_model(self):
-        """init model"""
-        device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        try:
-            self.model = YOLO(self.checkpoint_path).to(device_model)
-        except Exception as e:
-            self.get_logger().error(f'Failed to load YOLO model: {e}')
-            raise
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        if self.checkpoint_name.endswith('-seg.pt'):
-            self.function = self._seg_image
-        else:
-            self.function = None
-            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
-
-    def _init_publisher(self):
-        """init publisher"""
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_service(self):
-        """init service server"""
-        self.server = self.create_service(
-            VisionObjectRecognition, 
-            "/vision_object_recognition", 
-            self._service_callback
-        )
-
-    def _init_subscriber(self):
-        """init subscriber"""
-        self.sub_img = self.create_subscription(
-            ImgMsg,
-            "/img_msg",
-            self._sub_callback, 
-            10
-        )
-
-    def _sub_callback(self, msg):
-        """同步回调函数"""
-        with self.lock:
-            self.camera_data[msg.position] = [
-                msg.image_color, 
-                msg.image_depth, 
-                msg.karr, 
-                msg.darr
-            ]
-
-    def _service_callback(self, request, response):
-        response.header.stamp = self.get_clock().now().to_msg()
-        response.header.frame_id = "camera_detect"
-
-        with self.lock:
-            if request.camera_position in self.camera_data:
-                color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
-            else:
-                response.success = False
-                response.info = f"{request.camera_position} Camera data is empty or name is wrong"
-                response.objects = []
-
-                return response
-
-        if request.camera_position == 'left' or request.camera_position == 'right':
-            hand_eye_mat = self.eye_in_hand_mat
-        else:
-            hand_eye_mat = self.eye_to_hand_mat
-        
-        self.camera_size = [color_img_ros.width, color_img_ros.height]
-        
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        map1, map2, self.K = get_map(self.K, D, self.camera_size)
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
-
-        img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)
-
-        """masks为空，结束这一帧"""
-        if self.output_boxes or self.output_masks:
-            if img is None:
-                img = color_img_ros
-            self.pub_detect_image.publish(img)
-
-        if pose_dict:
-            response.info = "Success get pose"
-            response.success = True
-            # self.get_logger().info('get_pose')
-            for (class_id, class_name), pose_list in pose_dict.items():
-                response.objects.append(
-                    PoseClassAndID(
-                        class_name = class_name,
-                        class_id = class_id,
-                        pose_list = pose_list
-                    )
-                )
-        
-        else:
-            response.info = "pose dict is empty"
-            response.success = False
-            response.objects = []
-
-        return response
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
-        """Use segmentation model"""
-        pose_dict = defaultdict(list)
-
-        depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
-        depth_filter_mask[(depth_img > 0) & (depth_img < 2000)] = 1
-
-        rgb_img[depth_filter_mask == 0] = 0
-
-        '''Get Predict Results'''
-        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[0])
-        result = results[0]
-
-        '''Get masks'''
-        if result.masks is None or len(result.masks) == 0:
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-        orig_shape = result.masks.orig_shape
-
-        '''Get boxes'''
-        boxes = result.boxes.xywh.cpu().numpy()
-
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
-            
-            if depth_crop is None:
-                continue
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.K[0],
-                self.K[4],
-                self.K[2] - x_min,
-                self.K[5] - y_min
-            )
-
-            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)
-
-            if (x, y, z) == (None, None, None):
-                self.get_logger().error("have wrong pose")
-                continue
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
-
-        '''mask_img and box_img is or not output'''
-        if self.output_boxes and not self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif self.output_boxes and self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif not self.output_boxes and self.output_masks:
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        else:
-            return None, pose_dict
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect', 'detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/detect_node.py
+++ b/vision_detect/vision_detect/detect_node.py
@@ -1,14 +1,18 @@
 import rclpy
+from rclpy.executors import MultiThreadedExecutor
+
+from vision_detect.vision_core import NodeManager

-from vision_detect.VisionDetect.node import DetectNode

 def main(args=None):
    rclpy.init(args=args)
-    node = DetectNode('detect')
+    node = NodeManager('detect')
    try:
-        rclpy.spin(node)
+        executor = MultiThreadedExecutor()
+        rclpy.spin(node, executor=executor)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
-        rclpy.shutdown()
+        if rclpy.ok():
+            rclpy.shutdown()
--- a/vision_detect/vision_detect/detect_service.py
+++ b/vision_detect/vision_detect/detect_service.py
@@ -1,415 +0,0 @@
-import os
-import time
-import threading
-import ast
-from collections import defaultdict
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import open3d as o3d
-import numpy as np
-import transforms3d as tfs
-from cv_bridge import CvBridge
-
-import torch
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-
-from sensor_msgs.msg import Image
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, ImgMsg
-from interfaces.srv import VisionObjectRecognition
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def pca(data, sort=True):
-    """主成分分析 """
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
-    """计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=1000.0,
-        depth_trunc=3.0,
-    )
-
-    point_cloud = point_cloud.remove_non_finite_points()
-
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.020)
-    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
-
-    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    center = clean_pcd.get_center()
-    x, y, z = center
-
-    w, v = pca(np.asarray(clean_pcd.points))
-
-    if w is not None and v is not None:
-        vx, vy, vz = v[:,0], v[:,1], v[:,2]
-
-        if vx[0] < 0:
-            vx = -vx
-        if vy[1] < 0:
-            vy = -vy
-        if not np.allclose(np.cross(vx, vy), vz):
-            vz = -vz
-
-        R = np.column_stack((vx, vy, vz))
-        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
-        rmat = hand_eye_mat @ rmat
-        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-        x, y, z = rmat[0:3, 3].flatten()
-
-        return x, y, z, rw, rx, ry, rz
-
-    else:
-        return 0.0, 0.0, 0.0, None, None, None, None
-
-
-def draw_box(set_confidence, rgb_img, result):
-    """绘制目标检测边界框"""
-    boxes = result.boxes.xywh.cpu().numpy()
-    confidences = result.boxes.conf.cpu().numpy()
-    class_ids = result.boxes.cls.cpu().numpy()
-    labels = result.names
-
-    for i, box in enumerate(boxes):
-        if confidences[i] >= set_confidence:
-            x_center, y_center, width, height = box[:4]
-
-            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
-            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
-            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
-
-            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-
-
-def draw_mask(set_confidence, rgb_img, result):
-    """绘制实例分割mask"""
-    masks = result.masks.data.cpu().numpy()
-    orig_shape = result.masks.orig_shape
-    confidences = result.boxes.conf.cpu().numpy()
-
-    for i, mask in enumerate(masks):
-        if confidences[i] >= set_confidence:
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-        else:
-            continue
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    """畸变矫正"""
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(depth_img, mask, box):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    x_center, y_center, w, h = box[:4]
-
-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
-
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-
-class DetectNode(Node):
-    def __init__(self, name):
-        super().__init__(name)
-        self.device = None
-        self.checkpoint_path = None
-        self.checkpoint_name = None
-        self.output_boxes = None
-        self.output_masks = None
-        self.function = None
-        self.K = None
-        self.eye_in_hand_mat = None
-        self.eye_to_hand_mat = None
-
-        self.calculate_function = calculate_pose_cpu
-        self.fx = self.fy = 0.5
-        self.camera_data = {}
-        self.cv_bridge = CvBridge()
-        self.lock = threading.Lock()
-        
-        '''init'''
-        self._init_param()
-        self._init_model()
-        self._init_publisher()
-        self._init_subscriber()
-        self._init_service()
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
-        self.checkpoint_name = self.get_parameter('checkpoint_name').value
-        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
-
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', False)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('set_confidence', 0.25)
-        self.set_confidence = self.get_parameter('set_confidence').value
-
-        self.declare_parameter('classes', 'None')
-        self.classes = ast.literal_eval(self.get_parameter('classes').value)
-
-        self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
-                                              0.0, 1.0, 0.0, 0.0,
-                                              0.0, 0.0, 1.0, 0.0,
-                                              0.0, 0.0, 0.0, 1.0])
-        self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)
-
-        self.declare_parameter('eye_to_hand', [1, 0, 0, 0,
-                                              0, 1, 0, 0,
-                                              0, 0, 1, 0,
-                                              0, 0, 0, 1])
-        self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
-
-    def _init_model(self):
-        """init model"""
-        device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        try:
-            self.model = YOLO(self.checkpoint_path).to(device_model)
-        except Exception as e:
-            self.get_logger().error(f'Failed to load YOLO model: {e}')
-            raise
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        if self.checkpoint_name.endswith('-seg.pt'):
-            self.function = self._seg_image
-        else:
-            self.function = None
-            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
-
-    def _init_publisher(self):
-        """init publisher"""
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_service(self):
-        """init service server"""
-        self.server = self.create_service(
-            VisionObjectRecognition, 
-            "/vision_object_recognition", 
-            self._service_callback
-        )
-
-    def _init_subscriber(self):
-        """init subscriber"""
-        self.sub_img = self.create_subscription(
-            ImgMsg,
-            "/img_msg",
-            self._sub_callback, 
-            10
-        )
-
-    def _sub_callback(self, msg):
-        """同步回调函数"""
-        with self.lock:
-            self.camera_data[msg.position] = [
-                msg.image_color, 
-                msg.image_depth, 
-                msg.karr, 
-                msg.darr
-            ]
-
-    def _service_callback(self, request, response):
-        response.header.stamp = self.get_clock().now().to_msg()
-        response.header.frame_id = "camera_detect"
-
-        with self.lock:
-            if request.camera_position in self.camera_data:
-                color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
-            else:
-                response.success = False
-                response.info = f"{request.camera_position} Camera data is empty or name is wrong"
-                response.objects = []
-
-                return response
-            
-        if request.camera_position == 'left' or request.camera_position == 'right':
-            hand_eye_mat = self.eye_in_hand_mat
-        else:
-            hand_eye_mat = self.eye_to_hand_mat
-        
-        self.camera_size = [color_img_ros.width, color_img_ros.height]
-        
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        map1, map2, self.K = get_map(self.K, D, self.camera_size)
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
-
-        img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)
-
-        """masks为空，结束这一帧"""
-        if self.output_boxes or self.output_masks:
-            if img is None:
-                img = color_img_ros
-            self.pub_detect_image.publish(img)
-
-        if pose_dict:
-            response.info = "Success get pose"
-            response.success = True
-            for (class_id, class_name), pose_list in pose_dict.items():
-                response.objects.append(
-                    PoseClassAndID(
-                        class_name = class_name,
-                        class_id = class_id,
-                        pose_list = pose_list
-                    )
-                )
-
-            return response
-        
-        else:
-            response.info = "pose dict is empty"
-            response.success = False
-            response.objects = []
-
-            return response
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
-        """Use segmentation model"""
-        pose_dict = defaultdict(list)
-
-        '''Get Predict Results'''
-        time1 = time.time()
-        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=self.classes)
-        time2 = time.time()
-        result = results[0]
-
-        '''Get masks'''
-        if result.masks is None or len(result.masks) == 0:
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-        orig_shape = result.masks.orig_shape
-
-        '''Get boxes'''
-        boxes = result.boxes.xywh.cpu().numpy()
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        time3 = time.time()
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
-            
-            if depth_crop is None:
-                continue
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.K[0],
-                self.K[4],
-                self.K[2] - x_min,
-                self.K[5] - y_min
-            )
-
-            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
-
-        time4 = time.time()
-
-        self.get_logger().info(f'start')
-        self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
-        self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
-        self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
-        self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
-        self.get_logger().info(f'end')
-
-        '''mask_img and box_img is or not output'''
-        if self.output_boxes and not self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif self.output_boxes and self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        elif not self.output_boxes and self.output_masks:
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
-        else:
-            return None, pose_dict
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/detect_topic.py
+++ b/vision_detect/vision_detect/detect_topic.py
@@ -1,429 +0,0 @@
-import os
-import time
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import open3d as o3d
-import numpy as np
-import transforms3d as tfs
-from cv_bridge import CvBridge
-
-import torch
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-from message_filters import ApproximateTimeSynchronizer, Subscriber
-
-from sensor_msgs.msg import Image, CameraInfo
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
-
-
-share_dir = get_package_share_directory('vision_detect')
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def pca(data, sort=True):
-    """主成分分析 """
-    center = np.mean(data, axis=0)
-    centered_points = data - center    # 去中心化
-
-    try:
-        cov_matrix = np.cov(centered_points.T)  # 转置
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-
-    except np.linalg.LinAlgError:
-        return None, None
-
-    if sort:
-        sort = eigenvalues.argsort()[::-1]  # 降序排列
-        eigenvalues = eigenvalues[sort]  # 索引
-        eigenvectors = eigenvectors[:, sort]
-
-    return eigenvalues, eigenvectors
-
-def calculate_pose_cpu(mask, rgb_img, depth_img: np.ndarray, intrinsics):
-    """计算位态"""
-    depth_img_mask = np.zeros_like(depth_img)
-    depth_img_mask[mask > 0] = depth_img[mask > 0]
-
-    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
-
-    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
-        depth=depth_o3d,
-        intrinsic=intrinsics,
-        depth_scale=1000.0,
-        depth_trunc=8.0,
-    )
-
-    point_cloud = point_cloud.remove_non_finite_points()
-
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
-    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
-
-    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-    center = clean_pcd.get_center()
-    x, y, z = center
-
-    w, v = pca(np.asarray(clean_pcd.points))
-
-    if w is not None and v is not None:
-        vx, vy, vz = v[:,0], v[:,1], v[:,2]
-
-        if vx[0] < 0:
-            vx = -vx
-        if vy[1] < 0:
-            vy = -vy
-        if not np.allclose(np.cross(vx, vy), vz):
-            vz = -vz
-
-        R = np.column_stack((vx, vy, vz))
-        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
-
-        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
-        x, y, z = rmat[0:3, 3].flatten()
-
-        # point = [
-        #     [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
-        #     [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        #     ]  # 画点：原点、第一主成分、第二主成分
-        # lines = [
-        #     [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
-        # ]  # 画出三点之间两两连线
-        # colors = [
-        #     [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        # ]
-        # # 构造open3d中的LineSet对象，用于主成分显示
-        # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
-        # line_set.colors = o3d.utility.Vector3dVector(colors)
-        # o3d.visualization.draw_geometries([point_cloud, line_set])
-        # o3d.visualization.draw_geometries([clean_pcd, line_set])
-
-        return x, y, z, rw, rx, ry, rz
-
-
-def draw_box(set_confidence, rgb_img, result):
-    """绘制目标检测边界框"""
-    boxes = result.boxes.xywh.cpu().numpy()
-    confidences = result.boxes.conf.cpu().numpy()
-    class_ids = result.boxes.cls.cpu().numpy()
-    labels = result.names
-
-    for i, box in enumerate(boxes):
-        if confidences[i] >= set_confidence:
-            x_center, y_center, width, height = box[:4]
-
-            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
-            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
-            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
-
-            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
-
-
-def draw_mask(set_confidence, rgb_img, result):
-    """绘制实例分割mask"""
-    masks = result.masks.data.cpu().numpy()
-    orig_shape = result.masks.orig_shape
-    confidences = result.boxes.conf.cpu().numpy()
-
-    for i, mask in enumerate(masks):
-        if confidences[i] >= set_confidence:
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
-        else:
-            continue
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    """畸变矫正"""
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(rgb_img, depth_img, mask, box):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    x_center, y_center, w, h = box[:4]
-
-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
-
-    rgb_crop = rgb_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return rgb_crop, depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-class DetectNode(Node):
-    def __init__(self, name):
-        super().__init__(name)
-        self.checkpoint_path = None
-        self.checkpoint_name = None
-        self.function = None
-        self.output_boxes = None
-        self.output_masks = None
-        self.K = self.D = None
-        self.map1 = self.map2 = None
-        self.eye_in_hand_mat = None
-        self.eye_to_hand_mat = None
-
-        self.calculate_function = calculate_pose_cpu
-        self.fx = self.fy = 1.0
-        self.cv_bridge = CvBridge()
-
-        '''init'''
-        self._init_param()
-        self._init_model()
-        self._init_publisher()
-        self._init_subscriber()
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('checkpoint_name', 'hivecorebox-seg.pt')
-        self.checkpoint_name = self.get_parameter('checkpoint_name').value
-        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
-
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', True)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('set_confidence', 0.60)
-        self.set_confidence = self.get_parameter('set_confidence').value
-
-        self.declare_parameter('color_image_topic', '/camera/color/image_raw')
-        self.color_image_topic = self.get_parameter('color_image_topic').value
-
-        self.declare_parameter('depth_image_topic', '/camera/depth/image_raw')
-        self.depth_image_topic = self.get_parameter('depth_image_topic').value
-
-        self.declare_parameter('camera_info_topic', '/camera/color/camera_info')
-        self.camera_info_topic = self.get_parameter('camera_info_topic').value
-
-        self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
-                                              0.0, 1.0, 0.0, 0.0,
-                                              0.0, 0.0, 1.0, 0.0,
-                                              0.0, 0.0, 0.0, 1.0])
-        self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)
-
-        self.declare_parameter('eye_to_hand', [1.0, 0.0, 0.0, 0.0,
-                                              0.0, 1.0, 0.0, 0.0,
-                                              0.0, 0.0, 1.0, 0.0,
-                                              0.0, 0.0, 0.0, 1.0])
-        self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
-
-
-    def _init_model(self):
-        """init model"""
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-        try:
-            self.model = YOLO(self.checkpoint_path).to(device)
-        except Exception as e:
-            self.get_logger().error(f'Failed to load YOLO model: {e}')
-            raise
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        if self.checkpoint_name.endswith('-seg.pt'):
-            self.function = self._seg_image
-        else:
-            self.function = None
-            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
-
-    def _init_publisher(self):
-        """init_publisher"""
-        self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_subscriber(self):
-        """init_subscriber"""
-        self.sub_camera_info = self.create_subscription(
-            CameraInfo,
-            self.camera_info_topic,
-            self._camera_info_callback,
-            10
-        )
-
-        '''sync get color and depth img'''
-        self.sub_color_image = Subscriber(self, Image, self.color_image_topic)
-        self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic)
-
-        self.sync_subscriber = ApproximateTimeSynchronizer(
-            [self.sub_color_image, self.sub_depth_image],
-            queue_size=10,
-            slop=0.1
-        )
-        self.sync_subscriber.registerCallback(self._sync_callback)
-
-    def _camera_info_callback(self, msg: CameraInfo):
-        """Get camera info"""
-        self.K = msg.k
-        self.D = msg.d
-
-        self.camera_size = [msg.width, msg.height]
-
-        if self.K is not None and self.D is not None:
-            self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
-
-            if len(self.D) != 0:
-                self.destroy_subscription(self.sub_camera_info)
-            else:
-                self.D = [0, 0, 0, 0, 0, 0, 0, 0]
-                self.destroy_subscription(self.sub_camera_info)
-        else:
-            raise "K and D are not defined"
-
-    def _sync_callback(self, color_img_ros, depth_img_ros):
-        """同步回调函数"""
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
-
-        img, pose_list = self.function(color_img_cv, depth_img_cv)
-
-        """masks为空，结束这一帧"""
-        if img is None:
-            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image.publish(img)
-
-        if pose_list:
-            pose_list_all = PoseArrayClassAndID()
-            for item in pose_list:
-                pose_list_all.objects.append(
-                    PoseClassAndID(
-                        class_name = item["class_name"],
-                        class_id = item["class_id"],
-                        pose = item["pose"]
-                    )
-                )
-            pose_list_all.header.stamp = self.get_clock().now().to_msg()
-            pose_list_all.header.frame_id = "pose_list"
-            self.pub_pose_list.publish(pose_list_all)
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
-        """Use segmentation model"""
-        pose_list = []
-
-        depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
-        depth_filter_mask[(depth_img > 0) & (depth_img < 2000)] = 1
-
-        rgb_img[depth_filter_mask == 0] = 0
-
-        '''Get Predict Results'''
-        time1 = time.time()
-        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence)
-        time2 = time.time()
-        result = results[0]
-
-        '''Get masks'''
-        if result.masks is None or len(result.masks) == 0:
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-        orig_shape = result.masks.orig_shape
-
-        '''Get boxes'''
-        boxes = result.boxes.xywh.cpu().numpy()
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        time3 = time.time()
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_crop, depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(rgb_img, depth_img, mask, box)
-
-            if depth_crop is None:
-                continue
-
-            intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.K[0],
-                self.K[4],
-                self.K[2] - x_min,
-                self.K[5] - y_min
-            )
-
-            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, rgb_crop, depth_crop, intrinsics)
-            print(x, y, z)
-
-            if (x, y, z) != (0.0, 0.0, 0.0):
-                pose = Pose()
-                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                pose_list.append(
-                    {
-                        "class_id": int(class_ids[i]),
-                        "class_name": labels[class_ids[i]],
-                        "pose": pose
-                    }
-                )
-
-        time4 = time.time()
-
-        self.get_logger().info(f'start')
-        self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
-        self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
-        self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
-        self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
-        self.get_logger().info(f'end')
-
-        '''mask_img and box_img is or not output'''
-        if self.output_boxes and not self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        elif self.output_boxes and self.output_masks:
-            draw_box(self.set_confidence, rgb_img, result)
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        elif not self.output_boxes and self.output_masks:
-            draw_mask(self.set_confidence, rgb_img, result)
-            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
-        else:
-            return None, pose_list
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/flexivaidk_detect_service.py
+++ b/vision_detect/vision_detect/flexivaidk_detect_service.py
@@ -116,9 +116,9 @@ class DetectNode(Node):
        self.declare_parameter('output_masks', False)
        self.output_masks = self.get_parameter('output_masks').value

-        self.declare_parameter('config_name', 'default_config.json')
+        self.declare_parameter('config_name', 'default_service_config.json')
        self.config_name = self.get_parameter('config_name').value
-        self.config_dir = os.path.join(share_dir, 'configs/flexiv_configs', self.config_name)
+        self.config_dir = os.path.join(share_dir, 'configs/flexiv', self.config_name)

        self.declare_parameter('set_confidence', 0.5)
        self.set_confidence = self.get_parameter('set_confidence').value
--- a/vision_detect/vision_detect/flexivaidk_detect_topic.py
+++ b/vision_detect/vision_detect/flexivaidk_detect_topic.py
@@ -1,340 +0,0 @@
-import os
-import time
-import json
-import sys
-from datetime import datetime
-from collections import defaultdict
-from ament_index_python.packages import get_package_share_directory
-
-import cv2
-import numpy as np
-from cv_bridge import CvBridge
-
-import torch
-from ultralytics import YOLO
-
-import rclpy
-from rclpy.node import Node
-from message_filters import ApproximateTimeSynchronizer, Subscriber
-
-from sensor_msgs.msg import Image, CameraInfo
-from geometry_msgs.msg import Pose, Point, Quaternion
-from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
-
-sys.path.insert(0, "/home/nvidia/aidk/AIDK/release/lib_py/")
-import flexivaidk as aidk
-
-share_dir = get_package_share_directory("vision_detect")
-
-
-def get_map(K, D, camera_size):
-    h, w = camera_size[::-1]
-    K = np.array(K).reshape(3, 3)
-    D = np.array(D)
-    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
-    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
-
-    return map1, map2, new_K.flatten()
-
-
-def distortion_correction(color_image, depth_image, map1, map2):
-    """畸变矫正"""
-    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
-    undistorted_color = undistorted_color.astype(color_image.dtype)
-
-    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
-    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
-
-    return undistorted_color, undistorted_depth
-
-
-def crop_mask_bbox(rgb_img, depth_img, mask, box):
-    """
-    输入:
-        depth_img: H x W
-        mask: H x W (0/1 或 bool)
-    输出:
-        depth_crop, mask_crop
-    """
-    high, width = depth_img.shape
-    x_center, y_center, w, h = box[:4]
-
-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
-
-    rgb_crop = rgb_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
-
-    return rgb_crop, depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
-
-class DetectNode(Node):
-    def __init__(self, name):
-        super().__init__(name)
-        self.checkpoint_path = None
-        self.checkpoint_name = None
-        self.function = None
-        self.output_boxes = None
-        self.output_masks = None
-        self.K = self.D = None
-        self.map1 = self.map2 = None
-
-        self.fx = self.fy = 1.0
-        self.cv_bridge = CvBridge()
-        self.aidk_client = aidk.AIDKClient('127.0.0.1', 10)
-        while not self.aidk_client.is_ready():
-            time.sleep(0.5)
-
-        '''init'''
-        self._init_param()
-        self._init_model()
-        self._init_config()
-        self._init_publisher()
-        self._init_subscriber()
-
-        self.get_logger().info("Init done")
-
-    def _init_param(self):
-        """init parameter"""
-        self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
-        self.checkpoint_name = self.get_parameter('checkpoint_name').value
-        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
-
-        self.declare_parameter('config_name', 'default_config.json')
-        self.config_name = self.get_parameter('config_name').value
-        self.config_dir = os.path.join(share_dir, 'configs/flexiv_configs', self.config_name)
-
-        self.declare_parameter('output_boxes', True)
-        self.output_boxes = self.get_parameter('output_boxes').value
-
-        self.declare_parameter('output_masks', True)
-        self.output_masks = self.get_parameter('output_masks').value
-
-        self.declare_parameter('set_confidence', 0.25)
-        self.set_confidence = self.get_parameter('set_confidence').value
-
-        self.declare_parameter('color_image_topic', '/camera/camera/color/image_raw')
-        self.color_image_topic = self.get_parameter('color_image_topic').value
-
-        self.declare_parameter('depth_image_topic', '/camera/camera/aligned_depth_to_color/image_raw')
-        self.depth_image_topic = self.get_parameter('depth_image_topic').value
-
-        self.declare_parameter('camera_info_topic', '/camera/camera/color/camera_info')
-        self.camera_info_topic = self.get_parameter('camera_info_topic').value
-
-
-    def _init_model(self):
-        """init model"""
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        try:
-            self.model = YOLO(self.checkpoint_path).to(device)
-        except Exception as e:
-            self.get_logger().error(f'Failed to load YOLO model: {e}')
-            raise
-        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
-
-        if self.checkpoint_name.endswith('-seg.pt'):
-            self.function = self._seg_image
-        else:
-            self.function = None
-            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
-
-    def _init_config(self):
-        """init config"""
-        with open(self.config_dir, "r") as f:
-            self.config = json.load(f)
-
-    def _init_publisher(self):
-        """init_publisher"""
-        self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
-
-    def _init_subscriber(self):
-        """init_subscriber"""
-        self.sub_camera_info = self.create_subscription(
-            CameraInfo,
-            self.camera_info_topic,
-            self._camera_info_callback,
-            10
-        )
-        time.sleep(1)
-
-        '''sync get color and depth img'''
-        self.sub_color_image = Subscriber(self, Image, self.color_image_topic)
-        self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic)
-
-        self.sync_subscriber = ApproximateTimeSynchronizer(
-            [self.sub_color_image, self.sub_depth_image],
-            queue_size=10,
-            slop=0.1
-        )
-        self.sync_subscriber.registerCallback(self._sync_callback)
-
-    def _camera_info_callback(self, msg: CameraInfo):
-        """Get camera info"""
-        self.K = msg.k
-        self.D = msg.d
-
-        self.camera_size = [msg.width, msg.height]
-
-        if self.K is not None and self.D is not None:
-            self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
-
-            if len(self.D) != 0:
-                self.destroy_subscription(self.sub_camera_info)
-            else:
-                self.D = [0, 0, 0, 0, 0, 0, 0, 0]
-                self.destroy_subscription(self.sub_camera_info)
-        else:
-            raise "K and D are not defined"
-
-
-    def _sync_callback(self, color_img_ros, depth_img_ros):
-        """同步回调函数"""
-        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
-        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-
-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
-
-        img, pose_dict = self.function(color_img_cv, depth_img_cv)
-
-        """masks为空，结束这一帧"""
-        if img is None:
-            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
-
-        if self.output_boxes or self.output_masks:
-            self.pub_detect_image.publish(img)
-
-        if pose_dict is not None:
-            pose_list_all = PoseArrayClassAndID()
-            for (class_id, class_name), pose_list in pose_dict.items():
-                pose_list_all.objects.append(
-                    PoseClassAndID(
-                        class_name = class_name,
-                        class_id = class_id,
-                        pose_list = pose_list
-                    )
-                )
-            pose_list_all.header.stamp = self.get_clock().now().to_msg()
-            pose_list_all.header.frame_id = "pose_list"
-            self.pub_pose_list.publish(pose_list_all)
-
-    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
-        """Use segmentation model"""
-        pose_dict = defaultdict(list)
-
-        '''Get Predict Results'''
-        time1 = time.time()
-        results = self.model(rgb_img, retina_masks=True, classes=[39], conf=self.set_confidence)
-        time2 = time.time()
-        result = results[0]
-
-        '''Get masks'''
-        if result.masks is None or len(result.masks) == 0:
-            return None, None
-        masks = result.masks.data.cpu().numpy()
-        orig_shape = result.masks.orig_shape
-
-        '''Get boxes'''
-        boxes = result.boxes.xywh.cpu().numpy()
-        class_ids = result.boxes.cls.cpu().numpy()
-        labels = result.names
-
-        time3 = time.time()
-
-        for i, (mask, box) in enumerate(zip(masks, boxes)):
-            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            rgb_crop, depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(rgb_img, depth_img, mask, box)
-
-            if depth_crop is None:
-                continue
-
-            depth_img_crop_mask = np.zeros_like(depth_crop)
-            depth_img_crop_mask[mask_crop > 0] = depth_crop[mask_crop > 0]
-
-            print(rgb_crop.shape)
-            print(rgb_crop.dtype)
-
-
-            rgb_bytes = cv2.imencode('.png', rgb_crop)[1]
-            depth_bytes = cv2.imencode('.png', depth_img_crop_mask)[1]
-
-            intrinsics = [
-                int(self.camera_size[0]),
-                int(self.camera_size[1]),
-                self.K[2] - x_min,
-                self.K[5] - y_min,
-                self.K[0],
-                self.K[4]
-            ]
-
-            state = self.aidk_client.detect_with_image(
-                obj_name=self.config["command"]["obj_name"],
-                camera_id=self.config["command"]["camera_id"],
-                coordinate_id=self.config["command"]["coordinate_id"],
-                camera_pose=self.config["command"]["camera_pose"],
-                camera_intrinsic=intrinsics,
-                rgb_input=aidk.ImageBuffer(rgb_bytes),
-                depth_input=aidk.ImageBuffer(depth_bytes),
-                custom=self.config["command"]["custom"],
-            )
-
-            self.get_logger().info(f"state: {state}")
-            self.get_logger().info(
-                f"current detected object names: {self.aidk_client.get_detected_obj_names()}, current detected object nums: {self.aidk_client.get_detected_obj_nums()}"
-            )
-
-            for key in self.config["keys"]:
-                parse_state, result_list = self.aidk_client.parse_result(self.config["command"]["obj_name"], key, -1)
-                self.get_logger().info(
-                    "detected time stamp: {}".format(
-                        datetime.fromtimestamp(self.aidk_client.get_detected_time())
-                    )
-                )
-                if not parse_state:
-                    self.get_logger().error("Parse result error!!!")
-                    continue
-                else:
-                    if key in ["bbox", "keypoints", "positions", "obj_pose"]:
-                        for result in result_list:
-                            for vec in result.vect:
-                                self.get_logger().info(f"vec: {vec}")
-                                x, y, z, rw, rx, ry, rz = vec
-
-                                pose = Pose()
-                                pose.position = Point(x=x, y=y, z=z)
-                                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
-                                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
-
-                    elif key in ["valid", "double_value", "int_value", "name"]:
-                        for result in result_list:
-                            self.get_logger().info(f"{key}: {getattr(result, key)}")
-
-        time4 = time.time()
-
-        self.get_logger().info(f'start')
-        self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
-        self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
-        self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
-        self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
-        self.get_logger().info(f'end')
-
-        return None, pose_dict
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = DetectNode('detect')
-    try:
-        rclpy.spin(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/get_camera_pose.py
+++ b/vision_detect/vision_detect/get_camera_pose.py
@@ -44,17 +44,17 @@ class GetCameraPose(Node):

            pose_dict = {}
            for object in camera_pose.objects:
-                pose_dict[object.class_name] = object.pose_list
+                pose_dict[object.class_name] = object.pose

            if self.class_name in pose_dict:
                _camera = [
-                    pose_dict[self.class_name][-1].position.x,
-                    pose_dict[self.class_name][-1].position.y,
-                    pose_dict[self.class_name][-1].position.z,
-                    pose_dict[self.class_name][-1].orientation.w,
-                    pose_dict[self.class_name][-1].orientation.x,
-                    pose_dict[self.class_name][-1].orientation.y,
-                    pose_dict[self.class_name][-1].orientation.z,
+                    pose_dict[self.class_name].position.x,
+                    pose_dict[self.class_name].position.y,
+                    pose_dict[self.class_name].position.z,
+                    pose_dict[self.class_name].orientation.w,
+                    pose_dict[self.class_name].orientation.x,
+                    pose_dict[self.class_name].orientation.y,
+                    pose_dict[self.class_name].orientation.z,
                ]

                self.get_logger().info(f"add camera: {_camera}")
--- a/vision_detect/vision_detect/hand_eye_calibration.py
+++ b/vision_detect/vision_detect/hand_eye_calibration.py
@@ -1,360 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-import transforms3d as tfs
-import numpy as np
-import math
-import json
-
-import rclpy
-from rclpy.node import Node
-from rclpy.parameter import Parameter
-
-from tf2_ros import Buffer
-from tf2_msgs.msg import TFMessage
-from interfaces.msg import PoseArrayClassAndID
-from message_filters import ApproximateTimeSynchronizer, Subscriber
-
-
-def get_matrix_quat(x, y, z, rw, rx, ry, rz):
-    """从单位四元数构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    q = [rw, rx, ry, rz]
-    rmat = tfs.quaternions.quat2mat(q)
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def get_matrix_eular_radu(x, y, z, rx, ry, rz):
-    """从欧拉角构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    rmat = tfs.euler.euler2mat(
-        # math.radians(rx), math.radians(ry), math.radians(rz)
-        rx, ry, rz
-    )
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def get_matrix_rotvector(x, y, z, rx, ry, rz):
-    """从旋转向量构建齐次变换矩阵"""
-    '''构造旋转矩阵'''
-    rvec = np.array([rx, ry, rz])
-    theta = np.linalg.norm(rvec)
-    if theta < 1e-8:
-        rmat = np.eye(3)  # 小角度直接返回单位矩阵
-    else:
-        axis = rvec / theta
-        rmat = tfs.axangles.axangle2mat(axis, theta)
-
-    """构造齐次变换矩阵"""
-    rmat = tfs.affines.compose(
-        np.squeeze(np.asarray((x, y, z))),
-        rmat,
-        [1, 1, 1]
-    )
-    return rmat
-
-
-def skew(v):
-    return np.array([[0, -v[2], v[1]],
-                     [v[2], 0, -v[0]],
-                     [-v[1], v[0], 0]])
-
-
-def R2P(T):
-    """旋转矩阵 --> 修正罗德里格斯向量"""
-    axis, angle= tfs.axangles.mat2axangle(T[0:3, 0:3])
-    P = 2 * math.sin(angle / 2) * axis
-    return P
-
-
-def P2R(P):
-    """修正罗德里格斯向量 --> 旋转矩阵"""
-    P2 = np.dot(P.T, P)
-    part_1 = (1 - P2 / 2) * np.eye(3)
-    part_2 = np.add(np.dot(P, P.T), np.sqrt(4- P2) * skew(P.flatten().T))
-    R = np.add(part_1, np.divide(part_2, 2))
-    return R
-
-
-def calculate(Hgs, Hcs):
-    """计算标定矩阵"""
-    # H代表矩阵、h代表标量
-    Hgijs = []
-    Hcijs = []
-    A = []
-    B = []
-    size = 0
-    for i in range(len(Hgs)):
-        for j in range(i + 1, len(Hgs)):
-            size += 1
-            Hgij = np.dot(np.linalg.inv(Hgs[j]), Hgs[i])
-            Hgijs.append(Hgij)
-            Pgij = np.dot(2, R2P(Hgij))
-
-            Hcij = np.dot(Hcs[j], np.linalg.inv(Hcs[i]))
-            Hcijs.append(Hcij)
-            Pcij = np.dot(2, R2P(Hcij))
-
-            A.append(skew(np.add(Pgij, Pcij)))
-            B.append(np.subtract(Pcij, Pgij).reshape(3, 1))
-
-    MA = np.vstack(A)
-    MB = np.vstack(B)
-    Pcg_ = np.dot(np.linalg.pinv(MA), MB)
-    pcg = np.sqrt(np.add(1, np.dot(Pcg_.T, Pcg_)))
-    Pcg = np.dot(np.dot(2, Pcg_), np.linalg.inv(pcg))
-    Rcg = P2R(Pcg).reshape(3, 3)
-
-    A = []
-    B = []
-    id = 0
-    for i in range(len(Hgs)):
-        for j in range(i + 1, len(Hgs)):
-            Hgij = Hgijs[id]
-            Hcij = Hcijs[id]
-            A.append(np.subtract(Hgij[0:3, 0:3], np.eye(3, 3)))
-            B.append(np.subtract(np.dot(Rcg, Hcij[0:3, 3:4]), Hgij[0:3, 3:4]))
-            id += 1
-
-    MA = np.asarray(A).reshape(size * 3, 3)
-    MB = np.asarray(B).reshape(size * 3, 1)
-    Tcg = np.dot(np.linalg.pinv(MA), MB).reshape(3, )
-
-    return tfs.affines.compose(Tcg, np.squeeze(Rcg), [1, 1, 1])
-
-
-class Calibration(Node):
-    def __init__(self, name):
-        super(Calibration, self).__init__(name)
-        self.sync_subscriber = None
-        self.sub_camera_pose = None
-        self.sub_hand_pose = None
-        self.Hgs, self.Hcs = [], []
-        self.hand, self.camera = [], []
-        self.calibration_matrix = None
-        self.calculate = False
-        self.collect = False
-        self.base_name = 'base_link'
-
-        self.tf_buffer = Buffer()
-
-        self.declare_parameter('start_collect_once', False)
-        self.declare_parameter('start_calculate', False)
-        self.declare_parameter('base_name', 'base_link')
-
-        self.declare_parameter('end_name', 'Link6')
-        self.declare_parameter('class_name', 'crossboard')
-        self.declare_parameter('matrix_name', 'eye_in_hand')
-
-        self.end_name = self.get_parameter('end_name').value
-        self.class_name = self.get_parameter('class_name').value
-        self.matrix_name = self.get_parameter('matrix_name').value
-
-        self.declare_parameter('mode', 'eye_in_hand')
-        self.mode = self.get_parameter('mode').value.lower()
-
-        if self.mode not in ['eye_in_hand', 'eye_to_hand']:
-            raise ValueError("mode must be 'eye_in_hand' or 'eye_to_hand'")
-
-        self.declare_parameter('input', 'quat')
-        self.input = self.get_parameter('input').value.lower()
-        if self.input == 'eular':
-            self.function = get_matrix_eular_radu
-        elif self.input == 'rotvertor':
-            self.function = get_matrix_rotvector
-        elif self.input == 'quat':
-            self.function = get_matrix_quat
-        else:
-            raise ValueError("INPUT ERROR")
-        self.done = False
-
-        self._init_sub()
-
-    def _init_sub(self):
-        self.sub_hand_pose = Subscriber(self, TFMessage, '/tf')
-        self.sub_camera_pose = Subscriber(self, PoseArrayClassAndID, '/pose/cv_detect_pose')
-
-        self.sync_subscriber = ApproximateTimeSynchronizer(
-            [self.sub_hand_pose, self.sub_camera_pose],
-            queue_size=10,
-            slop=0.1,
-            allow_headerless = True
-        )
-        self.sync_subscriber.registerCallback(self.get_pose_callback)
-
-    def get_pose_callback(self, hand_tf, camera_pose):
-        self.collect = self.get_parameter('start_collect_once').value
-        self.calculate = self.get_parameter('start_calculate').value
-        self.base_name = self.get_parameter('base_name').value
-        self.end_name = self.get_parameter('end_name').value
-        self.class_name = self.get_parameter('class_name').value
-        self.matrix_name = self.get_parameter('matrix_name').value
-        self.mode = self.get_parameter('mode').value.lower()
-
-        if self.collect:
-            _hand, _camera = None, None
-
-            for transform in hand_tf.transforms:
-                self.tf_buffer.set_transform(transform, "default_authority")
-
-            if self.base_name in self.tf_buffer.all_frames_as_string() and self.end_name in self.tf_buffer.all_frames_as_string():
-
-                trans = self.tf_buffer.lookup_transform(
-                    self.base_name,
-                    self.end_name,
-                    rclpy.time.Time()
-                )
-
-                t = trans.transform.translation
-                r = trans.transform.rotation
-                _hand = [
-                    t.x, t.y, t.z,
-                    r.w, r.x, r.y, r.z
-                ]
-            else: return
-
-            pose_dict = {}
-            for object in camera_pose.objects:
-                pose_dict[object.class_name] = object.pose_list
-
-            if self.class_name in pose_dict:
-                _camera = [
-                    pose_dict[self.class_name][-1].position.x,
-                    pose_dict[self.class_name][-1].position.y,
-                    pose_dict[self.class_name][-1].position.z,
-                    pose_dict[self.class_name][-1].orientation.w,
-                    pose_dict[self.class_name][-1].orientation.x,
-                    pose_dict[self.class_name][-1].orientation.y,
-                    pose_dict[self.class_name][-1].orientation.z,
-                ]
-            else:
-                self.set_parameters([Parameter('start_collect_once', Parameter.Type.BOOL, False)])
-                self.get_logger().info(f"Have not camera data")
-
-            if _hand is None or _camera is None:
-                _hand, _camera = None, None
-                self.get_logger().info("Have not camera data or end data")
-                return
-
-            self.get_logger().info(f"add hand: {_hand}")
-            self.get_logger().info(f"add camera: {_camera}")
-
-            self.hand.extend(_hand)
-            self.camera.extend(_camera)
-            self.set_parameters([Parameter('start_collect_once', Parameter.Type.BOOL, False)])
-
-        if self.calculate:
-            self.calculate_calibration()
-
-            print(self.hand)
-            print(self.camera)
-
-            self.get_logger().info(f"{self.calibration_matrix}")
-            hand_eye_result = {
-                "T": self.calibration_matrix.tolist()
-            }
-            with open(f"{self.matrix_name}_matrix.json", "w") as f:
-                json.dump(hand_eye_result, f, indent=4)
-            self.get_logger().info(f"Save as {self.matrix_name}_matrix.json")
-
-            with open(f"hand_pose_data.json", "w") as f:
-                json.dump(self.hand, f, indent=4)
-
-            with open(f"camera_pose_data.json", "w") as f:
-                json.dump(self.camera, f, indent=4)
-
-            self.done = True
-
-    def calculate_data(self):
-        if self.input == 'quat':
-            for i in range(0, len(self.hand), 7):
-                self.Hgs.append(
-                    np.linalg.inv(
-                        self.function(
-                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
-                        )
-                    )
-                    if self.mode == 'eye_to_hand' else
-                    self.function(
-                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
-                    )
-                )
-                self.Hcs.append(
-                    self.function(
-                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
-                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5], self.camera[i + 6]
-                    )
-                )
-
-        else:
-            for i in range(0, len(self.hand), 6):
-                self.Hgs.append(
-                    np.linalg.inv(
-                        self.function(
-                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
-                        )
-                    )
-                    if self.mode == 'eye_to_hand' else
-                    self.function(
-                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
-                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
-                    )
-                )
-                self.Hcs.append(
-                    self.function(
-                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
-                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5]
-                    )
-                )
-
-
-    def calculate_calibration(self):
-        self.calculate_data()
-        self.calibration_matrix = calculate(self.Hgs, self.Hcs)
-
-    def get_data_test(self, hand, camera):
-        self.hand, self.camera = hand, camera
-        self.calculate_calibration()
-        print(self.calibration_matrix)
-
-        hand_eye_result = {
-            "T": self.calibration_matrix.tolist()
-        }
-        with open(f"{self.matrix_name}_matrix.json", "w") as f:
-            json.dump(hand_eye_result, f, indent=4)
-        self.get_logger().info(f"Save as {self.matrix_name}_matrix.json")
-
-        self.done = True
-
-
-def main(args=None):
-    rclpy.init(args=args)
-    node = Calibration('calibration')
-    try:
-        while rclpy.ok() and not node.done:
-            rclpy.spin_once(node)
-    except KeyboardInterrupt:
-        pass
-    finally:
-        node.destroy_node()
-        rclpy.shutdown()
-
-
-if __name__ == '__main__':
-    main()
--- a/vision_detect/vision_detect/source_test_node.py
+++ b/vision_detect/vision_detect/source_test_node.py
@@ -0,0 +1,36 @@
+"""
+source_test 节点的创建与启动入口。
+节点类定义位于 vision_core.node_test.source_test。
+"""
+import sys
+
+import rclpy
+from rclpy.executors import MultiThreadedExecutor
+
+from vision_core import SourceTestNode
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    # 支持通过 --config 指定配置文件
+    config_path = None
+    for i, arg in enumerate(sys.argv):
+        if arg == "--config" and i + 1 < len(sys.argv):
+            config_path = sys.argv[i + 1]
+            break
+
+    node = SourceTestNode(config_path=config_path)
+    try:
+        executor = MultiThreadedExecutor()
+        rclpy.spin(node, executor=executor)
+    except KeyboardInterrupt:
+        pass
+    finally:
+        node.destroy_node()
+        if rclpy.ok():
+            rclpy.shutdown()
+
+
+if __name__ == "__main__":
+    main()
--- a/vision_detect/vision_detect/vision_core/init.py
+++ b/vision_detect/vision_detect/vision_core/init.py
@@ -0,0 +1,4 @@
+from .node import NodeManager
+from .node_test import *
+
+__all__ = ["NodeManager", "SourceTestNode"]
--- a/vision_detect/vision_detect/vision_core/core/init.py
+++ b/vision_detect/vision_detect/vision_core/core/init.py
@@ -0,0 +1,2 @@
+from .runtime import *
+from .enum import *
--- a/vision_detect/vision_detect/vision_core/core/components/init.py
+++ b/vision_detect/vision_detect/vision_core/core/components/init.py
@@ -0,0 +1,4 @@
+from . import detectors
+from . import estimators
+from . import image_providers
+from . import refiners
--- a/vision_detect/vision_detect/vision_core/core/components/detectors/init.py
+++ b/vision_detect/vision_detect/vision_core/core/components/detectors/init.py
@@ -0,0 +1,5 @@
+from .object_detector import *
+from .color_detector import *
+from .crossboard_detector import *
+
+# from .detector_baseline import *
--- a/vision_detect/vision_detect/vision_core/core/components/detectors/color_detector.py
+++ b/vision_detect/vision_detect/vision_core/core/components/detectors/color_detector.py
@@ -0,0 +1,40 @@
+import cv2
+import numpy as np
+
+from ...enum import Status
+from ...data_struct import ImageData, DetectData
+from .detector_baseline import DetectorBaseline
+
+
+__all__ = ["ColorDetector"]
+
+class ColorDetector(DetectorBaseline):
+    def __init__(self, config, _logger):
+        super().__init__()
+        self.distance = config["distance"]
+        self.color_range = config["color_range"]
+
+    def _detect(self, classes_name, image_data: ImageData) -> tuple[DetectData | None, int]:
+        if image_data.status != Status.SUCCESS:
+            return None, image_data.status
+
+        color_image = image_data.color_image
+        depth_image = image_data.depth_image
+
+        hsv_img = cv2.cvtColor(color_image, cv2.COLOR_BGR2HSV)
+
+        depth_filter_mask = np.zeros_like(depth_image, dtype=np.uint8)
+        depth_filter_mask[(depth_image > 0) & (depth_image < self.distance)] = 1
+
+        hsv_img[depth_filter_mask == 0] = 0
+
+        mask_part_list = [cv2.inRange(hsv_img, color[0], color[1]) for color in self.color_range]
+        mask = sum(mask_part_list[1:], mask_part_list[0])
+        mask = mask // 255
+
+        if mask is None or not np.any(mask):
+            return None, Status.NO_COLOR
+
+        color_image[mask > 0] = color_image[mask > 0] * 0.5 + np.array([255, 0, 0]) * 0.5
+
+        return DetectData.create_mask_only_data(masks=[mask]), Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/detectors/crossboard_detector.py
+++ b/vision_detect/vision_detect/vision_core/core/components/detectors/crossboard_detector.py
@@ -0,0 +1,46 @@
+import numpy as np
+import cv2
+
+from ...enum import Status
+from ...data_struct import ImageData, DetectData
+from .detector_baseline import DetectorBaseline
+
+
+__all__ = ["CrossboardDetector"]
+
+class CrossboardDetector(DetectorBaseline):
+    def __init__(self, config, _logger):
+        super().__init__()
+        self.pattern_size = config["pattern_size"] # (columns, rows)
+
+    def _detect(self, classes_name, image_data: ImageData) -> tuple[DetectData | None, int]:
+        color_image = image_data.color_image
+        depth_image = image_data.depth_image
+
+        rgb_img_gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
+        ret, corners = cv2.findChessboardCorners(
+            rgb_img_gray, self.pattern_size, cv2.CALIB_CB_FAST_CHECK
+        )
+        if not ret:
+            return None, Status.NO_CROSSBOARD
+
+        # 角点亚像素精确化（提高标定精度）
+        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
+        corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
+
+        corners_subpix = corners_subpix.reshape(self.pattern_size[1], self.pattern_size[0], 2)
+        mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
+
+        for i in range(0, self.pattern_size[1] - 1):
+            for j in range(0, self.pattern_size[0] - 1):
+                pts = np.array([
+                    corners_subpix[i, j],
+                    corners_subpix[i, j + 1],
+                    corners_subpix[i + 1, j + 1],
+                    corners_subpix[i + 1, j]
+                ], dtype=np.int32)
+                cv2.fillConvexPoly(mask, pts, 1)
+
+        color_image[mask > 0] = color_image[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
+        corners = corners.reshape(-1, 2)
+        return DetectData.create_mask_only_data(masks=[mask]), Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/detectors/detector_baseline.py
+++ b/vision_detect/vision_detect/vision_core/core/components/detectors/detector_baseline.py
@@ -0,0 +1,14 @@
+from ...data_struct import DetectData, ImageDataContainer
+
+
+__all__ = ["DetectorBaseline"]
+
+class DetectorBaseline:
+    def __init__(self):
+        pass
+
+    def _detect(self, classes_name, image_data) -> tuple[DetectData | None, int]:
+        pass
+
+    def get_masks(self, position, classes_name, image_data: ImageDataContainer) -> tuple[DetectData | None, int]:
+        return self._detect(classes_name, image_data[position])
--- a/vision_detect/vision_detect/vision_core/core/components/detectors/object_detector.py
+++ b/vision_detect/vision_detect/vision_core/core/components/detectors/object_detector.py
@@ -0,0 +1,65 @@
+import os
+import json
+
+import torch
+from ultralytics import YOLO
+
+from ament_index_python.packages import get_package_share_directory
+
+from ...enum import Status
+from .detector_baseline import DetectorBaseline
+from ...data_struct import ImageData, DetectData
+
+
+__all__ = ["ObjectDetector"]
+
+SHARE_DIR = get_package_share_directory('vision_detect')
+
+class ObjectDetector(DetectorBaseline):
+
+    def __init__(self, config, _logger):
+        super().__init__()
+        self.logger = _logger
+        self.confidence = config["confidence"]
+        with open(os.path.join(SHARE_DIR, config["label_map_path"]), "r") as f:
+            self.labels_map = json.load(f)
+
+        """init model"""
+        checkpoint_path = str(os.path.join(SHARE_DIR, config["checkpoint_path"]))
+        self._device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        try:
+            self.model = YOLO(checkpoint_path)
+        except Exception as e:
+            raise ValueError(f'Failed to load YOLO model: {e}')
+
+    def _detect(
+            self,
+            classes_name: list[str],
+            image_data: ImageData
+    ) -> tuple[DetectData | None, int]:
+
+        if image_data.status != Status.SUCCESS:
+            return None, image_data.status
+
+        classes = []
+        for _class in classes_name:
+            if _class in self.labels_map:
+                classes.append(self.labels_map[_class])
+        if not classes:
+            self.logger.warning("No legal classes name")
+            classes = None
+
+        color_image = image_data.color_image
+        with torch.no_grad():
+            results = self.model.predict(
+                color_image,
+                device=self._device,
+                retina_masks=True,
+                conf=self.confidence,
+                classes=classes,
+            )
+
+        if results[0].masks is None or len(results[0].masks) == 0:
+            return None, Status.NO_DETECT
+
+        return DetectData.create_data(results=results), Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/estimators/init.py
+++ b/vision_detect/vision_detect/vision_core/core/components/estimators/init.py
@@ -0,0 +1,5 @@
+from .pca_estimator import *
+from .icp_estimator import *
+from .gsnet_estimator import *
+
+# from  .estimator_baseline import *
--- a/vision_detect/vision_detect/vision_core/core/components/estimators/estimator_baseline.py
+++ b/vision_detect/vision_detect/vision_core/core/components/estimators/estimator_baseline.py
@@ -0,0 +1,28 @@
+from ...data_struct import ImageData, ImageDataContainer, DetectData, PoseData
+
+__all__ = ["EstimatorBaseline"]
+
+class EstimatorBaseline:
+    def __init__(self, config):
+        self._config = config
+
+    def _estimate(
+        self,
+        image_data: ImageData,
+        detect_data: DetectData,
+        get_grab_width: bool = True
+    ) -> tuple[PoseData | None, int]:
+        pass
+
+    def get_poses(
+            self,
+            position: str,
+            image_data: ImageDataContainer,
+            detect_data: DetectData,
+            get_grab_width: bool = True
+    ) -> tuple[PoseData | None, int]:
+        return self._estimate(
+            image_data=image_data[position],
+            detect_data=detect_data,
+            get_grab_width=get_grab_width
+        )
--- a/vision_detect/vision_detect/vision_core/core/components/estimators/gsnet_estimator.py
+++ b/vision_detect/vision_detect/vision_core/core/components/estimators/gsnet_estimator.py
@@ -0,0 +1,158 @@
+import torch
+import numpy as np
+import transforms3d as tfs
+import MinkowskiEngine as ME
+
+from ...enum import Status
+from ...utils import pointcloud, image
+from ...data_struct import ImageData, DetectData, PoseData
+from .estimator_baseline import EstimatorBaseline
+
+from ..model.gsnet import pred_decode, ModelFreeCollisionDetector
+
+
+__all__ = ["GSNetEstimator"]
+
+
+def minkowski_collate_fn(list_data):
+    coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
+                                                                [d["feats"] for d in list_data])
+    coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
+    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
+        coordinates_batch, features_batch, return_index=True, return_inverse=True)
+
+    res = {
+        "coors": coordinates_batch,
+        "feats": features_batch,
+        "quantize2original": quantize2original,
+        "point_clouds": torch.stack(
+            [torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
+    }
+    return res
+
+
+class GSNetEstimator(EstimatorBaseline):
+    def __init__(self, config):
+        super().__init__(config)
+        self.collision_sign = True if self._config.get("collision_thresh", 0.00) > 0 else False
+
+    def _estimate(
+        self,
+        image_data: ImageData,
+        detect_data: DetectData,
+        get_grab_width: bool = True
+    ) -> tuple[PoseData | None, int]:
+        depth_image = image_data.depth_image
+        karr = image_data.karr
+        image_size = depth_image.shape[:2][::-1]
+        masks = detect_data.masks
+
+        # check boxes data
+        boxes = detect_data.boxes
+        box_sign = False if boxes is None else True
+
+        if self.collision_sign:
+            full_pcd = pointcloud.create_o3d_pcd(
+                image_data.depth_image, image_size, image_data.karr, **self._config
+            )
+            full_points = np.asarray(full_pcd.points)
+
+        n = 0
+        pose_data = PoseData()
+        for i, mask in enumerate(masks):
+            karr_mask = karr.copy()
+
+            if box_sign:
+                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
+            else:
+                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
+
+            depth_img_mask = np.zeros_like(crops[0])
+            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
+            karr_mask[2] -= p[0]
+            karr_mask[5] -= p[1]
+
+            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
+                depth_img_mask, image_size, karr_mask, **self._config
+            )
+            if CODE != 0:
+                pose_data.add_data(CODE)
+                continue
+
+            # 构建推理数据结构
+            points = np.asarray(pcd.points)
+            ret_dict = {
+                'point_clouds': points.astype(np.float32),
+                'coors': points.astype(np.float32) / self._config.get("voxel_size", 0.002),
+                'feats': np.ones_like(points).astype(np.float32),
+            }
+            batch_data = minkowski_collate_fn([ret_dict])
+
+            # 将数据置于对应的设备上
+            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+            for key in batch_data:
+                batch_data[key] = batch_data[key].to(device)
+
+            # 点云数量判断，是否返回
+            if batch_data['coors'].shape[0] < 128:  # 例如 128 / 256
+                pose_data.add_data(Status.COORS_TOO_FEW)
+                continue
+            if batch_data["point_clouds"].shape[1] < 128:  # 例如 128 / 256
+                pose_data.add_data(Status.POINT_CLOUDS_TOO_FEW)
+                continue
+
+            # 梯度置0，进入推理
+            with torch.no_grad():
+                end_points = self._config.get("model")(batch_data)
+                if end_points is None:
+                    pose_data.add_data(Status.ZERO_TRUE_NUM)
+                    continue
+                grasp_preds = pred_decode(end_points)
+
+            # 推理结果后处理
+            preds = grasp_preds[0].detach().cpu().numpy()
+            sorted_index = np.argsort(-preds[:, 0])
+            preds = preds[sorted_index]
+            preds = preds[:10]
+
+            if self.collision_sign:
+                mfcdetector = ModelFreeCollisionDetector(
+                    full_points,
+                    voxel_size=self._config.get("voxel_size", 0.002)
+                )
+                collision_mask = mfcdetector.detect(
+                    preds, approach_dist=0.05,
+                    collision_thresh=self._config.get("collision_thresh", 0.01)
+                )
+                preds = preds[~collision_mask]
+
+            Rs = preds[:, 4:13].reshape(-1, 3, 3)
+            centers = preds[:, 13:16].reshape(-1, 3)
+            grab_width = preds[:, 1]
+            if not len(Rs):
+                pose_data.add_data(Status.E2E_NO_PREDICTION)
+                continue
+
+            pose_rmat = []
+            for r, center in zip(Rs, centers):
+                vz, vx, vy = r[:, 0], r[:, 1], r[:, 2]
+                if vx[0] < 0:
+                    vx = -vx
+                if vz[2] < 0:
+                    vz = -vz
+                if not np.allclose(np.cross(vx, vy), vz):
+                    vy = -vy
+
+                R = np.column_stack((vx, vy, vz))
+                pose_rmat.append(tfs.affines.compose(np.squeeze(np.asarray(center)), R, [1, 1, 1]))
+
+            if len(pose_rmat) == 0:
+                pose_data.add_data(Status.E2E_NO_VALID_MATRIX)
+                continue
+
+            pose_data.add_data(Status.SUCCESS, pose_rmat[0], [grab_width[0]])
+
+            n += 1
+        if n == 0:
+            return None, Status.CANNOT_ESTIMATE
+        return pose_data, Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/estimators/icp_estimator.py
+++ b/vision_detect/vision_detect/vision_core/core/components/estimators/icp_estimator.py
@@ -0,0 +1,215 @@
+from pathlib import Path
+
+import numpy as np
+import open3d as o3d
+
+from ament_index_python.packages import get_package_share_directory
+
+from ...enum import Status
+from ...utils import pointcloud, image
+from .estimator_baseline import EstimatorBaseline
+from ...data_struct import ImageData, DetectData, PoseData
+
+
+__all__ = ["ICPEstimator"]
+
+SHARE_DIR = Path(get_package_share_directory('vision_detect'))
+
+
+# def _draw_pointcloud(pcd, T):
+#     R = T[0:3, 0:3]
+#     point = T[0:3, 3:4].flatten()
+#     x, y, z = R[:, 0], R[:, 1], R[:, 2]
+#
+#     points = [
+#         [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
+#         point, point + x, point + y, point + z
+#
+#     ]  # 画点：原点、第一主成分、第二主成分
+#     lines = [
+#         [0, 1], [0, 2], [0, 3],
+#         [4, 5], [4, 6], [4, 7]
+#     ]  # 画出三点之间两两连线
+#     colors = [
+#         [1, 0, 0], [0, 1, 0], [0, 0, 1],
+#         [1, 0, 0], [0, 1, 0], [0, 0, 1]
+#     ]
+#
+#     line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points),
+#                                     lines=o3d.utility.Vector2iVector(lines))
+#     line_set.colors = o3d.utility.Vector3dVector(colors)
+#
+#     pcd.append(line_set)
+#     o3d.visualization.draw_geometries(pcd)
+
+
+def _preprocess_point_cloud(
+        pcd: o3d.geometry.PointCloud,
+        voxel_size: float
+):
+    pcd_down = pcd.voxel_down_sample(voxel_size)
+    radius_normal = voxel_size * 2
+
+    pcd_down.estimate_normals(
+        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30)
+    )
+
+    radius_feature = voxel_size * 5
+
+    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
+        pcd_down,
+        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
+    )
+
+    return pcd_down, pcd_fpfh
+
+
+def _prepare_dataset(source, target, voxel_size):
+    trans_init = np.identity(4)
+    source.transform(trans_init)
+
+    source_down, source_fpfh = _preprocess_point_cloud(source, voxel_size)
+    target_down, target_fpfh = _preprocess_point_cloud(target, voxel_size)
+
+    return source_down, target_down, source_fpfh, target_fpfh
+
+
+def _execute_global_registration(
+        source_down,
+        target_down,
+        source_fpfh,
+        target_fpfh,
+        voxel_size
+):
+    distance_threshold = voxel_size * 1.5
+    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
+        source_down,
+        target_down,
+        source_fpfh,
+        target_fpfh,
+        True,
+        distance_threshold,
+        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
+        3,
+        [
+            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
+            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
+        ],
+        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))
+
+    return result.transformation
+
+
+def _object_icp(
+        target: o3d.geometry.PointCloud,
+        source: o3d.geometry.PointCloud,
+        ransac_voxel_size: float = 0.005,
+        icp_voxel_radius: list[float] | tuple[float, ...] | None = (0.004, 0.002, 0.001),
+        icp_max_iter: list[int] | tuple[int, ...] | None = (50, 30, 14),
+):
+    # # check is PointCloud
+    # if isinstance(source, str):
+    #     source = o3d.io.read_point_cloud(source)
+    # elif isinstance(source, o3d.geometry.PointCloud):
+    #     pass
+    # else:
+    #     raise TypeError(f"Unsupported Type {type(source)}")
+
+    voxel_size = 0.005  # means 5mm for this dataset
+    source_down, target_down, source_fpfh, target_fpfh = _prepare_dataset(source, target,
+                                                                          voxel_size)
+
+    T = _execute_global_registration(source_down, target_down, source_fpfh, target_fpfh,
+                                     ransac_voxel_size)
+
+    for scale in range(3):
+        _iter = icp_max_iter[scale]
+        radius = icp_voxel_radius[scale]
+        # print([_iter, radius, scale])
+
+        source_down = source.voxel_down_sample(radius)
+        target_down = target.voxel_down_sample(radius)
+
+        source_down.estimate_normals(
+            o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
+        target_down.estimate_normals(
+            o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
+
+        result_icp = o3d.pipelines.registration.registration_icp(
+            source_down,
+            target_down,
+            radius * 5,
+            T,
+            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
+            o3d.pipelines.registration.ICPConvergenceCriteria(
+                relative_fitness=1e-6,
+                relative_rmse=1e-6,
+                max_iteration=_iter
+            )
+        )
+
+        T = result_icp.transformation
+
+    # # draw pcd and source
+    # _draw_pointcloud([source.transform(T), target], T)
+
+    return T
+
+
+class ICPEstimator(EstimatorBaseline):
+    def __init__(self, config):
+        super().__init__(config)
+        self._pcd_source_full = o3d.io.read_point_cloud(SHARE_DIR / config["complete_model_path"])
+
+    def _estimate(
+        self,
+        image_data: ImageData,
+        detect_data: DetectData,
+        get_grab_width: bool = True
+    ) -> tuple[PoseData | None, int]:
+
+        depth_image = image_data.depth_image
+        karr = image_data.karr
+        image_size = depth_image.shape[:2][::-1]
+        masks = detect_data.masks
+
+        boxes = detect_data.boxes
+        box_sign = False if boxes is None else True
+
+        n = 0
+        pose_data = PoseData()
+        for i, mask in enumerate(masks):
+            karr_mask = karr.copy()
+
+            if box_sign:
+                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
+            else:
+                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
+
+            depth_img_mask = np.zeros_like(crops[0])
+            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
+            karr_mask[2] -= p[0]
+            karr_mask[5] -= p[1]
+
+            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
+                depth_img_mask, image_size, karr_mask, **self._config
+            )
+            if CODE != 0:
+                pose_data.add_data(CODE)
+                continue
+
+            pose_rmat = _object_icp(
+                self._pcd_source_full.get("source"),
+                pcd,
+                ransac_voxel_size=self._config.get("ransac_voxel_size", 0.005),
+                icp_voxel_radius=self._config.get("icp_voxel_radius", [0.004, 0.002, 0.001]),
+                icp_max_iter=self._config.get("icp_max_iter", [50, 30, 14])
+            )
+
+            grab_width = [0.0, 0.0, 0.0]
+            pose_data.add_data(Status.SUCCESS, pose_rmat, tuple(grab_width))
+            n += 1
+
+        if n == 0:
+            return pose_data, Status.CANNOT_ESTIMATE
+        return pose_data, Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/estimators/pca_estimator.py
+++ b/vision_detect/vision_detect/vision_core/core/components/estimators/pca_estimator.py
@@ -0,0 +1,117 @@
+import numpy as np
+import transforms3d as tfs
+
+from ...enum import Status
+from ...utils import pointcloud, image
+from .estimator_baseline import EstimatorBaseline
+from ...data_struct import ImageData, DetectData, PoseData
+
+
+__all__ = ["PCAEstimator"]
+
+def pca(data: np.ndarray, sort=True):
+    """主成分分析 """
+    center = np.mean(data, axis=0)
+    centered_points = data - center    # 去中心化
+
+    try:
+        cov_matrix = np.cov(centered_points.T)  # 转置
+        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
+
+    except np.linalg.LinAlgError:
+        return None, None
+
+    if sort:
+        sort = eigenvalues.argsort()[::-1]  # 降序排列
+        eigenvalues = eigenvalues[sort]  # 索引
+        eigenvectors = eigenvectors[:, sort]
+
+    return eigenvalues, eigenvectors
+
+
+class PCAEstimator(EstimatorBaseline):
+    def __init__(self, config):
+        super().__init__(config)
+
+    def _estimate(
+        self,
+        image_data: ImageData,
+        detect_data: DetectData,
+        get_grab_width: bool = True,
+    ) -> tuple[PoseData | None, int]:
+
+        depth_image = image_data.depth_image
+        karr = image_data.karr
+        image_size = depth_image.shape[:2][::-1]
+        masks = detect_data.masks
+
+        # check boxes data
+        boxes = detect_data.boxes
+        box_sign = False if boxes is None else True
+
+        n = 0
+        pose_data = PoseData()
+        for i, mask in enumerate(masks):
+            karr_mask = karr.copy()
+
+            if box_sign:
+                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
+            else:
+                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
+
+            depth_img_mask = np.zeros_like(crops[0])
+            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
+            karr_mask[2] -= p[0]
+            karr_mask[5] -= p[1]
+
+            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
+                depth_img_mask, image_size, karr_mask, **self._config
+            )
+            if CODE != 0:
+                pose_data.add_data(CODE)
+                continue
+
+            x, y, z = pcd.get_center()
+            if get_grab_width:
+                if np.asarray(pcd.points).shape[0] < 4:
+                    pose_data.add_data(Status.TOO_FEW_POINTS_OBB)
+                    continue
+
+                obb = pcd.get_oriented_bounding_box()
+                extent = obb.extent
+                order = np.argsort(-extent)
+                grab_width = extent[order]
+
+                v = obb.R
+                v = v[:, order]
+                if v is None:
+                    pose_data.add_data(Status.PCA_NO_VECTOR)
+                    continue
+                grab_width = grab_width * 1.05
+
+            else:
+                w, v = pca(np.asarray(pcd.points))
+
+                if w is None or v is None:
+                    pose_data.add_data(Status.PCA_NO_VECTOR)
+                    continue
+
+                grab_width = (0.0, 0.0, 0.0)
+
+            vx, vy, vz = v[:, 0], v[:, 1], v[:, 2]
+
+            if vx[0] < 0:
+                vx = -vx
+            if vy[1] < 0:
+                vy = -vy
+            if not np.allclose(np.cross(vx, vy), vz):
+                vz = -vz
+
+            R = np.column_stack((vx, vy, vz))
+            pose_mat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
+            pose_data.add_data(Status.SUCCESS, pose_mat, tuple(grab_width))
+            n += 1
+
+        if n == 0:
+            return None, Status.CANNOT_ESTIMATE
+        return pose_data, Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/image_providers/init.py
+++ b/vision_detect/vision_detect/vision_core/core/components/image_providers/init.py
@@ -0,0 +1,5 @@
+from .driver_source import *
+from .single_topic_source import *
+from .muli_topic_source import *
+
+# from .source_baseline import *
--- a/vision_detect/vision_detect/vision_core/core/components/image_providers/driver_source.py
+++ b/vision_detect/vision_detect/vision_core/core/components/image_providers/driver_source.py
@@ -0,0 +1,28 @@
+from rclpy.node import Node
+
+from interfaces.msg import ImgMsg
+
+from .source_baseline import SourceBaseline
+
+
+__all__ = ["DriverSource"]
+
+class DriverSource(SourceBaseline):
+    def __init__(self, config, node: Node):
+        super().__init__(config)
+        self._sub = node.create_subscription(
+            ImgMsg,
+            config["subscription_name"],
+            self._subscription_callback,
+            10
+        )
+
+    def _subscription_callback(self, msg):
+        with self._lock:
+            self._images_buffer.save_data(
+                position=msg.position,
+                color=msg.image_color,
+                depth=msg.image_depth,
+                karr=msg.karr,
+                darr=msg.darr
+            )
--- a/vision_detect/vision_detect/vision_core/core/components/image_providers/muli_topic_source.py
+++ b/vision_detect/vision_detect/vision_core/core/components/image_providers/muli_topic_source.py
@@ -0,0 +1,80 @@
+import threading
+
+from rclpy.node import Node
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+
+from sensor_msgs.msg import Image, CameraInfo
+
+from .source_baseline import SourceBaseline
+
+
+__all__ = ["MuliTopicSource"]
+
+class TopicSubscriber:
+    def __init__(
+            self,
+            position,
+            topic_configs,
+            node: Node,
+            lock: threading.Lock,
+            image_buffer
+    ):
+        self.lock = lock
+        self.position = position
+        self.image_buffer = image_buffer
+        self.event = threading.Event()
+
+        self.camera_info = []
+
+        self.sub_camera_info = node.create_subscription(
+            CameraInfo,
+            topic_configs[2],
+            self._camera_info_callback,
+            10
+        )
+
+        node.get_logger().info("Waiting for camera info...")
+        self.event.wait()
+        node.destroy_subscription(self.sub_camera_info)
+        node.get_logger().info("Camera info received.")
+
+        self.sub_color = Subscriber(node, Image, topic_configs[0])
+        self.sub_depth = Subscriber(node, Image, topic_configs[1])
+        self.sync_subscriber = ApproximateTimeSynchronizer(
+            [self.sub_color, self.sub_depth],
+            queue_size=10,
+            slop=0.1
+        )
+        self.sync_subscriber.registerCallback(self._sync_sub_callback)
+
+    def _sync_sub_callback(self, color, depth):
+        with self.lock:
+            self.image_buffer.save_data(
+                position=self.position,
+                color=color,
+                depth=depth,
+                karr=self.camera_info[0],
+                darr=self.camera_info[1]
+            )
+
+    def _camera_info_callback(self, msg):
+        if len(self.camera_info) == 2:
+            return
+        if msg.k is not None and len(msg.k) > 0 and msg.k is not None and len(msg.k) > 0:
+            self.camera_info = [msg.k, msg.d]
+            self.event.set()
+
+
+class MuliTopicSource(SourceBaseline):
+    def __init__(self, config, node: Node):
+        super().__init__(config)
+
+        self.subscribers = [
+            TopicSubscriber(
+                key,
+                value,
+                node,
+                self._lock,
+                self._images_buffer
+            ) for key, value in config
+        ]
--- a/vision_detect/vision_detect/vision_core/core/components/image_providers/single_topic_source.py
+++ b/vision_detect/vision_detect/vision_core/core/components/image_providers/single_topic_source.py
@@ -0,0 +1,56 @@
+import threading
+
+from rclpy.node import Node
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+
+from sensor_msgs.msg import Image, CameraInfo
+
+from .source_baseline import SourceBaseline
+
+
+__all__ = ["SingleTopicSource"]
+
+class SingleTopicSource(SourceBaseline):
+    def __init__(self, config, node: Node):
+        super().__init__(config)
+        self.position = config["position"]
+        self.event = threading.Event()
+        self.camera_info = []
+
+        self.sub_camera_info = node.create_subscription(
+            CameraInfo,
+            config["camera_info_topic_name"],
+            self._camera_info_callback,
+            10
+        )
+
+        node.get_logger().info("Waiting for camera info...")
+        self.event.wait()
+        node.destroy_subscription(self.sub_camera_info)
+        node.get_logger().info("Camera info received.")
+
+        self.sub_color_image = Subscriber(node, Image, config["color_image_topic_name"])
+        self.sub_depth_image = Subscriber(node, Image, config["depth_image_topic_name"])
+        self.sync_subscriber = ApproximateTimeSynchronizer(
+            [self.sub_color_image, self.sub_depth_image],
+            queue_size=10,
+            slop=0.1
+        )
+        self.sync_subscriber.registerCallback(self._sync_sub_callback)
+
+    def _camera_info_callback(self, msg):
+        if len(self.camera_info) == 2:
+            return
+        if msg.k is not None and len(msg.k) > 0 and msg.k is not None and len(msg.k) > 0:
+            self.camera_info = [msg.k, msg.d]
+            self.event.set()
+
+    def _sync_sub_callback(self, color, depth):
+        with self._lock:
+            self._images_buffer.save_data(
+                position=self.position,
+                color=color,
+                depth=depth,
+                karr=self.camera_info[0],
+                darr=self.camera_info[1]
+            )
--- a/vision_detect/vision_detect/vision_core/core/components/image_providers/source_baseline.py
+++ b/vision_detect/vision_detect/vision_core/core/components/image_providers/source_baseline.py
@@ -0,0 +1,155 @@
+import time
+import threading
+from dataclasses import dataclass, field
+
+import cv2
+from numpy.typing import NDArray
+
+from cv_bridge import CvBridge
+
+from ...enum import Status
+from ...data_struct import ImageDataContainer
+from ...utils import image
+
+
+__all__ = ['SourceBaseline']
+
+@dataclass(slots=True)
+class _BufferData:
+    image_color: NDArray | None
+    image_depth: NDArray | None
+    karr: NDArray | list[float] | None
+    darr: NDArray | list[float] | None
+
+    def is_empty(self):
+        return (self.image_color is None or self.image_depth is None
+                or self.karr is None or self.darr is None)
+
+
+@dataclass(slots=True)
+class _ImageBuffer:
+    data_dict: dict[str, _BufferData] = field(default_factory=dict)
+
+    def __setitem__(self, key, value):
+        raise AttributeError
+
+    def __getitem__(self, key):
+        return self.data_dict[key]
+
+    def __contains__(self, key):
+        return key in self.data_dict
+
+    def __len__(self):
+        return len(self.data_dict)
+
+    def save_data(
+            self,
+            position: str,
+            *,
+            color: NDArray | None,
+            depth: NDArray | None,
+            karr: list[float] | None,
+            darr: list[float] | None,
+    ):
+        self.data_dict[position] = _BufferData(color, depth, karr, darr)
+
+
+class SourceBaseline:
+    def __init__(self, config):
+        self._images_buffer = _ImageBuffer()
+        self.cv_bridge = CvBridge()
+        self._lock = threading.Lock()
+        self._clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
+
+        self.distortion_switch = config.get("distortion", False)
+        self.denoising_switch = config.get("denoising", False)
+        self.enhancement_switch = config.get("enhancement", False)
+        self.quality_switch = config.get("quality", False)
+        self.quality_threshold = config.get("quality_threshold", 100.0)
+
+    def get_images(self, positions: tuple[str, ...]) -> tuple[ImageDataContainer | None, int]:
+        time_start = time.time()
+        with self._lock:
+            image_data = ImageDataContainer()
+            if len(self._images_buffer) == 0:
+                return None, Status.NO_CAMERA_DATA
+
+            buffer_data_list = []
+            for position in positions:
+                if not (position in self._images_buffer):
+                    # image_data.add_data(position, Status.NO_POSITION_DATA)
+                    buffer_data_list.append(Status.NO_POSITION_DATA)
+                    continue
+
+                if self._images_buffer[position].is_empty():
+                    # image_data.add_data(position, Status.NO_POSITION_DATA)
+                    buffer_data_list.append(Status.NO_POSITION_DATA)
+                    continue
+
+                buffer_data_list.append(self._images_buffer[position])
+
+        time_1 = time.time()
+        valid_positions = 0
+        for data in buffer_data_list:
+            if data == Status.NO_POSITION_DATA:
+                image_data.add_data(position, Status.NO_POSITION_DATA)
+                continue
+
+            color_img_cv = self.cv_bridge.imgmsg_to_cv2(data.image_color, "bgr8")
+            depth_img_cv = self.cv_bridge.imgmsg_to_cv2(data.image_depth, '16UC1')
+
+            if self.quality_switch:
+                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+                laplacian_source = cv2.Laplacian(gray, cv2.CV_64F).var()
+                if laplacian_source < self.quality_threshold:
+                    image_data.add_data(position, Status.IMAGE_QUALITY_LOW)
+                    continue
+
+
+            # 畸变矫正
+            if self.distortion_switch:
+                camera_size = color_img_cv.shape[:2][::-1]
+                color_img_cv, depth_img_cv, k = image.distortion_correction(
+                    color_img_cv,
+                    depth_img_cv,
+                    data.karr,
+                    data.darr,
+                    camera_size
+                )
+            else:
+                k = data.karr
+
+            # 彩色图像双边滤波
+            if self.denoising_switch:
+                color_img_cv = cv2.bilateralFilter(color_img_cv, 9, 75, 75)
+
+            # 彩色图像增强
+            if self.enhancement_switch:
+                lab = cv2.cvtColor(color_img_cv, cv2.COLOR_BGR2LAB)
+                ch = cv2.split(lab)
+                avg_luminance = cv2.mean(ch[0])[0]
+                ch[0] = self._clahe.apply(ch[0])
+                current_avg_luminance = cv2.mean(ch[0])[0]
+                alpha = avg_luminance / current_avg_luminance
+                ch[0] = cv2.convertScaleAbs(ch[0], alpha=alpha, beta=0)
+                lab = cv2.merge(ch)
+                color_img_cv = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
+
+            image_data.add_data(
+                position=position,
+                status=Status.SUCCESS,
+                color_image=color_img_cv,
+                depth_image=depth_img_cv,
+                karr=list(k),
+                darr=tuple(self._images_buffer[position].darr)
+            )
+            valid_positions += 1
+
+        time_end = time.time()
+
+        print(f"get_data:        {(time_1 - time_start) * 1000} ms")
+        print(f"img_cv_process:  {(time_end - time_1) * 1000} ms")
+        if valid_positions == 0:
+            return None, Status.NO_ALL_POSITION_DATA
+        else:
+            return image_data, Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/model.py
+++ b/vision_detect/vision_detect/vision_core/core/components/model.py
@@ -0,0 +1,3 @@
+from ...model import gsnet
+
+__all__ = ['gsnet']
--- a/vision_detect/vision_detect/vision_core/core/components/refiners/init.py
+++ b/vision_detect/vision_detect/vision_core/core/components/refiners/init.py
@@ -0,0 +1,2 @@
+from .fixed_orientation_refiner import *
+from .no_refiner import *
--- a/vision_detect/vision_detect/vision_core/core/components/refiners/fixed_orientation_refiner.py
+++ b/vision_detect/vision_detect/vision_core/core/components/refiners/fixed_orientation_refiner.py
@@ -0,0 +1,425 @@
+import numpy as np
+import transforms3d as tfs
+
+from ...enum import Status
+from ...utils import pointcloud, transforms
+from ...data_struct import ImageData, PoseData
+from .refiner_baseline import RefinerBaseline
+
+
+__all__ = ['FixedOrientationRefiner']
+
+def collision_detector(
+        points: np.ndarray,
+        refine: np.ndarray,
+        volume: list[float],
+        iou: float = 0.001,
+        search_mode: bool = False,
+        **kwargs
+) -> int:
+    """
+    collision detector
+    -----
+    input:
+        points: np.ndarray (3, n)
+        refine: np.ndarray (3, ), Grab target poes coordinate system
+        volume: list [left, right]
+        iou : float
+        search_mode: bool, Default False
+        **kwargs:
+            "grab_width": float
+            "hand_size": list [width, height, length]
+            "left_size": list [thick, width, length]
+            "right_size": list [thick, width, length]
+            thick of gripper finger, width of gripper finger, length of gripper finger
+    output:
+        collision_code: int
+    """
+    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
+    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
+    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
+    grab_width = kwargs.get('grab_width', 0.10) * 0.95
+    x, y, z = refine
+
+    if not search_mode:
+        hand_top_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x - hand_size[1]*1/4))  & (points[0] > (x - hand_size[1]/2))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+        hand_center_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x + hand_size[1]*1/4))  & (points[0] > (x - hand_size[1]*1/4))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+        hand_bottom_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x + hand_size[1]/2)) & (points[0] > (x + hand_size[1]*1/4))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+
+        if (hand_top_box.any() and hand_bottom_box.any()) or hand_center_box.any():
+            return 1
+        if hand_bottom_box.any():
+            return 2
+        if hand_top_box.any():
+            return 3
+    else:
+        iou *= 0.5
+
+    left_finger_box = (
+        (points[2] < (z + left_size[2])) & (points[2] > z -0.05)
+        & (points[0] < (x + left_size[1]/2)) & (points[0] > (x - left_size[1]/2))
+        & (points[1] < (y + grab_width/2 + left_size[0])) & (points[1] > (y + grab_width/2))
+    )
+    right_finger_box = (
+        (points[2] < (z + right_size[2])) & (points[2] > z - 0.05)
+        & (points[0] < (x + right_size[1]/2)) & (points[0] > (x - right_size[1]/2))
+        & (points[1] < (y - grab_width/2)) & (points[1] > (y-(grab_width/2 + right_size[0])))
+    )
+
+    left_iou = left_finger_box.sum() / (volume[0]+1e-6)
+    right_iou = right_finger_box.sum() / (volume[1]+1e-6)
+
+    if left_iou > iou and right_iou > iou:
+        return 6
+
+    if left_iou > iou :
+        return 4
+    if right_iou > iou :
+        return 5
+
+    return 0
+
+
+def refine_grasp_pose(
+        points: np.ndarray,
+        voxel_size: float,
+        target_position: np.ndarray,
+        expect_orientation: np.ndarray | None = None,
+        search_mode: bool = False,
+        **kwargs
+):
+    """
+    refine grasp pose
+    -----
+    input:
+        points: np.ndarray (n, 3), already complete voxel downsample
+        voxel_size: float
+        target_position: np.ndarray (3, ), grab target position
+        expect_orientation: np.ndarray (3, 3), expect grab target orientation
+        search_mode: bool, Default False
+        **kwargs:
+            "grab_width": float
+            "hand_size": list [width, height, length]
+            "left_size": list [thick, width, length]
+            "right_size": list [thick, width, length]
+            thick of gripper finger, width of gripper finger, length of gripper finger
+    output:
+    """
+    if expect_orientation is None:
+        expect_orientation = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
+
+    refine = np.zeros(3)
+    grab_width = kwargs.get('grab_width', 0.10)
+    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
+    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
+    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
+
+    left_volume = left_size[0] * left_size[1] * left_size[2] / (voxel_size**3)
+    right_volume = right_size[0] * right_size[1] * right_size[2] / (voxel_size**3)
+
+    points = points - target_position[None, :] # (n, 3) - (1, 3) = (n, 3)
+    points = expect_orientation.T @ points.T # (3, 3) @ (3, n) = (3, n)
+
+    points = points[:,
+        (points[2] < left_size[2]) & (points[2] > -(0.1 + hand_size[2]))
+        & (points[0] < (0.05 + hand_size[1]/2)) & (points[0] > -(0.05 + hand_size[1]/2))
+        & (points[1] < (0.05 + grab_width/2 + left_size[0]))
+        & (points[1] > -(0.05 + grab_width/2 + right_size[0]))
+    ]
+
+    frist_sign = False
+    y_n = y_p = False
+    left_last = right_last = False
+    hand_num = left_num = right_num = 0
+    while hand_num < 20 and left_num < 10 and right_num < 10:
+        collision_code = collision_detector(
+            points, refine, volume=[left_volume, right_volume], **kwargs
+        )
+
+        if collision_code == 0:
+            if y_p and not y_n:
+                refine[1] += 0.004
+                y_p = y_n = False
+                left_last = True
+                right_last = False
+                continue
+            if not y_p and y_n:
+                refine[1] -= 0.004
+                y_p = y_n = False
+                left_last = False
+                right_last = True
+                continue
+            if search_mode:
+                break
+            position = target_position + (expect_orientation @ refine.T).T
+            rmat = tfs.affines.compose(
+                np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
+            )
+            return rmat, Status.SUCCESS
+        if collision_code == 6:
+            return None, Status.REFINE_FAIL
+
+        if collision_code == 1:
+            refine[2] -= 0.008
+            hand_num += 1
+            print("z + 0.008")
+            continue
+
+        if collision_code == 2:
+            if frist_sign:
+                print("z + 0.004")
+                refine[2] -= 0.004
+                frist_sign = True
+                continue
+            refine[0] -= 0.002
+            hand_num += 1
+            print("x - 0.002")
+            continue
+
+        if collision_code == 3:
+            if frist_sign:
+                print("z + 0.004")
+                refine[2] -= 0.004
+                frist_sign = True
+                continue
+            refine[0] += 0.002
+            hand_num += 1
+            print("x + 0.002")
+            continue
+
+        step = 0.004
+        if collision_code == 4:
+            y_p = True
+            y_n = False
+            if not y_p and y_n:
+                refine[1] -= step
+                step /= 2
+                if step <= 0.001:
+                    return None, Status.REFINE_FAIL
+            refine[1] += step
+            left_num += 1
+            print("y + 0.004")
+            continue
+
+        if collision_code == 5:
+            y_p = False
+            y_n = True
+            if y_p and not y_n:
+                refine[1] += step
+                step /= 2
+                if step <= 0.001:
+                    return None, Status.REFINE_FAIL
+            refine[1] -= step
+            right_num += 1
+            print("y - 0.004")
+            continue
+    else:
+        return None, Status.REFINE_FAIL
+
+    # max_moves = 20
+    # step = 0.004
+    # x_moves, y_moves, z_moves = 0, 0, 0
+    # last_collision_direction = None  # 记录最后一次碰撞发生的方向
+    # first_up_collision = False  # 标记是否第一次发生上端碰撞
+    # first_down_collision = False  # 标记是否第一次发生下端碰撞
+    # last_left_position = None  # 记录左指碰撞前的位置
+    # last_right_position = None  # 记录右指碰撞前的位置
+    #
+    # while x_moves < max_moves and y_moves < max_moves and z_moves < max_moves:
+    #     # 每次进入循环时先进行碰撞检测
+    #     collision = collision_detector(points, refine, volume=[left_volume, right_volume], **kwargs)
+    #
+    #     # 如果没有碰撞，且上次碰撞是在左右方向，继续按方向移动
+    #     if collision == 0:
+    #         if last_collision_direction == "left":
+    #             refine[0] += step  # 向左指碰撞的方向移动
+    #         elif last_collision_direction == "right":
+    #             refine[0] -= step  # 向右指碰撞的方向移动
+    #         if search_mode:
+    #             break
+    #         position = target_position + (expect_orientation @ refine.T).T
+    #         rmat = tfs.affines.compose(
+    #             np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
+    #         )
+    #         return rmat, Status.SUCCESS
+    #
+    #     if collision == 1:  # 掌心或手掌上下两端同时碰撞
+    #         refine[2] -= step  # 在z方向调整位置
+    #         z_moves += 1
+    #     elif collision == 3:  # 手掌上端碰撞
+    #         if not first_up_collision:  # 第一次发生上端碰撞
+    #             refine[2] -= step
+    #             first_up_collision = True  # 记录第一次发生上端碰撞
+    #             z_moves += 1
+    #         elif x_moves < max_moves:  # 后续上端碰撞，调整x轴
+    #             refine[0] += step
+    #             x_moves += 1
+    #     elif collision == 2:  # 手掌下端碰撞
+    #         if not first_down_collision:  # 第一次发生下端碰撞
+    #             refine[2] -= step
+    #             first_down_collision = True  # 记录第一次发生下端碰撞
+    #             z_moves += 1
+    #         elif x_moves < max_moves:  # 后续下端碰撞，调整x轴
+    #             refine[0] -= step
+    #             x_moves += 1
+    #     elif collision == 6:  # 双指碰撞
+    #         return None
+    #
+    #     # 根据碰撞类型处理调整方向
+    #     elif collision == 4:  # 左指碰撞
+    #         if last_right_position is not None:
+    #             step /= 2  # 步长减半
+    #             if step <= 0.001:
+    #                 return None, Status.REFINE_FAIL
+    #             refine = last_right_position.copy() # 回到右指碰撞前的位置
+    #             last_right_position = None
+    #             last_collision_direction = "right"  # 设置为左指方向
+    #             refine[0] -= step
+    #             x_moves += 1
+    #             continue  # 继续进行下一次碰撞检测
+    #         last_collision_direction = "left"
+    #         last_left_position = refine.copy()  # 记录左指碰撞前的位置
+    #         refine[0] += step  # 向左指方向移动
+    #         x_moves += 1
+    #
+    #     elif collision == 5:  # 右指碰撞
+    #         if last_left_position is not None:
+    #             step /= 2
+    #             if step <= 0.001:
+    #                 return None, Status.REFINE_FAIL
+    #             refine = last_left_position.copy()  # 回到左指碰撞前的位置
+    #             last_left_position = None
+    #             last_collision_direction = "left"  # 设置为右指方向
+    #             refine[0] += step
+    #             x_moves += 1
+    #             continue  # 继续进行下一次碰撞检测
+    #         last_collision_direction = "right"
+    #         last_right_position = refine.copy()  # 记录右指碰撞前的位置
+    #         refine[0] -= step  # 向右指方向移动
+    #         x_moves += 1
+    # else:
+    #     return None, Status.REFINE_FAIL
+
+
+
+    if search_mode:
+        right_num = left_num = 0
+        # already in left side
+        if left_last and not right_last:
+            y_min = refine[1] - 0.004
+            while left_num < 10:
+                left_num += 1
+                refine[1] += 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] - 0.004 + y_min) / 2
+                    break
+            else:
+                refine[1] = y_min + 0.02
+            print(f"left_num = {left_num}")
+
+        elif not left_last and right_last:
+            # already in right side
+            y_max = refine[1] + 0.004
+            while right_num < 10:
+                right_num += 1
+                refine[1] -= 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] + 0.004 + y_max) / 2
+                    break
+            else:
+                refine[1] = y_max - 0.02
+            print(f"right_num = {right_num}")
+
+        elif not left_last and not right_last:
+            # in middle
+            y_cur = refine[1]
+            while left_num < 10:
+                left_num += 1
+                refine[1] += 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    y_max = refine[1] - 0.004
+                    break
+            else:
+                y_max = y_cur + 0.040
+            print(f"left_num = {left_num}")
+
+            refine[1] = y_cur
+            while right_num < 10:
+                right_num += 1
+                refine[1] -= 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] + 0.004 + y_max) / 2
+                    break
+            else:
+                refine[1] = (y_cur - 0.040 + y_max) / 2
+            print(f"right_num = {right_num}")
+
+        position = target_position + (expect_orientation @ refine.T).T
+        rmat = tfs.affines.compose(
+            np.squeeze(np.asarray(position)),
+            expect_orientation,
+            [1, 1, 1]
+        )
+        return rmat, Status.SUCCESS
+    return None, Status.REFINE_FAIL
+
+
+class FixedOrientationRefiner(RefinerBaseline):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+
+    def _refine(self, image_data: ImageData, pose_data: PoseData, calibration_mat: np.ndarray,
+                **kwargs) -> tuple[PoseData, int]:
+
+        image_size = image_data.depth_image.shape[:2][::-1]
+        full_pcd = pointcloud.create_o3d_pcd(
+            image_data.depth_image, image_size, image_data.karr, **self.config
+        )
+        full_points = np.asarray(full_pcd.points)
+
+        n = 0
+        for i, (status, pose_mat, grasp_width) in enumerate(pose_data):
+            if status != 0:
+                continue
+
+            pose_mat, CODE = refine_grasp_pose(
+                full_points, self.config.get("voxel_size"), pose_mat[0:3, 3], search_mode=True
+            )
+            if CODE != 0:
+                pose_data.set_data(i, CODE)
+                continue
+
+            pose_mat = calibration_mat @ pose_mat
+            quat = transforms.rmat2quat(pose_mat)
+            pose_data.set_data(i, Status.SUCCESS, quat, grasp_width)
+            n += 1
+
+        if n == 0:
+            return pose_data, Status.ALL_POSE_REFINE_FAILED
+
+        return pose_data, Status.SUCCESS
--- a/vision_detect/vision_detect/vision_core/core/components/refiners/no_refiner.py
+++ b/vision_detect/vision_detect/vision_core/core/components/refiners/no_refiner.py
@@ -0,0 +1,29 @@
+from typing import Optional, Tuple
+
+import numpy as np
+
+from ...enum import Status
+from .refiner_baseline import RefinerBaseline
+from ...data_struct import ImageDataContainer, PoseData
+from ...utils import transforms
+
+__all__ = ["NoRefiner"]
+
+SUCCESS = 0
+
+class NoRefiner(RefinerBaseline):
+    def __init__(self, config):
+        super().__init__()
+
+    def _refine(self, image_data: Optional[ImageDataContainer], pose_data: Optional[PoseData],
+                calibration_mat: Optional[np.ndarray], **kwargs) -> Tuple[Optional[PoseData], int]:
+        for i, (status, pose_mat, grasp_width) in enumerate(pose_data):
+            if status != 0:
+                continue
+
+            pose_mat = calibration_mat @ pose_mat
+            quat = transforms.rmat2quat(pose_mat)
+            pose_data.set_data(i, Status.SUCCESS, quat, grasp_width)
+
+        return pose_data, Status.SUCCESS
+
--- a/vision_detect/vision_detect/vision_core/core/components/refiners/refiner_baseline.py
+++ b/vision_detect/vision_detect/vision_core/core/components/refiners/refiner_baseline.py
@@ -0,0 +1,34 @@
+from numpy.typing import NDArray
+
+from ...data_struct import ImageData, ImageDataContainer, PoseData
+
+
+__all__ = ['RefinerBaseline']
+
+class RefinerBaseline:
+    def __init__(self):
+        pass
+
+    def _refine(
+            self,
+            image_data:ImageData,
+            pose_data: PoseData,
+            calibration_mat: NDArray,
+            **kwargs
+    ) -> tuple[PoseData | None, int]:
+        pass
+
+    def get_refine(
+            self,
+            position: str,
+            image_data_container:ImageDataContainer,
+            pose_data: PoseData,
+            calibration_mat_dict: dict[str, NDArray],
+            **kwargs
+    ) -> tuple[PoseData | None, int]:
+        return self._refine(
+            image_data=image_data_container[position],
+            pose_data=pose_data,
+            calibration_mat=calibration_mat_dict[position],
+            **kwargs
+        )
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
liangyuxuan	867606dd68	add 预处理模块测试节点、图像质量判断功能	2026-03-06 15:07:34 +08:00
liangyuxuan	0bcf24e0ca	增加相机控制接口，图像预处理部分功能	2026-03-06 10:07:31 +08:00
liangyuxuan	d960da1192	框架调整(部分)，暂无crossboard-color-icp-e2e，添加了动作服务节点及动作客户端节点	2026-02-06 18:09:37 +08:00
liangyuxuan	6ebf159234	1	2026-01-23 11:47:45 +08:00
liangyuxuan	86e5ad76fc	1	2026-01-23 11:39:30 +08:00
liangyuxuan	54981b3892	1	2026-01-23 11:36:09 +08:00
liangyuxuan	1602b1423d	1	2026-01-21 16:06:22 +08:00
liangyuxuan	b04196de57	1	2026-01-21 09:13:06 +08:00
liangyuxuan	611b529075	修改抓取位置微调	2026-01-19 14:06:09 +08:00
liangyuxuan	039bb2d165	添加抓取位置微调、检测图片和原图片保存	2026-01-15 17:39:45 +08:00
liangyuxuan	a59776e6c2	增加end2end方式计算位姿	2026-01-12 09:57:30 +08:00
liangyuxuan	f937329e4b	增加end2end方式计算位姿	2026-01-12 09:48:58 +08:00
liangyuxuan	6fbf13e232	增加end2end方式计算位姿	2026-01-12 09:48:54 +08:00
liangyuxuan	cd4bbdc5f3	1	2025-12-30 17:34:23 +08:00
liangyuxuan	97c142f6e3	更改去噪参数	2025-12-30 13:22:00 +08:00
liangyuxuan	eeedd4bfc6	1	2025-12-28 16:24:16 +08:00
liangyuxuan	08319d9eba	1	2025-12-28 16:19:49 +08:00
liangyuxuan	3025b5a23a	采集的深度数据更改为.png存储	2025-12-28 13:57:37 +08:00
liangyuxuan	93f3a8d45e	移动文件位置	2025-12-27 16:47:46 +08:00
liangyuxuan	12fed65d8a	更改深度raw的保存方式为.zip	2025-12-27 16:45:55 +08:00
liangyuxuan	87a5697e75	迁移数据采集节点至vision_tools，修改点下采样参数	2025-12-27 11:08:53 +08:00
liangyuxuan	8279687f4c	修正数据采集节点	2025-12-25 15:39:13 +08:00
liangyuxuan	aff813b107	检测物体排序修正	2025-12-24 19:48:16 +08:00
liangyuxuan	a5f340bc35	添加相机数据采集服务客户端	2025-12-24 17:51:56 +08:00
liangyuxuan	d730325a0d	添加相机数据采集服务客户端	2025-12-24 17:51:03 +08:00
liangyuxuan	2e4cb18fa6	添加相机数据采集服务客户端	2025-12-24 17:33:26 +08:00
liangyuxuan	facd43f54f	添加相机数据采集节点	2025-12-24 16:44:06 +08:00
liangyuxuan	59b90201e4	设置检测输出顺序为：左->右，下->上	2025-12-24 10:42:44 +08:00
liangyuxuan	db330ec33c	1	2025-12-23 18:25:22 +08:00
liangyuxuan	d62faaa9cc	1	2025-12-23 15:13:01 +08:00
liangyuxuan	eec02de5a7	删除多余文件	2025-12-23 09:57:52 +08:00
liangyuxuan	b8444c82b9	头部相机标定文件	2025-12-19 15:50:59 +08:00
liangyuxuan	727b191a2a	删除物体自身深度估计	2025-12-17 10:50:23 +08:00
liangyuxuan	f2f93deb4a	1	2025-12-17 09:46:09 +08:00
liangyuxuan	46ac61a62a	删除物体自身深度估计	2025-12-16 17:24:08 +08:00
liangyuxuan	3b8112deab	1	2025-12-12 15:53:46 +08:00
liangyuxuan	fbe4d80807	1	2025-12-12 15:14:53 +08:00
liangyuxuan	c1b93468fa	1	2025-12-12 09:58:57 +08:00
liangyuxuan	151726eedb	add tools	2025-12-11 17:40:31 +08:00
liangyuxuan	2df01a41b1	update checkpoint	2025-12-11 16:08:44 +08:00
liangyuxuan	47e14725bc	fix	2025-12-11 10:34:53 +08:00
liangyuxuan	1232fd5ce9	fix	2025-12-11 10:34:33 +08:00
liangyuxuan	bce708ed05	抓取宽度设置为物体y轴方向	2025-12-10 14:42:12 +08:00
liangyuxuan	1702ed2fcb	add medical sense file	2025-12-10 14:08:38 +08:00
liangyuxuan	3e78f31a84	优化参数	2025-12-10 09:51:51 +08:00
liangyuxuan	b593cc628d	优化参数	2025-12-10 09:51:38 +08:00
liangyuxuan	f6a9b6c8fb	修改抓取宽度计算方法	2025-12-09 17:51:43 +08:00
liangyuxuan	e27acec63e	add tools	2025-12-08 17:23:26 +08:00
liangyuxuan	efcd598565	结构变更	2025-12-05 14:28:10 +08:00
liangyuxuan	927f3c728c	简化配置方式，修正部分错误	2025-12-04 16:19:40 +08:00
liangyuxuan	f433d4428f	1	2025-12-04 11:14:03 +08:00
liangyuxuan	c0a1b4e6e0	滤波参数优化，添加对不同尺寸点云的自适应处理	2025-12-04 11:10:56 +08:00
liangyuxuan	ef093e2813	.py导出为.onnx	2025-12-03 18:15:38 +08:00
liangyuxuan	fe71dab13a	.py导出为.onnx	2025-12-03 18:06:55 +08:00
liangyuxuan	3ed1182472	点云滤波参数优化	2025-12-03 17:33:41 +08:00