liangyuxuan
2025-11-27 16:27:11 +08:00
parent 101857b8c4
commit 694272080a
3 changed files with 91 additions and 77 deletions

View File

@@ -1,3 +1,4 @@
"""Vision Detect Node"""
import os
import time
import threading
@@ -24,28 +25,46 @@ from interfaces.msg import PoseClassAndID, PoseArrayClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
from ..utils import calculate_pose_pca, calculate_pose_icp, get_map, distortion_correction, crop_imgs_box_xywh, \
from ..utils import calculate_pose_pca, calculate_pose_icp, get_map, \
distortion_correction, crop_imgs_box_xywh, \
draw_box, draw_mask, crop_imgs_mask, calculate_grav_width
share_dir = get_package_share_directory('vision_detect')
class DetectNode(Node):
"""Detect Node"""
def __init__(self, name):
super().__init__(name)
self.device = None
self.checkpoint_path = None
self.output_boxes = None
self.output_masks = None
self.function = None
self.source = None
self.K = None
self.eye_in_hand_mat = None
self.server = None
self.model = None
self.k = self.d = None
self.eye_in_left_hand_mat = None
self.eye_in_right_hand_mat = None
self.eye_to_hand_mat = None
self.hand_eye_mat = None
self.eye_in_left_hand_path = None
self.eye_in_right_hand_path = None
self.eye_to_hand_path = None
self.get_camera_mode = None
self.detect_mode = None
self.calculate_mode = None
self.camera_size = None
self.map1 = self.map2 = None
self.calculate_function = calculate_pose_pca
self.fx = self.fy = 0.5
self.camera_data = {}
@@ -158,7 +177,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_in_left_hand_path} not found, use E(4, 4)")
self.eye_in_left_hand_mat = np.eye(4)
else:
with open(self.eye_in_left_hand_path, "r") as f:
with open(self.eye_in_left_hand_path, "r", encoding="utf-8") as f:
self.eye_in_left_hand_mat = np.array(json.load(f)["T"])
if self.eye_in_left_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_in_right_hand_mat is wrong, use E(4, 4)")
@@ -168,7 +187,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_in_right_hand_path} not found, use E(4, 4)")
self.eye_in_right_hand_mat = np.eye(4)
else:
with open(self.eye_in_right_hand_path, "r") as f:
with open(self.eye_in_right_hand_path, "r", encoding="utf-8") as f:
self.eye_in_right_hand_mat = np.array(json.load(f)["T"])
if self.eye_in_right_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_in_right_hand_mat is wrong, use E(4, 4)")
@@ -178,7 +197,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_to_hand_path} not found, use E(4, 4)")
self.eye_to_hand_mat = np.eye(4)
else:
with open(self.eye_to_hand_path, "r") as f:
with open(self.eye_to_hand_path, "r", encoding="utf-8") as f:
self.eye_to_hand_mat = np.array(json.load(f)["T"])
if self.eye_to_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_to_hand_mat is wrong, use E(4, 4)")
@@ -193,7 +212,7 @@ class DetectNode(Node):
self.model = YOLO(self.checkpoint_path).to(device_model)
except Exception as e:
self.get_logger().error(f'Failed to load YOLO model: {e}')
raise
raise
self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
if self.checkpoint_path.endswith('-seg.pt'):
@@ -201,7 +220,7 @@ class DetectNode(Node):
else:
self.function = None
self.get_logger().fatal(f'Unknown checkpoint: {self.checkpoint_path}')
raise
raise ValueError(f'Unknown checkpoint: {self.checkpoint_path}')
self.get_logger().info("Initialize model done")
def _init_publisher(self):
@@ -244,7 +263,7 @@ class DetectNode(Node):
rclpy.spin_until_future_complete(self, self.future)
self.get_logger().info("Camera info received.")
'''sync get color and depth img'''
# sync get color and depth img
self.sub_color_image = Subscriber(self, Image, self.color_image_topic_name)
self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic_name)
@@ -267,17 +286,18 @@ class DetectNode(Node):
def _camera_info_callback(self, msg: CameraInfo):
"""Get camera info"""
self.K = msg.k
self.D = msg.d
self.k = msg.k
self.d = msg.d
self.camera_size = [msg.width, msg.height]
if self.K is not None and len(self.K) > 0 and self.D is not None and len(self.D) > 0:
self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
if not self.future.done(): self.future.set_result(True)
if self.k is not None and len(self.k) > 0 and self.d is not None and len(self.d) > 0:
self.map1, self.map2, self.k = get_map(msg.k, msg.d, self.camera_size)
if not self.future.done():
self.future.set_result(True)
self.destroy_subscription(self.sub_camera_info)
else:
self.get_logger().warning("K and D are not defined")
self.get_logger().warning("K and d are not defined")
def _service_sub_callback(self, msgs):
"""同步回调函数"""
@@ -298,7 +318,7 @@ class DetectNode(Node):
img, pose_list = self.function(color_img_cv, depth_img_cv)
"""masks为空结束这一帧"""
# masks为空结束这一帧
if img is None:
img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
@@ -326,7 +346,7 @@ class DetectNode(Node):
with self.lock:
if request.camera_position in self.camera_data:
color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position]
else:
response.success = False
response.info = f"{request.camera_position} Camera data is empty or name is wrong"
@@ -346,12 +366,12 @@ class DetectNode(Node):
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
map1, map2, self.K = get_map(self.K, D, self.camera_size)
map1, map2, self.k = get_map(self.k, d, self.camera_size)
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
img, pose_list = self.function(color_img_cv, depth_img_cv)
"""masks为空结束这一帧"""
# masks为空结束这一帧
if self.output_boxes or self.output_masks:
if img is None:
img = color_img_ros
@@ -369,32 +389,29 @@ class DetectNode(Node):
grab_width = item["grab_width"]
)
)
return response
else:
response.info = "pose dict is empty"
response.success = False
response.objects = []
return response
return response
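A minimal sketch of calling this service from a client node; it assumes only the fields visible in this diff (request.camera_position, response.success, response.info, response.objects), and the service and camera names are hypothetical placeholders for whatever the node was launched with:

import rclpy
from rclpy.node import Node
from interfaces.srv import VisionObjectRecognition

rclpy.init()
node = Node("vision_detect_client")
client = node.create_client(VisionObjectRecognition, "vision_object_recognition")  # hypothetical service name
client.wait_for_service()

request = VisionObjectRecognition.Request()
request.camera_position = "head"  # hypothetical camera name registered in camera_data
future = client.call_async(request)
rclpy.spin_until_future_complete(node, future)

response = future.result()
if response.success:
    for obj in response.objects:
        node.get_logger().info(f"detected: {obj}")
else:
    node.get_logger().warning(response.info)

node.destroy_node()
rclpy.shutdown()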
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
"""Use segmentation model"""
pose_list = []
'''Get Predict Results'''
# Get Predict Results
time1 = time.time()
results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
time2 = time.time()
result = results[0]
'''Get masks'''
# Get masks
if result.masks is None or len(result.masks) == 0:
return None, None
masks = result.masks.data.cpu().numpy()
'''Get boxes'''
# Get boxes
boxes = result.boxes.xywh.cpu().numpy()
class_ids = result.boxes.cls.cpu().numpy()
labels = result.names
@@ -411,10 +428,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
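The flat self.k used above is the row-major 3x3 intrinsic matrix from CameraInfo, so k[0] is fx, k[4] is fy, k[2] is cx and k[5] is cy; subtracting x_min and y_min shifts the principal point into the cropped box's pixel coordinates. A small sketch of that mapping (names are illustrative only):

def cropped_intrinsics(k, x_min, y_min):
    # k: row-major 3x3 intrinsics flattened to 9 values:
    #   [fx, 0, cx,
    #    0, fy, cy,
    #    0,  0,  1]
    fx, fy = k[0], k[4]
    cx, cy = k[2], k[5]
    # Cropping the image at (x_min, y_min) shifts the principal point by the same offset.
    return fx, fy, cx - x_min, cy - y_min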
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -426,7 +443,7 @@ class DetectNode(Node):
self.configs
)
grab_width = calculate_grav_width(mask, self.K, z)
grab_width = calculate_grav_width(mask, self.k, z)
z = z + grab_width * 0.45
if (x, y, z) != (0.0, 0.0, 0.0):
@@ -444,14 +461,14 @@ class DetectNode(Node):
time4 = time.time()
self.get_logger().info(f'start')
self.get_logger().info('start')
self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
self.get_logger().info(f'end')
self.get_logger().info('end')
'''whether to output mask_img and box_img'''
# whether to output mask_img and box_img
if self.output_boxes and not self.output_masks:
draw_box(rgb_img, result)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
@@ -492,10 +509,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -554,10 +571,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -600,8 +617,8 @@ class DetectNode(Node):
2
)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
else:
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
if __name__ == '__main__':
@@ -613,4 +630,4 @@ if __name__ == '__main__':
pass
finally:
node.destroy_node()
rclpy.shutdown()
rclpy.shutdown()

View File

@@ -12,19 +12,16 @@
#include "interfaces/srv/vision_object_recognition.hpp"
#include "interfaces/msg/pose_array_class_and_id.hpp"
using json = nlohmann::json;
using SyncPolicy = message_filters::sync_policies::ApproximateTime<sensor_msgs::msg::Image, sensor_msgs::msg::Image>;
class VisionDetectNode: public rclcpp::Node {
public:
explicit VisionDetectNode(const std::string& name);
private:
bool output_box, output_mask;
std::string GetCameraMode, DetectMode, CalculateMode;
std::string get_camera_mode, detect_mode, calculate_mode;
std::string cfg_str;
json calibrition_path;
json GetCameraConfigs, DetectConfigs, CalculateConfigs;
nlohmann::json calibrition_path;
nlohmann::json get_camera_configs, segmentation_configs, calculate_configs;
Eigen::Matrix4d left_hand_mat, right_hand_mat, head_mat;
@@ -36,7 +33,7 @@ class VisionDetectNode: public rclcpp::Node {
message_filters::Subscriber<sensor_msgs::msg::Image> color_img_sub;
message_filters::Subscriber<sensor_msgs::msg::Image> depth_img_sub;
std::shared_ptr<message_filters::Synchronizer<SyncPolicy>> sync_sub;
std::shared_ptr<message_filters::Synchronizer<message_filters::sync_policies::ApproximateTime<sensor_msgs::msg::Image, sensor_msgs::msg::Image>>> sync_sub;
rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr camera_info_sub;

View File

@@ -13,38 +13,38 @@ VisionDetectNode::VisionDetectNode(const std::string& name): Node(name) {
this->init_publisher();
}
if (this->GetCameraMode == "Service") {
this->GetCameraConfigs = json::parse(this->get_parameter("ServiceConfigs").as_string());
if (this->get_camera_mode == "Service") {
this->get_camera_configs = json::parse(this->get_parameter("ServiceConfigs").as_string());
this->init_service();
this->init_subscriber();
}
else if (this->GetCameraMode == "Topic") {
this->GetCameraConfigs = json::parse(this->get_parameter("TopicConfigs").as_string());
else if (this->get_camera_mode == "Topic") {
this->get_camera_configs = json::parse(this->get_parameter("TopicConfigs").as_string());
this->init_topic();
}
else {
RCLCPP_WARN(this->get_logger(), "Unkonw GetCameraMode, Use Service");
this->GetCameraConfigs = json::parse(this->get_parameter("ServiceConfigs").as_string());
RCLCPP_WARN(this->get_logger(), "Unkonw get_camera_mode, Use Service");
this->get_camera_configs = json::parse(this->get_parameter("ServiceConfigs").as_string());
this->init_service();
this->init_subscriber();
}
if (this->DetectMode == "Detect") {
if (this->detect_mode == "Detect") {
this->init_model();
this->DetectConfigs = json::parse(this->get_parameter("DetectConfigs").as_string());
this->segmentation_configs = json::parse(this->get_parameter("DetectConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_img;
}
else if (this->DetectMode == "Color") {
this->DetectConfigs = json::parse(this->get_parameter("ColorConfigs").as_string());
else if (this->detect_mode == "Color") {
this->segmentation_configs = json::parse(this->get_parameter("ColorConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_color;
}
else if (this->DetectMode == "Crossboard") {
this->DetectConfigs = json::parse(this->get_parameter("CrossboardConfigs").as_string());
else if (this->detect_mode == "Crossboard") {
this->segmentation_configs = json::parse(this->get_parameter("CrossboardConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_crossboard;
}
else {
RCLCPP_WARN(this->get_logger(), "Unkonw DetectMode, Use Service");
this->DetectConfigs = json::parse(this->get_parameter("DetectConfigs").as_string());
RCLCPP_WARN(this->get_logger(), "Unkonw detect_mode, Use Service");
this->segmentation_configs = json::parse(this->get_parameter("DetectConfigs").as_string());
}
RCLCPP_INFO(this->get_logger(), "Init process done");
}
@@ -58,13 +58,13 @@ void VisionDetectNode::init_param() {
this->output_box = this->get_parameter("output_box").as_bool();
this->output_mask = this->get_parameter("output_mask").as_bool();
this->declare_parameter<std::string>("GetCameraMode", "Service");
this->declare_parameter<std::string>("DetectMode", "Detect");
this->declare_parameter<std::string>("CalculateMode", "PCA");
this->declare_parameter<std::string>("get_camera_mode", "Service");
this->declare_parameter<std::string>("detect_mode", "Detect");
this->declare_parameter<std::string>("calculate_mode", "PCA");
this->GetCameraMode = this->get_parameter("GetCameraMode").as_string();
this->DetectMode = this->get_parameter("DetectMode").as_string();
this->CalculateMode = this->get_parameter("CalculateMode").as_string();
this->get_camera_mode = this->get_parameter("get_camera_mode").as_string();
this->detect_mode = this->get_parameter("detect_mode").as_string();
this->calculate_mode = this->get_parameter("calculate_mode").as_string();
this->declare_parameter<std::string>("calibrition_path", "{}");
@@ -116,7 +116,7 @@ void VisionDetectNode::init_calibration_mat() {
void VisionDetectNode::init_service() {
/*Init_service*/
detect_server = this->create_service<interfaces::srv::VisionObjectRecognition>(
this->GetCameraConfigs["service_name"].get<std::string>(),
this->get_camera_configs["service_name"].get<std::string>(),
std::bind(
&VisionDetectNode::service_callback,
this,
@@ -132,8 +132,8 @@ void VisionDetectNode::init_service() {
void VisionDetectNode::init_topic() {
/*Init Topic*/
this->color_img_sub.subscribe(this, this->GetCameraConfigs["color_image_topic_name"].get<std::string>());
this->depth_img_sub.subscribe(this, this->GetCameraConfigs["depth_image_topic_name"].get<std::string>());
this->color_img_sub.subscribe(this, this->get_camera_configs["color_image_topic_name"].get<std::string>());
this->depth_img_sub.subscribe(this, this->get_camera_configs["depth_image_topic_name"].get<std::string>());
this->sync_sub = std::make_shared<message_filters::Synchronizer<SyncPolicy>>(
SyncPolicy(10),
color_img_sub, depth_img_sub
@@ -142,7 +142,7 @@ void VisionDetectNode::init_topic() {
this->sync_sub->registerCallback(&VisionDetectNode::topic_callback, this);
this->camera_info_sub = this->create_subscription<sensor_msgs::msg::CameraInfo>(
this->GetCameraConfigs["camera_info_topic_name"].get<std::string>(),
this->get_camera_configs["camera_info_topic_name"].get<std::string>(),
10,
std::bind(&VisionDetectNode::camera_info_callback, this, std::placeholders::_1)
);
@@ -156,7 +156,7 @@ void VisionDetectNode::init_publisher() {
/*Init Publisher*/
pub_detect_img = this->create_publisher<sensor_msgs::msg::Image>("/image/detect_image", 10);
if (GetCameraMode == "Topic") {
if (get_camera_mode == "Topic") {
pub_pose_list = this->create_publisher<interfaces::msg::PoseArrayClassAndID>("/pose/cv_detect_pose", 10);
}