修正检测节点计算的位置误差

2025-09-11 18:12:37 +08:00
parent ef16f02f0f
commit 7348a99a53
1 changed files with 94 additions and 51 deletions
--- a/HiveCoreR0/src/robot_vision/detect_part/detect_part/detect.py
+++ b/HiveCoreR0/src/robot_vision/detect_part/detect_part/detect.py
@@ -1,5 +1,6 @@
 import os
 from collections import defaultdict
+import time

 import cv2
 import open3d as o3d
@@ -7,6 +8,8 @@ import numpy as np
 import transforms3d as tfs
 from cv_bridge import CvBridge

+# from concurrent.futures import ProcessPoolExecutor
+
 import torch
 from detect_part.ultralytics import YOLO

@@ -21,18 +24,32 @@ from interfaces.msg import PoseWithID, PoseWithIDArray
 import detect_part


+# def calculate_pose_for_mask(args):
+#     mask, orig_shape, rgb_img, depth_img, class_id, label, camera_size, K = args
+#     intrinsics = o3d.camera.PinholeCameraIntrinsic(
+#         camera_size[0],
+#         camera_size[1],
+#         K[0],
+#         K[4],
+#         K[2],
+#         K[5]
+#     )
+#     x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, intrinsics)
+#     return class_id, label, x, y, z, roll, pitch, yaw
+
+
 def pca(data, sort=True):
-    # center = data.get_center()  # 求 NX3 向量的均值
-    data_np = np.asarray(data.points)
-    center = np.mean(data_np, axis=0)
-    centered_points = data_np - center    # 去中心化
+    center = np.mean(data, axis=0)
+    centered_points = data - center    # 去中心化

    # 计算特征值和特征向量
    # cov_matrix = np.dot(centered_points.T,centered_points)
    # eigenvectors, eigenvalues, eigenvectors_T = np.linalg.svd(cov_matrix)
-
-    cov_matrix = np.cov(centered_points.T)  # 注意要转置
-    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
+    try:
+        cov_matrix = np.cov(centered_points.T)  # 注意要转置
+        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
+    except np.linalg.LinAlgError:
+        return None, None

    if sort:
        sort = eigenvalues.argsort()[::-1]  # 降序排列
@@ -42,18 +59,14 @@ def pca(data, sort=True):
    return eigenvalues, eigenvectors


-def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray, intrinsics): # , mask_set):
+def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray, intrinsics):
    """计算位态"""
    mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
-    rgb_img_mask = np.zeros_like(rgb_img)
-    rgb_img_mask[mask > 0] = rgb_img[mask > 0]
-    # rgb_img_mask[mask_set > 0] = rgb_img_mask[mask_set > 0] + np.array([0, 0, 255]) * 0.5

    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))  # 转成毫米整数
-    color_o3d = o3d.geometry.Image(rgb_img_mask.astype(np.uint8))
+    color_o3d = o3d.geometry.Image(rgb_img.astype(np.uint8))

    rgbd_img = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color=color_o3d, depth=depth_o3d,
@@ -65,38 +78,51 @@ def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray,
    point_cloud = point_cloud.remove_non_finite_points()

    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
-
    clean_pcd, ind = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
-    center = point_cloud.get_center()
+
+    if clean_pcd.points == 0:
+        return 0.0, 0.0, 0.0, None, None, None
+    center = clean_pcd.get_center()
    x, y, z = center

-    w, v = pca(clean_pcd)
-    vx, vy, vz = v[:,0], v[:,1], v[:,2]
+    w, v = pca(np.asarray(clean_pcd.points))

-    if vx[0] < 0:
-        vx = -vx
-    if vy[1] < 0:
-        vy = -vy
-    if not np.allclose(np.cross(vx, vy), vz):
-        vz = -vz
+    if w is not None and v is not None:
+        vx, vy, vz = v[:,0], v[:,1], v[:,2]

-    R = np.column_stack((vx, vy, vz))
-    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
+        if vx[0] < 0:
+            vx = -vx
+        if vy[1] < 0:
+            vy = -vy
+        if not np.allclose(np.cross(vx, vy), vz):
+            vz = -vz

-    roll, pitch, yaw = tfs.euler.mat2euler(rmat)
+        R = np.column_stack((vx, vy, vz))
+        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])

-    # point = [[x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
-    #          [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]]  # 画点：原点、第一主成分、第二主成分
-    # lines = [[0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]]  # 画出三点之间两两连线
-    # colors = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]
+        roll, pitch, yaw = tfs.euler.mat2euler(rmat)

-    # # 构造open3d中的LineSet对象，用于主成分显示
-    # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
-    # line_set.colors = o3d.utility.Vector3dVector(colors)
-    # o3d.visualization.draw_geometries([point_cloud, line_set])
-    # o3d.visualization.draw_geometries([clean_pcd, line_set])
+        # point = [
+        #     [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
+        #     [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
+        #     ]  # 画点：原点、第一主成分、第二主成分
+        # lines = [
+        #     [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
+        # ]  # 画出三点之间两两连线
+        # colors = [
+        #     [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
+        # ]
+        #
+        # # 构造open3d中的LineSet对象，用于主成分显示
+        # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
+        # line_set.colors = o3d.utility.Vector3dVector(colors)
+        # o3d.visualization.draw_geometries([point_cloud, line_set])
+        # # o3d.visualization.draw_geometries([clean_pcd, line_set])

-    return x, y, z, roll, pitch, yaw
+        return x, y, z, roll, pitch, yaw
+
+    else:
+        return 0.0, 0.0, 0.0, None, None, None


 def draw_box(set_confidence, rgb_img, result):
@@ -244,7 +270,6 @@ class DetectNode(Node):

    def _camera_info_callback(self, msg: CameraInfo):
        """Get camera info"""
-        camera_size = [msg.width, msg.height]
        self.K = msg.k
        self.D = msg.d

@@ -254,8 +279,8 @@ class DetectNode(Node):
        if self.K is not None and self.D is not None:
            self.map1, self.map2, self.K = self._get_map(msg.k)
            self.intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                camera_size[0],
-                camera_size[1],
+                self.camera_size[0],
+                self.camera_size[1],
                self.K[0],
                self.K[4],
                self.K[2],
@@ -313,7 +338,9 @@ class DetectNode(Node):
        pose_dict = defaultdict(list)

        '''Get Predict Results'''
+        # time1 = time.time()
        results = self.model(rgb_img)
+        # time2 = time.time()
        result = results[0]

        '''Get masks'''
@@ -327,21 +354,46 @@ class DetectNode(Node):
        class_ids = result.boxes.cls.cpu().numpy()
        labels = result.names

+        # time3 = time.time()
+
+        # tasks = [
+        #     (mask, orig_shape, rgb_img, depth_img, int(class_ids[i]), labels[class_ids[i]], self.camera_size, self.K)
+        #     for i, (mask, conf) in enumerate(zip(masks, confidences))
+        #     if conf >= self.set_confidence  # 置信度低的不加入任务
+        # ]
+        # with ProcessPoolExecutor(max_workers=4) as executor:
+        #     results = list(executor.map(calculate_pose_for_mask, tasks))
+        #
+        # for res in results:
+        #     class_id, label, x, y, z, roll, pitch, yaw = res
+        #     pose = Pose()
+        #     pose.position = Point(x=x, y=y, z=z)
+        #     pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
+        #     pose_dict[(class_id, label)].append(pose)
+
        for i, mask in enumerate(masks):
            '''Confidence Filter'''
            if confidences[i] >= self.set_confidence:
                x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, self.intrinsics)
-                cv2.circle(rgb_img, (int(x), int(y)), 5, (0, 0, 255), 2)
-
                if (x, y, z) != (0.0, 0.0, 0.0):
                    pose = Pose()
                    pose.position = Point(x=x, y=y, z=z)
                    pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
                    pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)

+        # time4 = time.time()
+
        '''mask_img and box_img is or not output'''
        if self.output_boxes and not self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
+            # time5 = time.time()
+            # self.get_logger().info(f'start')
+            # self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
+            # self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
+            # self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
+            # self.get_logger().info(f'{(time5 - time4)*1000} ms, draw box')
+            # self.get_logger().info(f'{(time5 - time1)*1000} ms, completing a picture entire process')
+            # self.get_logger().info(f'end')
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        elif self.output_boxes and self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
@@ -377,18 +429,9 @@ class DetectNode(Node):
                    ], dtype=np.int32)
                    cv2.fillConvexPoly(mask, pts, 1)

-            # mask_set = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
-            # pts_set = np.array([
-            #     corners_subpix[0, 0],
-            #     corners_subpix[0, 1],
-            #     corners_subpix[1, 1],
-            #     corners_subpix[1, 0]
-            # ], dtype=np.int32)
-            # cv2.fillConvexPoly(mask_set, pts_set, 1)
-
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
            orig_shape = rgb_img_gray.shape
-            x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img,depth_img, self.intrinsics) # , mask_set)
+            x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img,depth_img, self.intrinsics)

            if (x, y, z) != (0.0, 0.0, 0.0):
                pose = Pose()