1

2025-09-23 17:49:20 +08:00
parent b31e6954d0
commit e3d41a8cd7
3 changed files with 66 additions and 51 deletions
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.10" />
+  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
 </project>
--- a/HiveCoreR0/src/robot_vision/detect_part/detect_part/crossboard_detect.py
+++ b/HiveCoreR0/src/robot_vision/detect_part/detect_part/crossboard_detect.py
@@ -65,11 +65,11 @@ def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):

    point_cloud = point_cloud.remove_non_finite_points()

-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
-    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
+    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
+    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=5.0)

    if len(clean_pcd.points) == 0:
-        return 0.0, 0.0, 0.0, None, None, None
+        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    center = clean_pcd.get_center()
    x, y, z = center

@@ -90,22 +90,21 @@ def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):

        roll, pitch, yaw = tfs.euler.mat2euler(rmat)

-        # point = [
-        #     [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
-        #     [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        #     ]  # 画点：原点、第一主成分、第二主成分
-        # lines = [
-        #     [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
-        # ]  # 画出三点之间两两连线
-        # colors = [
-        #     [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
-        # ]
-        #
-        # # 构造open3d中的LineSet对象，用于主成分显示
-        # line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
-        # line_set.colors = o3d.utility.Vector3dVector(colors)
-        # o3d.visualization.draw_geometries([point_cloud, line_set])
-        # # o3d.visualization.draw_geometries([clean_pcd, line_set])
+        point = [
+            [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
+            [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
+            ]  # 画点：原点、第一主成分、第二主成分
+        lines = [
+            [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
+        ]  # 画出三点之间两两连线
+        colors = [
+            [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
+        ]
+        # 构造open3d中的LineSet对象，用于主成分显示
+        line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
+        line_set.colors = o3d.utility.Vector3dVector(colors)
+        o3d.visualization.draw_geometries([point_cloud, line_set])
+        o3d.visualization.draw_geometries([clean_pcd, line_set])

        return x, y, z, roll, pitch, yaw

@@ -156,7 +155,7 @@ def distortion_correction(color_image, depth_image, map1, map2):
    return undistorted_color, undistorted_depth


-def crop_mask_bbox(depth_img, mask, box):
+def crop_mask_bbox(depth_img, mask):
    """
    输入:
        depth_img: H x W
@@ -164,14 +163,14 @@ def crop_mask_bbox(depth_img, mask, box):
    输出:
        depth_crop, mask_crop
    """
-    width, high = depth_img.shape
-    x_center, y_center, w, h = box[:4]
+    high, width = depth_img.shape
+    ys, xs = np.where(mask > 0)

-    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
-    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
+    x_min, x_max = int(round(xs.min())), int(round(xs.max()))
+    y_min, y_max = int(round(ys.min())), int(round(ys.max()))

-    depth_crop = depth_img[max(0, y_min):min(y_max, high), max(0, x_min):min(x_max, width)]
-    mask_crop = mask[max(0, y_min):min(y_max, high), max(0, x_min):min(x_max, width)]
+    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
+    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]

    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))

@@ -281,21 +280,12 @@ class DetectNode(Node):
        self.camera_size = [msg.width, msg.height]

        if self.K is not None and self.D is not None:
-            self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
-            self.scale = [self.expect_size[0] / self.camera_size[0], self.expect_size[1] / self.camera_size[1]]
-            self.intrinsics = o3d.camera.PinholeCameraIntrinsic(
-                self.expect_size[0],
-                self.expect_size[1],
-                self.K[0] * self.scale[0],
-                self.K[4] * self.scale[1],
-                self.K[2] * self.scale[0],
-                self.K[5] * self.scale[1]
-            )
+            # self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)

            if len(self.D) != 0:
                self.destroy_subscription(self.sub_camera_info)
            else:
-                self.D = [0, 0, 0, 0, 0, 0, 0, 0]
+                self.D = [0, 0, 0, 0, 0]
                self.destroy_subscription(self.sub_camera_info)
        else:
            raise "K and D are not defined" 
@@ -305,11 +295,8 @@ class DetectNode(Node):
        """同步回调函数"""
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
-        if self.camera_size != self.expect_size:
-            color_img_cv = cv2.resize(color_img_cv, self.expect_size)
-            depth_img_cv = cv2.resize(depth_img_cv, self.expect_size)

-        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
+        # color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)

        img, pose_dict = self.function(color_img_cv, depth_img_cv)

@@ -337,8 +324,8 @@ class DetectNode(Node):
    def _test_image(self, rgb_img, depth_img):
        pose_dict = defaultdict(list)
        rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
-        pattern_size = (11, 8)
-        # pattern_size = (15, 7)
+        # pattern_size = (11, 8)
+        pattern_size = (15, 7)
        ret, corners = cv2.findChessboardCorners(rgb_img_gray, pattern_size)
        if ret:
            # 角点亚像素精确化（提高标定精度）
@@ -361,12 +348,36 @@ class DetectNode(Node):
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
            orig_shape = rgb_img_gray.shape
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
-            x, y, z, roll, pitch, yaw = self.calculate_function(mask, depth_img, self.intrinsics)
+            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask)
+
+            if depth_crop is None:
+                return None, None
+
+            intrinsics = o3d.camera.PinholeCameraIntrinsic(
+                int(self.camera_size[0]),
+                int(self.camera_size[1]),
+                self.K[0],
+                self.K[4],
+                self.K[2] - x_min,
+                self.K[5] - y_min
+            )
+
+            x, y, z, roll, pitch, yaw = self.calculate_function(mask_crop, depth_crop, intrinsics)
+
+            point = corners_subpix[3, 7]
+            z_p = depth_img[int(round(point[1])), int(round(point[0]))] / 1e3
+            x_p = (point[0] - self.K[2]) / self.K[0] * z_p
+            y_p = (point[1] - self.K[5]) / self.K[4] * z_p
+
+            self.get_logger().info(f"{x}, {y}, {z}")
+            self.get_logger().info(f"{x_p}, {y_p}, {z_p}")
+            self.get_logger().info(f"{x_p-x}, {y_p-y}, {z_p-z}")

            if (x, y, z) != (0.0, 0.0, 0.0):
                pose = Pose()
                pose.position = Point(x=x, y=y, z=z)
-                pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
+                quat = tfs.euler.euler2quat(roll, pitch, yaw)
+                pose.orientation = Quaternion(w=quat[0], x=quat[1], y=quat[2], z=quat[3])
                pose_dict[int(99), 'crossboard'].append(pose)

            cv2.putText(rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}', (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
--- a/HiveCoreR0/src/robot_vision/detect_part/detect_part/detect.py
+++ b/HiveCoreR0/src/robot_vision/detect_part/detect_part/detect.py
@@ -69,8 +69,8 @@ def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):

    point_cloud = point_cloud.remove_non_finite_points()

-    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
-    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
+    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
+    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=5.0)

    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, None, None, None
@@ -152,14 +152,14 @@ def crop_mask_bbox(depth_img, mask, box):
    输出:
        depth_crop, mask_crop
    """
-    width, high = depth_img.shape
+    high, width = depth_img.shape
    x_center, y_center, w, h = box[:4]

    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))

-    depth_crop = depth_img[max(0, y_min):min(y_max, high), max(0, x_min):min(x_max, width)]
-    mask_crop = mask[max(0, y_min):min(y_max, high), max(0, x_min):min(x_max, width)]
+    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
+    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]

    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))

@@ -379,7 +379,8 @@ class DetectNode(Node):
                if (x, y, z) != (0.0, 0.0, 0.0):
                    pose = Pose()
                    pose.position = Point(x=x, y=y, z=z)
-                    pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
+                    quat = tfs.euler.euler2quat(roll, pitch, yaw)
+                    pose.orientation = Quaternion(w=quat[0], x=quat[1], y=quat[2], z=quat[3])
                    pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)

        time4 = time.time()