修正检测节点计算的位置误差,优化检测和计算消耗

This commit is contained in:
liangyuxuan
2025-09-15 11:03:10 +08:00
parent 7348a99a53
commit 7b194e0d72
5 changed files with 144 additions and 103 deletions

View File

@@ -8,8 +8,6 @@ import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
# from concurrent.futures import ProcessPoolExecutor
import torch
from detect_part.ultralytics import YOLO
@@ -24,29 +22,22 @@ from interfaces.msg import PoseWithID, PoseWithIDArray
import detect_part
# def calculate_pose_for_mask(args):
# mask, orig_shape, rgb_img, depth_img, class_id, label, camera_size, K = args
# intrinsics = o3d.camera.PinholeCameraIntrinsic(
# camera_size[0],
# camera_size[1],
# K[0],
# K[4],
# K[2],
# K[5]
# )
# x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, intrinsics)
# return class_id, label, x, y, z, roll, pitch, yaw
def get_map(K, D, camera_size):
    """Build undistortion remap tables for a pinhole camera.

    Args:
        K: flat row-major 3x3 intrinsic matrix (9 values).
        D: distortion coefficients.
        camera_size: (width, height) of the image in pixels.

    Returns:
        (map1, map2, new_K): the two maps to feed ``cv2.remap`` and the
        flattened optimal intrinsic matrix valid after undistortion.
    """
    w, h = camera_size
    intrinsic = np.array(K).reshape(3, 3)
    dist = np.array(D)
    # alpha=1 keeps all source pixels in the rectified view.
    new_K, _ = cv2.getOptimalNewCameraMatrix(intrinsic, dist, (w, h), 1, (w, h))
    map1, map2 = cv2.initUndistortRectifyMap(
        intrinsic, dist, None, new_K, (w, h), cv2.CV_32FC1)
    return map1, map2, new_K.flatten()
def pca(data, sort=True):
center = np.mean(data, axis=0)
centered_points = data - center # 去中心化
# 计算特征值和特征向量
# cov_matrix = np.dot(centered_points.T,centered_points)
# eigenvectors, eigenvalues, eigenvectors_T = np.linalg.svd(cov_matrix)
try:
cov_matrix = np.cov(centered_points.T) # 注意要转置
cov_matrix = np.cov(centered_points.T) # 转置
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
except np.linalg.LinAlgError:
return None, None
@@ -58,29 +49,26 @@ def pca(data, sort=True):
return eigenvalues, eigenvectors
def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray, intrinsics):
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):
"""计算位态"""
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16)) # 转成毫米整数
color_o3d = o3d.geometry.Image(rgb_img.astype(np.uint8))
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
rgbd_img = o3d.geometry.RGBDImage.create_from_color_and_depth(
color=color_o3d, depth=depth_o3d,
depth_scale=1000.0, # 深度单位换算
depth_trunc=3.0,
convert_rgb_to_intensity=False
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
depth_scale=1000.0,
depth_trunc=5.0,
)
point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_img, intrinsics)
point_cloud = point_cloud.remove_non_finite_points()
down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
clean_pcd, ind = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
if clean_pcd.points == 0:
if len(clean_pcd.points) == 0:
return 0.0, 0.0, 0.0, None, None, None
center = clean_pcd.get_center()
x, y, z = center
@@ -168,10 +156,29 @@ def distortion_correction(color_image, depth_image, map1, map2):
return undistorted_color, undistorted_depth
def crop_mask_bbox(depth_img, mask, box):
    """Crop the depth image and mask to a detection bounding box.

    Args:
        depth_img: H x W depth image.
        mask: H x W mask (0/1 or bool), same shape as depth_img.
        box: detection box whose first four values are
            (x_center, y_center, w, h) in pixel coordinates.

    Returns:
        (depth_crop, mask_crop, (x_min, y_min)) where (x_min, y_min) is the
        top-left corner of the crop after clamping to the image bounds.
    """
    # BUG FIX: numpy .shape is (rows, cols) == (height, width). The previous
    # unpacking `width, high = depth_img.shape` swapped the two, so the x
    # slice was clamped by the image height and the y slice by the width,
    # truncating or emptying crops on non-square images.
    high, width = depth_img.shape
    x_center, y_center, w, h = box[:4]
    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
    # Clamp once so the returned offset matches the slices actually taken.
    x_min, y_min = max(0, x_min), max(0, y_min)
    depth_crop = depth_img[y_min:min(y_max, high), x_min:min(x_max, width)]
    mask_crop = mask[y_min:min(y_max, high), x_min:min(x_max, width)]
    return depth_crop, mask_crop, (x_min, y_min)
class DetectNode(Node):
def __init__(self, name, mode):
super().__init__(name)
self.device = None
self.checkpoint_path = None
self.checkpoint_name = None
self.function = None
@@ -181,8 +188,10 @@ class DetectNode(Node):
self.map1 = self.map2 = None
self.intrinsics = None
self.mode = mode
self.device = None
self.calculate_function = None
self.fx = self.fy = 0.5
self.camera_size = []
self.cv_bridge = CvBridge()
'''init'''
@@ -196,6 +205,13 @@ class DetectNode(Node):
self.function = None
self.get_logger().error('Error: Mode Unknown')
if self.device == 'cpu':
self.calculate_function = calculate_pose_cpu
elif self.device == 'gpu':
raise NotImplementedError('Function: calculate_pose_gpu not implemented')
else:
raise ValueError(f"device must be cpu or gpu, now {self.device}")
self._init_publisher()
self._init_subscriber()
@@ -213,7 +229,7 @@ class DetectNode(Node):
self.declare_parameter('output_masks', False)
self.output_masks = self.get_parameter('output_masks').value
self.declare_parameter('set_confidence', 0.5)
self.declare_parameter('set_confidence', 0.6)
self.set_confidence = self.get_parameter('set_confidence').value
self.declare_parameter('color_image_topic', '/camera/color/image_raw')
@@ -225,11 +241,18 @@ class DetectNode(Node):
self.declare_parameter('camera_info_topic', '/camera/color/camera_info')
self.camera_info_topic = self.get_parameter('camera_info_topic').value
self.declare_parameter('device', 'cpu')
self.device = self.get_parameter('device').value
self.declare_parameter('width', 1280)
self.declare_parameter('high', 720)
self.expect_size = [self.get_parameter('width').value, self.get_parameter('high').value]
def _init_model(self):
"""init model"""
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
try:
self.model = YOLO(self.checkpoint_path).to(self.device)
self.model = YOLO(self.checkpoint_path).to(device_model)
except Exception as e:
self.get_logger().error(f'Failed to load YOLO model: {e}')
raise
@@ -273,18 +296,18 @@ class DetectNode(Node):
self.K = msg.k
self.D = msg.d
self.camera_size.append(msg.width)
self.camera_size.append(msg.height)
self.camera_size = [msg.width, msg.height]
if self.K is not None and self.D is not None:
self.map1, self.map2, self.K = self._get_map(msg.k)
self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
self.scale = [self.expect_size[0] / self.camera_size[0], self.expect_size[1] / self.camera_size[1]]
self.intrinsics = o3d.camera.PinholeCameraIntrinsic(
self.camera_size[0],
self.camera_size[1],
self.K[0],
self.K[4],
self.K[2],
self.K[5]
self.expect_size[0],
self.expect_size[1],
self.K[0] * self.scale[0],
self.K[4] * self.scale[1],
self.K[2] * self.scale[0],
self.K[5] * self.scale[1]
)
if len(self.D) != 0:
@@ -295,19 +318,15 @@ class DetectNode(Node):
else:
raise "K and D are not defined"
def _get_map(self, K):
h, w = self.camera_size[::-1]
K = np.array(K).reshape(3, 3)
D = np.array(self.D)
new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
return map1, map2, new_K.flatten()
def _sync_callback(self, color_img_ros, depth_img_ros):
"""同步回调函数"""
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
if self.camera_size != self.expect_size:
color_img_cv = cv2.resize(color_img_cv, self.expect_size)
depth_img_cv = cv2.resize(depth_img_cv, self.expect_size)
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
img, pose_dict = self.function(color_img_cv, depth_img_cv)
@@ -338,9 +357,9 @@ class DetectNode(Node):
pose_dict = defaultdict(list)
'''Get Predict Results'''
# time1 = time.time()
time1 = time.time()
results = self.model(rgb_img)
# time2 = time.time()
time2 = time.time()
result = results[0]
'''Get masks'''
@@ -350,50 +369,52 @@ class DetectNode(Node):
orig_shape = result.masks.orig_shape
'''Get boxes'''
boxes = result.boxes.data.cpu().numpy()
confidences = result.boxes.conf.cpu().numpy()
class_ids = result.boxes.cls.cpu().numpy()
labels = result.names
# time3 = time.time()
time3 = time.time()
# tasks = [
# (mask, orig_shape, rgb_img, depth_img, int(class_ids[i]), labels[class_ids[i]], self.camera_size, self.K)
# for i, (mask, conf) in enumerate(zip(masks, confidences))
# if conf >= self.set_confidence # 置信度低的不加入任务
# ]
# with ProcessPoolExecutor(max_workers=4) as executor:
# results = list(executor.map(calculate_pose_for_mask, tasks))
#
# for res in results:
# class_id, label, x, y, z, roll, pitch, yaw = res
# pose = Pose()
# pose.position = Point(x=x, y=y, z=z)
# pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
# pose_dict[(class_id, label)].append(pose)
for i, mask in enumerate(masks):
for i, (mask, box) in enumerate(zip(masks, boxes)):
'''Confidence Filter'''
if confidences[i] >= self.set_confidence:
x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, self.intrinsics)
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
if depth_crop is not None:
continue
mask_crop = cv2.resize(mask_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
depth_crop = cv2.resize(depth_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.expect_size[0] * self.fx),
int(self.expect_size[1] * self.fy),
self.K[0] * self.scale[0] * self.fx,
self.K[4] * self.scale[1] * self.fy,
(self.K[2] - x_min) * self.scale[0] * self.fx,
(self.K[5] - y_min) * self.scale[1] * self.fy
)
x, y, z, roll, pitch, yaw = self.calculate_function(mask_crop, depth_crop, intrinsics)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
# time4 = time.time()
time4 = time.time()
'''mask_img and box_img is or not output'''
if self.output_boxes and not self.output_masks:
draw_box(self.set_confidence, rgb_img, result)
# time5 = time.time()
# self.get_logger().info(f'start')
# self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
# self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
# self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
# self.get_logger().info(f'{(time5 - time4)*1000} ms, draw box')
# self.get_logger().info(f'{(time5 - time1)*1000} ms, completing a picture entire process')
# self.get_logger().info(f'end')
time5 = time.time()
self.get_logger().info(f'start')
self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
self.get_logger().info(f'{(time5 - time4)*1000} ms, draw box')
self.get_logger().info(f'{(time5 - time1)*1000} ms, completing a picture entire process')
self.get_logger().info(f'end')
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
elif self.output_boxes and self.output_masks:
draw_box(self.set_confidence, rgb_img, result)
@@ -431,7 +452,8 @@ class DetectNode(Node):
rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
orig_shape = rgb_img_gray.shape
x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img,depth_img, self.intrinsics)
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
x, y, z, roll, pitch, yaw = self.calculate_function(mask, depth_img, self.intrinsics)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
@@ -457,6 +479,7 @@ def main(args=None):
node.destroy_node()
rclpy.shutdown()
def crossboard_detect(args=None):
rclpy.init(args=args)
node = DetectNode('detect', 'test')

View File

@@ -1,22 +1,4 @@
# Copyright 2023 Intel Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Launch realsense2_camera node."""
import os
import yaml
from launch import LaunchDescription
import launch_ros.actions
from launch_ros.actions import Node
from launch.actions import DeclareLaunchArgument, OpaqueFunction, LogInfo
from launch.substitutions import LaunchConfiguration
@@ -31,6 +13,9 @@ def generate_launch_description():
DeclareLaunchArgument('color_image_topic', default_value='/camera/camera/color/image_raw'),
DeclareLaunchArgument('depth_image_topic', default_value='/camera/camera/depth/image_rect_raw'),
DeclareLaunchArgument('camera_info_topic', default_value='/camera/camera/color/camera_info'),
DeclareLaunchArgument('width', default_value='1280'),
DeclareLaunchArgument('high', default_value='720'),
DeclareLaunchArgument('device', default_value='cpu')
]
def create_detect_node(context):
@@ -41,6 +26,9 @@ def generate_launch_description():
color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
width = LaunchConfiguration('width').perform(context)
high = LaunchConfiguration('high').perform(context)
device = LaunchConfiguration('device').perform(context)
return [
Node(
@@ -54,6 +42,9 @@ def generate_launch_description():
'color_image_topic': color_image_topic,
'depth_image_topic': depth_image_topic,
'camera_info_topic': camera_info_topic,
'width': int(width),
'high': int(high),
'device': device,
}]
)
]

View File

@@ -58,6 +58,9 @@ def generate_launch_description():
DeclareLaunchArgument('color_image_topic', default_value='/camera/color/image_raw'),
DeclareLaunchArgument('depth_image_topic', default_value='/camera/depth/image_raw'),
DeclareLaunchArgument('camera_info_topic', default_value='/camera/color/camera_info'),
DeclareLaunchArgument('width', default_value='1280'),
DeclareLaunchArgument('high', default_value='720'),
DeclareLaunchArgument('device', default_value='cpu')
]
args_camera = [
@@ -318,6 +321,9 @@ def generate_launch_description():
output_boxes = LaunchConfiguration('output_boxes').perform(context)
output_masks = LaunchConfiguration('output_masks').perform(context)
conf = LaunchConfiguration('set_confidence').perform(context)
width = LaunchConfiguration('width').perform(context)
high = LaunchConfiguration('high').perform(context)
device = LaunchConfiguration('device').perform(context)
return [
Node(
@@ -327,7 +333,10 @@ def generate_launch_description():
'checkpoint_name': checkpoint,
'output_boxes': output_boxes.lower() == 'true',
'output_masks': output_masks.lower() == 'true',
'set_confidence': float(conf)
'set_confidence': float(conf),
'width': int(width),
'high': int(high),
'device': device
}]
)
]

View File

@@ -54,10 +54,13 @@ def generate_launch_description():
DeclareLaunchArgument('checkpoint_name', default_value='yolo11s-seg.pt'),
DeclareLaunchArgument('output_boxes', default_value='True'),
DeclareLaunchArgument('output_masks', default_value='False'),
DeclareLaunchArgument('set_confidence', default_value='0.6'),
DeclareLaunchArgument('set_confidence', default_value='0.2'),
DeclareLaunchArgument('color_image_topic', default_value='/camera/color/image_raw'),
DeclareLaunchArgument('depth_image_topic', default_value='/camera/depth/image_raw'),
DeclareLaunchArgument('camera_info_topic', default_value='/camera/color/camera_info'),
DeclareLaunchArgument('width', default_value='1280'),
DeclareLaunchArgument('high', default_value='720'),
DeclareLaunchArgument('device', default_value='cpu')
]
args_camera = [
@@ -321,6 +324,9 @@ def generate_launch_description():
color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
width = LaunchConfiguration('width').perform(context)
high = LaunchConfiguration('high').perform(context)
device = LaunchConfiguration('device').perform(context)
return [
Node(
@@ -334,6 +340,9 @@ def generate_launch_description():
'color_image_topic': color_image_topic,
'depth_image_topic': depth_image_topic,
'camera_info_topic': camera_info_topic,
'width': int(width),
'high': int(high),
'device': device
}]
)
]

View File

@@ -150,6 +150,9 @@ def generate_launch_description():
DeclareLaunchArgument('color_image_topic', default_value='/camera/camera/color/image_raw'),
DeclareLaunchArgument('depth_image_topic', default_value='/camera/camera/depth/image_rect_raw'),
DeclareLaunchArgument('camera_info_topic', default_value='/camera/camera/color/camera_info'),
DeclareLaunchArgument('width', default_value='1280'),
DeclareLaunchArgument('high', default_value='720'),
DeclareLaunchArgument('device', default_value='cpu')
]
def create_detect_node(context):
@@ -160,6 +163,9 @@ def generate_launch_description():
color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
width = LaunchConfiguration('width').perform(context)
high = LaunchConfiguration('high').perform(context)
device = LaunchConfiguration('device').perform(context)
return [
Node(
@@ -173,6 +179,9 @@ def generate_launch_description():
'color_image_topic': color_image_topic,
'depth_image_topic': depth_image_topic,
'camera_info_topic': camera_info_topic,
'width': int(width),
'high': int(high),
'device': device
}]
)
]