修正检测节点计算的位置误差

This commit is contained in:
liangyuxuan
2025-09-11 18:12:37 +08:00
parent ef16f02f0f
commit 7348a99a53

View File

@@ -1,5 +1,6 @@
import os
from collections import defaultdict
import time
import cv2
import open3d as o3d
@@ -7,6 +8,8 @@ import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
# from concurrent.futures import ProcessPoolExecutor
import torch
from detect_part.ultralytics import YOLO
@@ -21,18 +24,32 @@ from interfaces.msg import PoseWithID, PoseWithIDArray
import detect_part
# def calculate_pose_for_mask(args):
# mask, orig_shape, rgb_img, depth_img, class_id, label, camera_size, K = args
# intrinsics = o3d.camera.PinholeCameraIntrinsic(
# camera_size[0],
# camera_size[1],
# K[0],
# K[4],
# K[2],
# K[5]
# )
# x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, intrinsics)
# return class_id, label, x, y, z, roll, pitch, yaw
def pca(data, sort=True):
# center = data.get_center() # 求 NX3 向量的均值
data_np = np.asarray(data.points)
center = np.mean(data_np, axis=0)
centered_points = data_np - center # 去中心化
center = np.mean(data, axis=0)
centered_points = data - center # 去中心化
# 计算特征值和特征向量
# cov_matrix = np.dot(centered_points.T,centered_points)
# eigenvectors, eigenvalues, eigenvectors_T = np.linalg.svd(cov_matrix)
cov_matrix = np.cov(centered_points.T) # 注意要转置
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
try:
cov_matrix = np.cov(centered_points.T) # 注意要转置
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
except np.linalg.LinAlgError:
return None, None
if sort:
sort = eigenvalues.argsort()[::-1] # 降序排列
@@ -42,18 +59,14 @@ def pca(data, sort=True):
return eigenvalues, eigenvectors
def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray, intrinsics): # , mask_set):
def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray, intrinsics):
"""计算位态"""
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
rgb_img_mask = np.zeros_like(rgb_img)
rgb_img_mask[mask > 0] = rgb_img[mask > 0]
# rgb_img_mask[mask_set > 0] = rgb_img_mask[mask_set > 0] + np.array([0, 0, 255]) * 0.5
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16)) # 转成毫米整数
color_o3d = o3d.geometry.Image(rgb_img_mask.astype(np.uint8))
color_o3d = o3d.geometry.Image(rgb_img.astype(np.uint8))
rgbd_img = o3d.geometry.RGBDImage.create_from_color_and_depth(
color=color_o3d, depth=depth_o3d,
@@ -65,38 +78,51 @@ def calculate_pose(mask, orig_shape, rgb_img: np.ndarray, depth_img: np.ndarray,
point_cloud = point_cloud.remove_non_finite_points()
down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
clean_pcd, ind = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
center = point_cloud.get_center()
if clean_pcd.points == 0:
return 0.0, 0.0, 0.0, None, None, None
center = clean_pcd.get_center()
x, y, z = center
w, v = pca(clean_pcd)
vx, vy, vz = v[:,0], v[:,1], v[:,2]
w, v = pca(np.asarray(clean_pcd.points))
if vx[0] < 0:
vx = -vx
if vy[1] < 0:
vy = -vy
if not np.allclose(np.cross(vx, vy), vz):
vz = -vz
if w is not None and v is not None:
vx, vy, vz = v[:,0], v[:,1], v[:,2]
R = np.column_stack((vx, vy, vz))
rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
if vx[0] < 0:
vx = -vx
if vy[1] < 0:
vy = -vy
if not np.allclose(np.cross(vx, vy), vz):
vz = -vz
roll, pitch, yaw = tfs.euler.mat2euler(rmat)
R = np.column_stack((vx, vy, vz))
rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
# point = [[x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]] # 画点:原点、第一主成分、第二主成分
# lines = [[0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]] # 画出三点之间两两连线
# colors = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]
roll, pitch, yaw = tfs.euler.mat2euler(rmat)
# # 构造open3d中的LineSet对象用于主成分显示
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
# line_set.colors = o3d.utility.Vector3dVector(colors)
# o3d.visualization.draw_geometries([point_cloud, line_set])
# o3d.visualization.draw_geometries([clean_pcd, line_set])
# point = [
# [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
# ] # 画点:原点、第一主成分、第二主成分
# lines = [
# [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
# ] # 画出三点之间两两连线
# colors = [
# [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
# ]
#
# # 构造open3d中的LineSet对象用于主成分显示
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
# line_set.colors = o3d.utility.Vector3dVector(colors)
# o3d.visualization.draw_geometries([point_cloud, line_set])
# # o3d.visualization.draw_geometries([clean_pcd, line_set])
return x, y, z, roll, pitch, yaw
return x, y, z, roll, pitch, yaw
else:
return 0.0, 0.0, 0.0, None, None, None
def draw_box(set_confidence, rgb_img, result):
@@ -244,7 +270,6 @@ class DetectNode(Node):
def _camera_info_callback(self, msg: CameraInfo):
"""Get camera info"""
camera_size = [msg.width, msg.height]
self.K = msg.k
self.D = msg.d
@@ -254,8 +279,8 @@ class DetectNode(Node):
if self.K is not None and self.D is not None:
self.map1, self.map2, self.K = self._get_map(msg.k)
self.intrinsics = o3d.camera.PinholeCameraIntrinsic(
camera_size[0],
camera_size[1],
self.camera_size[0],
self.camera_size[1],
self.K[0],
self.K[4],
self.K[2],
@@ -313,7 +338,9 @@ class DetectNode(Node):
pose_dict = defaultdict(list)
'''Get Predict Results'''
# time1 = time.time()
results = self.model(rgb_img)
# time2 = time.time()
result = results[0]
'''Get masks'''
@@ -327,21 +354,46 @@ class DetectNode(Node):
class_ids = result.boxes.cls.cpu().numpy()
labels = result.names
# time3 = time.time()
# tasks = [
# (mask, orig_shape, rgb_img, depth_img, int(class_ids[i]), labels[class_ids[i]], self.camera_size, self.K)
# for i, (mask, conf) in enumerate(zip(masks, confidences))
# if conf >= self.set_confidence # 置信度低的不加入任务
# ]
# with ProcessPoolExecutor(max_workers=4) as executor:
# results = list(executor.map(calculate_pose_for_mask, tasks))
#
# for res in results:
# class_id, label, x, y, z, roll, pitch, yaw = res
# pose = Pose()
# pose.position = Point(x=x, y=y, z=z)
# pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
# pose_dict[(class_id, label)].append(pose)
for i, mask in enumerate(masks):
'''Confidence Filter'''
if confidences[i] >= self.set_confidence:
x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img, depth_img, self.intrinsics)
cv2.circle(rgb_img, (int(x), int(y)), 5, (0, 0, 255), 2)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(x=roll, y=pitch, z=yaw)
pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
# time4 = time.time()
'''mask_img and box_img is or not output'''
if self.output_boxes and not self.output_masks:
draw_box(self.set_confidence, rgb_img, result)
# time5 = time.time()
# self.get_logger().info(f'start')
# self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
# self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
# self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
# self.get_logger().info(f'{(time5 - time4)*1000} ms, draw box')
# self.get_logger().info(f'{(time5 - time1)*1000} ms, completing a picture entire process')
# self.get_logger().info(f'end')
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
elif self.output_boxes and self.output_masks:
draw_box(self.set_confidence, rgb_img, result)
@@ -377,18 +429,9 @@ class DetectNode(Node):
], dtype=np.int32)
cv2.fillConvexPoly(mask, pts, 1)
# mask_set = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
# pts_set = np.array([
# corners_subpix[0, 0],
# corners_subpix[0, 1],
# corners_subpix[1, 1],
# corners_subpix[1, 0]
# ], dtype=np.int32)
# cv2.fillConvexPoly(mask_set, pts_set, 1)
rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
orig_shape = rgb_img_gray.shape
x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img,depth_img, self.intrinsics) # , mask_set)
x, y, z, roll, pitch, yaw = calculate_pose(mask, orig_shape, rgb_img,depth_img, self.intrinsics)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()