添加抓取位置微调、检测图片和原图片保存

This commit is contained in:
liangyuxuan
2026-01-15 17:39:45 +08:00
parent a59776e6c2
commit 039bb2d165
13 changed files with 335 additions and 144 deletions

View File

@@ -13,6 +13,7 @@
"1200": "The number of points is insufficient to compute an OBB",
"1201": "PCA output vector is None",
"1202": "This pose cannot be grab, and position refine fail",
"1300": "E2E model input data 'coors' are fewer than 128",
"1301": "E2E model input data 'point_clouds' are fewer than 128",

View File

@@ -1,5 +1,5 @@
{
"node_name": "bottle_detect_service",
"node_name": "medical_sense_service",
"output_boxes": "True",
"output_masks": "False",
@@ -17,11 +17,11 @@
"detect_mode": "Detect",
"Detect_configs": {
"checkpoint_path": "checkpoints/medical_sense-seg.pt",
"confidence": 0.50,
"confidence": 0.75,
"classes": []
},
"calculate_mode": "E2E",
"calculate_mode": "PCA",
"PCA_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,

View File

@@ -1,32 +1,47 @@
# setup.py data-file assembly for the `vision_detect` ROS 2 (ament) package.
import os
from glob import glob
from setuptools import find_packages, setup

package_name = 'vision_detect'

# Collect every exported OpenVINO model directory under checkpoints/ so each
# one is installed to share/<package>/checkpoints/<model_name>.
openvino_files = []
for model_dir in glob("checkpoints/*_openvino_model"):
    model_name = os.path.basename(model_dir)
    model_files = glob(os.path.join(model_dir, '*'))
    openvino_files.append(
        ('share/' + package_name + '/checkpoints/' + model_name, model_files)
    )

# Static data files: launch files, JSON configs, model weights, point clouds,
# and prebuilt shared objects that must land inside the installed package.
data_files = [
    ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
    ('share/' + package_name, ['package.xml']),
    ('share/' + package_name + '/launch', glob('launch/*.launch.py')),
    ('share/' + package_name + '/configs', glob('configs/*.json')),
    ('share/' + package_name + '/configs/flexiv_configs',
     glob('configs/flexiv_configs/*.json')),
    ('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
    ('share/' + package_name + '/configs/launch_configs',
     glob('configs/launch_configs/*.json')),
    ('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
    ('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
    # Prebuilt .so extensions; the explicit lib/python3.10 path pins these to
    # the Python 3.10 install tree — NOTE(review): breaks on other versions.
    ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
     glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
    ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
     glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
]
data_files.extend(openvino_files)
setup(
name=package_name,
version='0.0.0',
packages=find_packages(exclude=['test']),
data_files=[
('share/ament_index/resource_index/packages', ['resource/' + package_name]),
('share/' + package_name, ['package.xml']),
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
('share/' + package_name + '/configs', glob('configs/*.json')),
('share/' + package_name + '/configs/flexiv_configs', glob('configs/flexiv_configs/*.json')),
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
('share/' + package_name + '/configs/launch_configs', glob('configs/launch_configs/*.json')),
('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
],
data_files=data_files,
install_requires=['setuptools'],
zip_safe=True,
include_package_data=True,

View File

@@ -0,0 +1 @@
from .gsnet import *

View File

@@ -0,0 +1,7 @@
# Public entry points of the gsnet subpackage, re-exported for short imports
# (e.g. `from ..net import GraspNet` instead of the full module path).
from .models.graspnet import *
from .utils.collision_detector import *

__all__ = [
    "GraspNet", "pred_decode", "ModelFreeCollisionDetector"
]

View File

@@ -15,7 +15,7 @@ from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
from ..utils import distortion_correction, crop_imgs_box_xywh, draw_box, draw_mask, rmat2quat, \
crop_imgs_mask, get_map, create_o3d_pcd, save_img
crop_imgs_mask, get_map, create_o3d_pcd, save_img, refine_grasp_pose
from .init_node import InitBase
@@ -105,23 +105,24 @@ class DetectNode(InitBase):
response.success = False
response.info = "Camera data have not objects"
response.objects = []
print("=========================== < end > ===========================")
return response
response.success = False
response.info = f"Name is wrong: {request.camera_position}"
response.objects = []
print("=========================== < end > ===========================")
return response
if request.camera_position == 'left':
self.hand_eye_mat = self.eye_in_left_hand_mat
self.p = "left"
elif request.camera_position == 'right':
self.hand_eye_mat = self.eye_in_right_hand_mat
self.p = "right"
else:
self.hand_eye_mat = self.eye_to_hand_mat
self.p = "head"
self.camera_size = [color_img_ros.width, color_img_ros.height]
@@ -157,7 +158,6 @@ class DetectNode(InitBase):
if img is None:
img = color_img_ros
self.pub_detect_image.publish(img)
print("=========================== < end > ===========================")
return response
@@ -167,7 +167,7 @@ class DetectNode(InitBase):
home = os.path.expanduser("~")
save_dir = os.path.join(home, "images")
save_img(rgb_img, "orign_image.png", save_dir=save_dir, mark_cur_time=True)
save_img(rgb_img.copy(), "orign_image.png", save_dir=save_dir, mark_cur_time=True)
# Get Predict Results
time1 = time.time()
@@ -178,7 +178,6 @@ class DetectNode(InitBase):
# Get masks
if result.masks is None or len(result.masks) == 0:
return None, 1000, False
masks = result.masks.data.cpu().numpy()
# Get boxes
@@ -196,6 +195,10 @@ class DetectNode(InitBase):
self.get_logger().info(f"Detect object num: {len(masks)}")
full_points = create_o3d_pcd(
depth_img, self.camera_size, self.k, **self.calculate_configs
)
if self.calculate_mode == "E2E" and self.detect_mode == 'Detect' and E2E_SIGN:
self.calculate_configs["orign_point_clouds"] = create_o3d_pcd(
depth_img, self.camera_size, self.k, **self.calculate_configs
@@ -223,6 +226,19 @@ class DetectNode(InitBase):
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
continue
time_c = time.time()
if self.p == "left" or self.p == "right":
position = rmat[0:3, 3]
rmat, sign = refine_grasp_pose(
full_points, self.calculate_configs.get("voxel_size"), position
)
if not sign:
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
continue
time_e = time.time()
print(f"Refine: {(time_e - time_c) * 1000} ms")
self.get_logger().info(f"grab_width: {grab_width}")
# rmat = self.hand_eye_mat @ rmat
x, y, z, rw, rx, ry, rz = rmat2quat(self.hand_eye_mat @ rmat)
@@ -253,7 +269,7 @@ class DetectNode(InitBase):
if not self.output_boxes and not self.output_masks:
return None, pose_list, True
if self.output_boxes:
draw_box(rgb_img, result, save_dir=save_dir, mark_time = False)
draw_box(rgb_img, result, save_dir=save_dir, mark_time = True)
if self.output_masks:
draw_mask(rgb_img, result)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True

View File

@@ -18,7 +18,7 @@ from .config_node import ConfigBase
E2E_SIGN = True
try:
from ..net.gsnet.models.graspnet import GraspNet
from ..net import GraspNet
from ..utils import calculate_pose_e2e
except (ImportError, ModuleNotFoundError):
E2E_SIGN = False

View File

@@ -3,3 +3,4 @@ from .draw_tools import *
from .calculate_tools import *
from .file_tools import *
from .pointclouds_tools import *
from .grasp_refine import *

View File

@@ -14,9 +14,7 @@ try:
import torch
import MinkowskiEngine as ME
import collections.abc as container_abcs
from ..net.gsnet.models.graspnet import pred_decode
from ..net.gsnet.utils.collision_detector import ModelFreeCollisionDetector
from ..net import pred_decode, ModelFreeCollisionDetector
except (ImportError, OSError, RuntimeError):
logging.warning("ImportError or OSError or RuntimeError")
E2E_SIGN = False
@@ -64,8 +62,14 @@ def calculate_pose_pca(
input:
mask: np.ndarray
depth_img: np.ndarray
intrinsics: o3d.camera.PinholeCameraIntrinsic
calculate_grab_width: bool
**kwargs:
output:
rmat: np.ndarray (4, 4)
grab_width: list
sign: bool
"""
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -186,72 +190,6 @@ def calculate_pose_icp(
return rmat, grab_width, True
def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
"""点云去噪"""
point_cloud = point_cloud.remove_non_finite_points()
down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
point_num = len(down_pcd.points)
# 半径滤波
clean_pcd, _ = down_pcd.remove_radius_outlier(
nb_points=max(int(round(10 * voxel_size / 0.005)), 3),
radius=voxel_size * 10
)
# 统计滤波
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
nb_neighbors=max(int(round(10 * voxel_size / 0.005)), 3),
std_ratio=2.0
)
# 过滤过近的点
points = np.asarray(clean_pcd.points)
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
# # 使用数量最大簇判定噪声强度
# _, counts = np.unique(labels[labels >= 0], return_counts=True)
# largest_cluster_ratio = counts.max() / len(labels)
# if largest_cluster_ratio < 0.5:
# return None
labels = np.array(
clean_pcd.cluster_dbscan(
eps=voxel_size * 10,
min_points=max(int(round(10 * voxel_size / 0.005)), 3)
)
)
if len(labels[labels >= 0]) == 0:
return clean_pcd, True
# 使用距离最近簇作为物体
points = np.asarray(clean_pcd.points)
cluster_label = set(labels)
point_cloud_clusters = []
for label in cluster_label:
if label == -1:
continue
idx = np.where(labels == label)[0]
point_cloud_cluster = clean_pcd.select_by_index(idx)
points_cluster_z = points[idx, 2]
z_avg = np.mean(points_cluster_z)
if z_avg < 0.2:
continue
point_cloud_clusters.append((point_cloud_cluster, z_avg))
if len(point_cloud_clusters) == 0:
return clean_pcd
point_cloud_clusters.sort(key=lambda x: x[1])
clean_pcd = point_cloud_clusters[0][0]
# 使用最近簇判断噪音强度
largest_cluster_ratio = len(clean_pcd.points) / point_num
if largest_cluster_ratio < 0.05:
return 1100, False
return clean_pcd, True
if E2E_SIGN:
def minkowski_collate_fn(list_data):
coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
@@ -266,27 +204,6 @@ if E2E_SIGN:
"quantize2original": quantize2original,
"point_clouds": torch.stack([torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
}
# [{
# 'point_clouds': points.astype(np.float32),
# 'coors': points.astype(np.float32) / kwargs.get("voxel_size", 0.002),
# 'feats': np.ones_like(points).astype(np.float32),
# }]
# def collate_fn_(batch):
# if type(batch[0]).__module__ == 'numpy':
# return torch.stack([torch.from_numpy(b) for b in batch], 0)
# elif isinstance(batch[0], container_abcs.Sequence):
# return [[torch.from_numpy(sample) for sample in b] for b in batch]
# elif isinstance(batch[0], container_abcs.Mapping):
# for key in batch[0]:
# if key == 'coors' or key == 'feats':
# continue
# res[key] = collate_fn_([d[key] for d in batch])
# return res
#
# res = collate_fn_(list_data)
return res
@@ -297,7 +214,22 @@ if E2E_SIGN:
calculate_grab_width: bool = False,
**kwargs
):
"""点云主成分分析法计算位态"""
"""
点云抓取姿态预测模型计算位态
-----
input:
mask: np.ndarray
depth_img: np.ndarray
intrinsics: o3d.camera.PinholeCameraIntrinsic
calculate_grab_width: bool (deprecated)
**kwargs:
"depth_scale": float
"depth_trunc": float
"voxel_size": float
"model":
"collision_thresh": float
"""
# logging.error("stage 1")
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -355,15 +287,15 @@ if E2E_SIGN:
if len(point_cloud.points) == 0:
return 1101, [0.0, 0.0, 0.0], False
# 点云补齐15000个点
points = np.asarray(point_cloud.points)
if len(points) >= 15000:
idxs = np.random.choice(len(points), 15000, replace=False)
else:
idxs1 = np.arange(len(points))
idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
idxs = np.concatenate([idxs1, idxs2], axis=0)
points = points[idxs]
# # 点云补齐15000个点
# points = np.asarray(point_cloud.points)
# if len(points) >= 15000:
# idxs = np.random.choice(len(points), 15000, replace=False)
# else:
# idxs1 = np.arange(len(points))
# idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
# idxs = np.concatenate([idxs1, idxs2], axis=0)
# points = points[idxs]
# 构建推理数据结构
ret_dict = {

View File

@@ -0,0 +1,217 @@
import numpy as np
import transforms3d as tfs
# import open3d as o3d
__all__ = ["refine_grasp_pose"]
def collision_detector(
    points: np.ndarray,
    refine: np.ndarray,
    volume: list[float],
    iou: float = 0.001,
    **kwargs
) -> int:
    """
    Test a box model of the gripper against a point cloud in the gripper frame.
    -----
    input:
        points: np.ndarray (3, n), cloud already expressed in the gripper frame
        refine: np.ndarray (3, ), candidate offset of the grasp centre
        volume: list [left, right], expected point counts of solid fingers,
            used to normalise finger hit counts into an occupancy ratio
        iou: float, occupancy ratio above which a finger counts as colliding
    **kwargs:
        "grab_width": float
        "hand_size": list [width, height, length]
        "left_size": list [thick, width, length]
        "right_size": list [thick, width, length]
            thick / width / length of a gripper finger
    output:
        collision_code: int
            0 = free, 1 = both palm halves hit, 2 = bottom half, 3 = top half,
            4 = left finger, 5 = right finger, 6 = both fingers
    """
    hand_size = kwargs.get('hand_size', [0.090, 0.063, 0.13])
    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
    # 95% of the commanded opening, leaving a small closing margin.
    grab_width = kwargs.get('grab_width', 0.10) * 0.95

    cx, cy, cz = refine
    px, py, pz = points[0], points[1], points[2]

    def _inside(x_lo, x_hi, y_lo, y_hi, z_lo, z_hi):
        # Boolean mask of points strictly inside an axis-aligned box.
        return (
            (px > x_lo) & (px < x_hi)
            & (py > y_lo) & (py < y_hi)
            & (pz > z_lo) & (pz < z_hi)
        )

    half_w = hand_size[0] / 2  # palm half-width along y
    half_h = hand_size[1] / 2  # palm half-height along x
    # Palm body sits behind the grasp centre (z in [cz - length, cz]),
    # split into a top (-x) and bottom (+x) half.
    top_hit = _inside(cx - half_h, cx, cy - half_w, cy + half_w,
                      cz - hand_size[2], cz)
    bottom_hit = _inside(cx, cx + half_h, cy - half_w, cy + half_w,
                         cz - hand_size[2], cz)
    # Fingers extend forward of the grasp centre, offset laterally by half
    # the (margined) grab width.
    left_hit = _inside(cx - left_size[1] / 2, cx + left_size[1] / 2,
                       cy + grab_width / 2, cy + grab_width / 2 + left_size[0],
                       cz, cz + left_size[2])
    right_hit = _inside(cx - right_size[1] / 2, cx + right_size[1] / 2,
                        cy - grab_width / 2 - right_size[0], cy - grab_width / 2,
                        cz, cz + right_size[2])

    top_any = bool(top_hit.any())
    bottom_any = bool(bottom_hit.any())
    if top_any and bottom_any:
        return 1
    if bottom_any:
        return 2
    if top_any:
        return 3

    # A single stray point inside a palm box is fatal, but fingers tolerate
    # up to `iou` of their solid volume before a collision is declared.
    left_ratio = left_hit.sum() / (volume[0] + 1e-6)
    right_ratio = right_hit.sum() / (volume[1] + 1e-6)
    if left_ratio > iou and right_ratio > iou:
        return 6
    if left_ratio > iou:
        return 4
    if right_ratio > iou:
        return 5
    return 0
def refine_grasp_pose(
    points: np.ndarray,
    voxel_size: float,
    target_position: np.ndarray,
    expect_orientation: np.ndarray | None = None,
    **kwargs
):
    """
    Iteratively nudge a grasp position until the gripper box model no longer
    collides with the observed point cloud.
    -----
    input:
        points: np.ndarray (n, 3), scene cloud, already voxel-downsampled
        voxel_size: float, downsample voxel edge length; used to estimate how
            many points a solid finger volume would contain
        target_position: np.ndarray (3, ), initial grab target position
        expect_orientation: np.ndarray (3, 3), desired gripper orientation;
            a fixed default rotation is used when None
    **kwargs:
        "grab_width": float
        "hand_size": list [width, height, length]
        "left_size": list [thick, width, length]
        "right_size": list [thick, width, length]
            thick / width / length of a gripper finger
    output:
        (rmat, True) on success, where rmat is the 4x4 homogeneous transform
        of the refined pose; (1202, False) when no collision-free pose is
        found within the iteration budget or both fingers collide.
    """
    if expect_orientation is None:
        # Default orientation — NOTE(review): frame convention assumed from
        # caller usage; confirm against the hand-eye calibration.
        expect_orientation = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    refine = np.zeros(3)  # accumulated offset, expressed in the gripper frame
    grab_width = kwargs.get('grab_width', 0.10)
    hand_size = kwargs.get('hand_size', [0.090, 0.063, 0.13])
    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
    # Expected point count of a solid finger volume at this voxel resolution;
    # collision_detector divides finger hits by these to get occupancy ratios.
    left_volume = left_size[0] * left_size[1] * left_size[2] / (voxel_size**3)
    right_volume = right_size[0] * right_size[1] * right_size[2] / (voxel_size**3)
    # Move the cloud into the gripper frame: translate the grasp target to the
    # origin, then rotate by the transpose (inverse) of the orientation.
    points = points - target_position[None, :]  # (n, 3) - (1, 3) = (n, 3)
    points = expect_orientation.T @ points.T  # (3, 3) @ (3, n) = (3, n)
    # Keep only points near the gripper's working envelope (5 cm margin).
    points = points[:,
        (points[2] < left_size[2]) & (points[2] > -(0.1 + hand_size[2]))
        & (points[0] < (0.05 + hand_size[1]/2)) & (points[0] > -(0.05 + hand_size[1]/2))
        & (points[1] < (0.05 + grab_width/2 + left_size[0]))
        & (points[1] > -(0.05 + grab_width/2 + right_size[0]))
    ]
    # NOTE(review): "frist" = "first"; set after the one-time z backoff below.
    frist_sign = False
    y_n = y_p = False  # direction of the most recent lateral (y) dodge
    hand_num = left_num = right_num = 0  # per-direction adjustment budgets
    while hand_num < 20 and left_num < 10 and right_num < 10:
        collision_code = collision_detector(
            points, refine, volume=[left_volume, right_volume], **kwargs
        )
        if collision_code == 0:
            # Collision-free. If the last step was a lateral dodge, add one
            # extra 5 mm of clearance in the same direction before accepting.
            if y_p and not y_n:
                refine[1] += 0.005
                y_p = y_n = False
                continue
            if not y_p and y_n:
                refine[1] -= 0.005
                y_p = y_n = False
                continue
            # Map the accumulated gripper-frame offset back to the original
            # frame and pack position + orientation into a 4x4 transform.
            position = target_position + (expect_orientation @ refine.T).T
            rmat = tfs.affines.compose(np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1])
            return rmat, True
        if collision_code == 6:
            # Both fingers collide: no lateral dodge can help — give up.
            return 1202, False
        if collision_code == 1:
            # Both palm halves collide: back the whole hand off along -z.
            refine[2] -= 0.01
            hand_num += 1
            print("z + 0.01")
            continue
        elif collision_code == 2:
            if not frist_sign:
                # First palm hit: try a small z backoff before shifting in x.
                print("z + 0.005")
                refine[2] -= 0.005
                frist_sign = True
                continue
            refine[0] -= 0.005
            hand_num += 1
            print("x - 0.005")
            continue
        elif collision_code == 3:
            if not frist_sign:
                print("z + 0.005")
                refine[2] -= 0.005
                frist_sign = True
                continue
            refine[0] += 0.005
            hand_num += 1
            print("x + 0.005")
            continue
        elif collision_code == 4:
            # Left finger collides: dodge toward +y.
            y_p = True
            y_n = False
            refine[1] += 0.005
            left_num += 1
            print("y + 0.005")
            continue
        elif collision_code == 5:
            # Right finger collides: dodge toward -y.
            y_p = False
            y_n = True
            refine[1] -= 0.005
            right_num += 1
            print("y - 0.005")
            continue
    # Iteration budget exhausted without finding a collision-free pose.
    return 1202, False
# def draw(pcd, R, T):
# point = T
# x, y, z = R[:, 0], R[:, 1], R[:, 2]
#
# points = [
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
# point, point + x, point + y, point + z
#
# ] # 画点:原点、第一主成分、第二主成分
# lines = [
# [0, 1], [0, 2], [0, 3],
# [4, 5], [4, 6], [4, 7]
# ] # 画出三点之间两两连线
# colors = [
# [1, 0, 0], [0, 1, 0], [0, 0, 1],
# [1, 0, 0], [0, 1, 0], [0, 0, 1]
# ]
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
# line_set.colors = o3d.utility.Vector3dVector(colors)
#
# o3d.visualization.draw_geometries([pcd, line_set])

View File

@@ -177,8 +177,8 @@ def save_img(img: np.ndarray, save_name, save_dir: str | None = None, mark_cur_t
save_path = os.path.join(home_path, save_name)
if mark_cur_time:
cur_time = time.time() * 1000
path, ext = os.path.splitext(save_name)
cur_time = int(time.time() * 1000)
path, ext = os.path.splitext(save_path)
save_path = path + f'_{cur_time}' + ext
cv2.imwrite(save_path, img)

View File

@@ -104,16 +104,17 @@ def create_o3d_pcd(depth_img, camera_size, k, **kwargs):
orign_point_clouds = orign_point_clouds.voxel_down_sample(
kwargs.get("voxel_size", 0.01)
)
orign_point_clouds, _ = orign_point_clouds.remove_radius_outlier(
nb_points=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
radius=kwargs.get("voxel_size", 0.01) * 10
)
# 统计滤波
orign_point_clouds, _ = orign_point_clouds.remove_statistical_outlier(
nb_neighbors=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
std_ratio=2.0
)
# # 半径滤波
# orign_point_clouds, _ = orign_point_clouds.remove_radius_outlier(
# nb_points=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
# radius=kwargs.get("voxel_size", 0.01) * 10
# )
#
# # 统计滤波
# orign_point_clouds, _ = orign_point_clouds.remove_statistical_outlier(
# nb_neighbors=int(round(20 * kwargs.get("voxel_size", 0.01) / 0.005)),
# std_ratio=2.0
# )
orign_points = np.asarray(orign_point_clouds.points)
return orign_points