Merge branch 'feature' into feature_cpp_test

2026-01-19 14:09:29 +08:00
parent 1c9af20786 611b529075
commit 52c10dcb3c
18 changed files with 439 additions and 157 deletions
--- a/calibration_mat/eye_in_right_hand.json
+++ b/calibration_mat/eye_in_right_hand.json
@@ -1,22 +1,22 @@
 {
    "T": [
        [
-            0.013635791720580838,
-            0.9992765703636767,
-            -0.03550212819481636,
-            -0.08645650535173793
+            0.02515190926712163,
+            0.9984434279780899,
+            -0.049780544267610596,
+            -0.08312977955463981
        ],
        [
-            -0.9998865276218899,
-            0.013399556063222661,
-            -0.006883587549256321,
-            0.036196137264974
+            -0.9996337211449028,
+            0.025617085950903107,
+            0.008728599966714646,
+            0.059044674332170574
        ],
        [
-            -0.006402895000908794,
-            0.03559196285001416,
-            0.999345893630474,
-            0.014407883180676354
+            0.00999024575340213,
+            0.04954276975245833,
+            0.9987220378839358,
+            0.017378234075134728
        ],
        [
            0.0,
--- a/tools/pt2vino.py
+++ b/tools/pt2vino.py
@@ -0,0 +1,24 @@
+import argparse
+
+from ultralytics import YOLO
+
+
+def main(checkpoint_path):
+    model = YOLO(checkpoint_path)
+    model.export(
+        format="openvino",
+        imgsz=(1280, 720),
+        dynamic=True,
+        simplify=True,
+        half=True,
+        workspace=0.8,
+        batch=1,
+        device="cpu"
+    )
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("checkpoint_path", type=str)
+    args = parser.parse_args()
+    main(args.checkpoint_path)
+    # main(checkpoint_path="checkpoint/medical_sense-seg.pt")
--- a/vision_detect/checkpoints/hivecorebox-seg.onnx
+++ b/vision_detect/checkpoints/hivecorebox-seg.onnx
--- a/vision_detect/checkpoints/medical_sense-seg.pt
+++ b/vision_detect/checkpoints/medical_sense-seg.pt
--- a/vision_detect/checkpoints/yolo11n-seg.onnx
+++ b/vision_detect/checkpoints/yolo11n-seg.onnx
--- a/vision_detect/checkpoints/yolo11s-seg.onnx
+++ b/vision_detect/checkpoints/yolo11s-seg.onnx
--- a/vision_detect/configs/error_configs/report_logging_define.json
+++ b/vision_detect/configs/error_configs/report_logging_define.json
@@ -13,6 +13,7 @@

    "1200": "The number of points is insufficient to compute an OBB",
    "1201": "PCA output vector is None",
+    "1202": "This pose cannot be grab, and position refine fail",

    "1300": "E2E model input data 'coors' are fewer than 128",
    "1301": "E2E model input data 'point_clouds' are fewer than 128",
--- a/vision_detect/configs/launch_configs/medical_sense.json
+++ b/vision_detect/configs/launch_configs/medical_sense.json
@@ -1,5 +1,5 @@
 {
-    "node_name": "bottle_detect_service",
+    "node_name": "medical_sense_service",
    "output_boxes": "True",
    "output_masks": "False",

@@ -17,11 +17,11 @@
    "detect_mode": "Detect",
    "Detect_configs": {
        "checkpoint_path": "checkpoints/medical_sense-seg.pt",
-        "confidence": 0.50,
+        "confidence": 0.75,
        "classes": []
    },

-    "calculate_mode": "E2E",
+    "calculate_mode": "PCA",
    "PCA_configs": {
        "depth_scale": 1000.0,
        "depth_trunc": 3.0,
--- a/vision_detect/setup.py
+++ b/vision_detect/setup.py
@@ -1,32 +1,47 @@
+import os
 from glob import glob
 from setuptools import find_packages, setup

 package_name = 'vision_detect'

+openvino_files = []
+for model_dir in glob("checkpoints/*_openvino_model"):
+    model_name = os.path.basename(model_dir)
+    model_files = glob(os.path.join(model_dir, '*'))
+    openvino_files.append(
+        ('share/' + package_name + '/checkpoints/' + model_name, model_files)
+    )
+
+data_files = [
+    ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
+    ('share/' + package_name, ['package.xml']),
+    ('share/' + package_name + '/launch', glob('launch/*.launch.py')),
+    ('share/' + package_name + '/configs', glob('configs/*.json')),
+    ('share/' + package_name + '/configs/flexiv_configs',
+     glob('configs/flexiv_configs/*.json')),
+    ('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
+    ('share/' + package_name + '/configs/launch_configs',
+     glob('configs/launch_configs/*.json')),
+    ('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
+
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
+    ('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
+    ('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
+
+    ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
+     glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
+    ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
+     glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
+    ]
+
+data_files.extend(openvino_files)
+
 setup(
    name=package_name,
    version='0.0.0',
    packages=find_packages(exclude=['test']),
-    data_files=[
-        ('share/ament_index/resource_index/packages', ['resource/' + package_name]),
-        ('share/' + package_name, ['package.xml']),
-        ('share/' + package_name + '/launch', glob('launch/*.launch.py')),
-        ('share/' + package_name + '/configs', glob('configs/*.json')),
-        ('share/' + package_name + '/configs/flexiv_configs', glob('configs/flexiv_configs/*.json')),
-        ('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
-        ('share/' + package_name + '/configs/launch_configs', glob('configs/launch_configs/*.json')),
-        ('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
-
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
-        ('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
-        ('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
-
-        ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
-         glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
-        ('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
-         glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
-    ],
+    data_files=data_files,
    install_requires=['setuptools'],
    zip_safe=True,
    include_package_data=True,
--- a/vision_detect/vision_detect/VisionDetect/net/init.py
+++ b/vision_detect/vision_detect/VisionDetect/net/init.py
@@ -0,0 +1 @@
+from .gsnet import *
--- a/vision_detect/vision_detect/VisionDetect/net/gsnet/init.py
+++ b/vision_detect/vision_detect/VisionDetect/net/gsnet/init.py
@@ -0,0 +1,7 @@
+from .models.graspnet import *
+from .utils.collision_detector import *
+
+
+__all__ = [
+    "GraspNet", "pred_decode", "ModelFreeCollisionDetector"
+]
--- a/vision_detect/vision_detect/VisionDetect/node/detect_node.py
+++ b/vision_detect/vision_detect/VisionDetect/node/detect_node.py
@@ -15,7 +15,7 @@ from geometry_msgs.msg import Pose, Point, Quaternion
 from interfaces.msg import PoseClassAndID, PoseArrayClassAndID

 from ..utils import distortion_correction, crop_imgs_box_xywh, draw_box, draw_mask, rmat2quat, \
-    crop_imgs_mask, get_map, create_o3d_pcd, save_img
+    crop_imgs_mask, get_map, create_o3d_pcd, save_img, refine_grasp_pose
 from .init_node import InitBase


@@ -105,23 +105,24 @@ class DetectNode(InitBase):
                    response.success = False
                    response.info = "Camera data have not objects"
                    response.objects = []
-
                    print("=========================== < end > ===========================")
                    return response

                response.success = False
                response.info = f"Name is wrong: {request.camera_position}"
                response.objects = []
-
                print("=========================== < end > ===========================")
                return response

        if request.camera_position == 'left':
            self.hand_eye_mat = self.eye_in_left_hand_mat
+            self.p = "left"
        elif request.camera_position == 'right':
            self.hand_eye_mat = self.eye_in_right_hand_mat
+            self.p = "right"
        else:
            self.hand_eye_mat = self.eye_to_hand_mat
+            self.p = "head"

        self.camera_size = [color_img_ros.width, color_img_ros.height]

@@ -157,7 +158,6 @@ class DetectNode(InitBase):
            if img is None:
                img = color_img_ros
            self.pub_detect_image.publish(img)
-
        print("=========================== < end > ===========================")
        return response

@@ -167,7 +167,7 @@ class DetectNode(InitBase):

        home = os.path.expanduser("~")
        save_dir = os.path.join(home, "images")
-        save_img(rgb_img, "orign_image.png", save_dir=save_dir, mark_cur_time=True)
+        save_img(rgb_img.copy(), "orign_image.png", save_dir=save_dir, mark_cur_time=True)

        # Get Predict Results
        time1 = time.time()
@@ -178,7 +178,6 @@ class DetectNode(InitBase):
        # Get masks
        if result.masks is None or len(result.masks) == 0:
            return None, 1000, False
-
        masks = result.masks.data.cpu().numpy()

        # Get boxes
@@ -196,6 +195,12 @@ class DetectNode(InitBase):

        self.get_logger().info(f"Detect object num: {len(masks)}")

+        full_points = create_o3d_pcd(
+            depth_img, self.camera_size, self.k, **self.calculate_configs
+        )
+        time_full_points = time.time()
+        print(f"create full points: {(time_full_points - time3) * 1000}")
+
        if self.calculate_mode == "E2E" and self.detect_mode == 'Detect' and E2E_SIGN:
            self.calculate_configs["orign_point_clouds"] = create_o3d_pcd(
                depth_img, self.camera_size, self.k, **self.calculate_configs
@@ -223,8 +228,21 @@ class DetectNode(InitBase):
                self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
                continue

+            time_c = time.time()
+            if self.p == "left" or self.p == "right":
+                position = rmat[0:3, 3]
+                rmat, sign = refine_grasp_pose(
+                    full_points, self.calculate_configs.get("voxel_size"), position,
+                    search_mode=True
+                )
+                if not sign:
+                    self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
+                    continue
+
+            time_e = time.time()
+            print(f"Refine: {(time_e - time_c) * 1000} ms")
+
            self.get_logger().info(f"grab_width: {grab_width}")
-            # rmat = self.hand_eye_mat @ rmat
            x, y, z, rw, rx, ry, rz = rmat2quat(self.hand_eye_mat @ rmat)

            pose = Pose()
@@ -253,7 +271,7 @@ class DetectNode(InitBase):
        if not self.output_boxes and not self.output_masks:
            return None, pose_list, True
        if self.output_boxes:
-            draw_box(rgb_img, result, save_dir=save_dir, mark_time = False)
+            draw_box(rgb_img, result, save_dir=save_dir, mark_time = True)
        if self.output_masks:
            draw_mask(rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
--- a/vision_detect/vision_detect/VisionDetect/node/init_node.py
+++ b/vision_detect/vision_detect/VisionDetect/node/init_node.py
@@ -18,7 +18,7 @@ from .config_node import ConfigBase

 E2E_SIGN = True
 try:
-    from ..net.gsnet.models.graspnet import GraspNet
+    from ..net import GraspNet
    from ..utils import calculate_pose_e2e
 except (ImportError, ModuleNotFoundError):
    E2E_SIGN = False
--- a/vision_detect/vision_detect/VisionDetect/utils/init.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/init.py
@@ -3,3 +3,4 @@ from .draw_tools import *
 from .calculate_tools import *
 from .file_tools import *
 from .pointclouds_tools import *
+from .grasp_refine import *
--- a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
@@ -14,9 +14,7 @@ try:
    import torch
    import MinkowskiEngine as ME
    import collections.abc as container_abcs
-
-    from ..net.gsnet.models.graspnet import pred_decode
-    from ..net.gsnet.utils.collision_detector import ModelFreeCollisionDetector
+    from ..net import pred_decode, ModelFreeCollisionDetector
 except (ImportError, OSError, RuntimeError):
    logging.warning("ImportError or OSError or RuntimeError")
    E2E_SIGN = False
@@ -64,8 +62,14 @@ def calculate_pose_pca(
    input:
        mask: np.ndarray
        depth_img: np.ndarray
+        intrinsics: o3d.camera.PinholeCameraIntrinsic
+        calculate_grab_width: bool
+        **kwargs:

-
+    output:
+        rmat: np.ndarray (4, 4)
+        grab_width: list
+        sign: bool
    """
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -186,72 +190,6 @@ def calculate_pose_icp(
    return rmat, grab_width, True


-def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
-    """点云去噪"""
-    point_cloud = point_cloud.remove_non_finite_points()
-    down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
-    point_num = len(down_pcd.points)
-
-    # 半径滤波
-    clean_pcd, _ = down_pcd.remove_radius_outlier(
-        nb_points=max(int(round(10 * voxel_size / 0.005)), 3),
-        radius=voxel_size * 10
-        )
-
-    # 统计滤波
-    clean_pcd, _ = clean_pcd.remove_statistical_outlier(
-        nb_neighbors=max(int(round(10 * voxel_size / 0.005)), 3),
-        std_ratio=2.0
-        )
-
-    # 过滤过近的点
-    points = np.asarray(clean_pcd.points)
-    clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
-
-    # # 使用数量最大簇判定噪声强度
-    # _, counts = np.unique(labels[labels >= 0], return_counts=True)
-    # largest_cluster_ratio = counts.max() / len(labels)
-    # if largest_cluster_ratio < 0.5:
-    #     return None
-
-    labels = np.array(
-        clean_pcd.cluster_dbscan(
-            eps=voxel_size * 10,
-            min_points=max(int(round(10 * voxel_size / 0.005)), 3)
-        )
-    )
-    if len(labels[labels >= 0]) == 0:
-        return clean_pcd, True
-
-    # 使用距离最近簇作为物体
-    points = np.asarray(clean_pcd.points)
-    cluster_label = set(labels)
-    point_cloud_clusters = []
-    for label in cluster_label:
-        if label == -1:
-            continue
-        idx = np.where(labels == label)[0]
-        point_cloud_cluster = clean_pcd.select_by_index(idx)
-        points_cluster_z = points[idx, 2]
-        z_avg = np.mean(points_cluster_z)
-        if z_avg < 0.2:
-            continue
-        point_cloud_clusters.append((point_cloud_cluster, z_avg))
-
-    if len(point_cloud_clusters) == 0:
-        return clean_pcd
-
-    point_cloud_clusters.sort(key=lambda x: x[1])
-    clean_pcd = point_cloud_clusters[0][0]
-
-    # 使用最近簇判断噪音强度
-    largest_cluster_ratio = len(clean_pcd.points) / point_num
-    if largest_cluster_ratio < 0.05:
-        return 1100, False
-
-    return clean_pcd, True
-
-
 if E2E_SIGN:
    def minkowski_collate_fn(list_data):
        coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
@@ -266,27 +204,6 @@ if E2E_SIGN:
            "quantize2original": quantize2original,
            "point_clouds": torch.stack([torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
        }
-
-        # [{
-        #     'point_clouds': points.astype(np.float32),
-        #     'coors': points.astype(np.float32) / kwargs.get("voxel_size", 0.002),
-        #     'feats': np.ones_like(points).astype(np.float32),
-        # }]
-
-        # def collate_fn_(batch):
-        #     if type(batch[0]).__module__ == 'numpy':
-        #         return torch.stack([torch.from_numpy(b) for b in batch], 0)
-        #     elif isinstance(batch[0], container_abcs.Sequence):
-        #         return [[torch.from_numpy(sample) for sample in b] for b in batch]
-        #     elif isinstance(batch[0], container_abcs.Mapping):
-        #         for key in batch[0]:
-        #             if key == 'coors' or key == 'feats':
-        #                 continue
-        #             res[key] = collate_fn_([d[key] for d in batch])
-        #         return res
-        #
-        # res = collate_fn_(list_data)
-
        return res


@@ -297,7 +214,22 @@ if E2E_SIGN:
            calculate_grab_width: bool = False,
            **kwargs
    ):
-        """点云主成分分析法计算位态"""
+        """
+        点云抓取姿态预测模型计算位态
+        -----
+        input:
+            mask: np.ndarray
+            depth_img: np.ndarray
+            intrinsics: o3d.camera.PinholeCameraIntrinsic
+            calculate_grab_width: bool (abandon)
+            **kwargs:
+                "depth_scale": float
+                "depth_trunc": float
+                "voxel_size": float
+                "model":
+                "collision_thresh": float
+
+        """
        # logging.error("stage 1")
        depth_img_mask = np.zeros_like(depth_img)
        depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -355,15 +287,15 @@ if E2E_SIGN:
        if len(point_cloud.points) == 0:
            return 1101, [0.0, 0.0, 0.0], False

-        # 点云补齐15000个点
-        points = np.asarray(point_cloud.points)
-        if len(points) >= 15000:
-            idxs = np.random.choice(len(points), 15000, replace=False)
-        else:
-            idxs1 = np.arange(len(points))
-            idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
-            idxs = np.concatenate([idxs1, idxs2], axis=0)
-        points = points[idxs]
+        # # 点云补齐15000个点
+        # points = np.asarray(point_cloud.points)
+        # if len(points) >= 15000:
+        #     idxs = np.random.choice(len(points), 15000, replace=False)
+        # else:
+        #     idxs1 = np.arange(len(points))
+        #     idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
+        #     idxs = np.concatenate([idxs1, idxs2], axis=0)
+        # points = points[idxs]

        # 构建推理数据结构
        ret_dict = {
--- a/vision_detect/vision_detect/VisionDetect/utils/grasp_refine.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/grasp_refine.py
@@ -0,0 +1,282 @@
+import numpy as np
+import transforms3d as tfs
+# import open3d as o3d
+
+__all__ = ["refine_grasp_pose"]
+
+
+def collision_detector(
+        points: np.ndarray,
+        refine: np.ndarray,
+        volume: list[float],
+        iou: float = 0.001,
+        search_mode: bool = False,
+        **kwargs
+) -> int:
+    """
+    collision detector
+    -----
+    input:
+        points: np.ndarray (3, n)
+        refine: np.ndarray (3, ), Grab target poes coordinate system
+        volume: list [left, right]
+        iou : float
+        search_mode: bool, Default False
+        **kwargs:
+            "grab_width": float
+            "hand_size": list [width, height, length]
+            "left_size": list [thick, width, length]
+            "right_size": list [thick, width, length]
+            thick of gripper finger, width of gripper finger, length of gripper finger
+    output:
+        collision_code: int
+    """
+    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
+    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
+    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
+    grab_width = kwargs.get('grab_width', 0.10) * 0.95
+    x, y, z = refine
+
+    if not search_mode:
+        hand_top_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x - hand_size[1]*1/4))  & (points[0] > (x - hand_size[1]/2))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+        hand_center_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x + hand_size[1]*1/4))  & (points[0] > (x - hand_size[1]*1/4))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+        hand_bottom_box = (
+            (points[2] < z) & (points[2] > (z - hand_size[2]))
+            & (points[0] < (x + hand_size[1]/2)) & (points[0] > (x + hand_size[1]*1/4))
+            & (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
+        )
+
+        if (hand_top_box.any() and hand_bottom_box.any()) or hand_center_box.any():
+            return 1
+        if hand_bottom_box.any():
+            return 2
+        if hand_top_box.any():
+            return 3
+
+    left_finger_box = (
+        (points[2] < (z + left_size[2])) & (points[2] > z -0.05)
+        & (points[0] < (x + left_size[1]/2)) & (points[0] > (x - left_size[1]/2))
+        & (points[1] < (y + grab_width/2 + left_size[0])) & (points[1] > (y + grab_width/2))
+    )
+    right_finger_box = (
+        (points[2] < (z + right_size[2])) & (points[2] > z - 0.05)
+        & (points[0] < (x + right_size[1]/2)) & (points[0] > (x - right_size[1]/2))
+        & (points[1] < (y - grab_width/2)) & (points[1] > (y-(grab_width/2 + right_size[0])))
+    )
+
+    left_iou = left_finger_box.sum() / (volume[0]+1e-6)
+    right_iou = right_finger_box.sum() / (volume[1]+1e-6)
+
+    if left_iou > iou and right_iou > iou:
+        return 6
+
+    if left_iou > iou :
+        return 4
+    if right_iou > iou :
+        return 5
+
+    return 0
+
+
+def refine_grasp_pose(
+        points: np.ndarray,
+        voxel_size: float,
+        target_position: np.ndarray,
+        expect_orientation: np.ndarray | None = None,
+        search_mode: bool = False,
+        **kwargs
+):
+    """
+    refine grasp pose
+    -----
+    input:
+        points: np.ndarray (n, 3), already complete voxel downsample
+        voxel_size: float
+        target_position: np.ndarray (3, ), grab target position
+        expect_orientation: np.ndarray (3, 3), expect grab target orientation
+        search_mode: bool, Default False
+        **kwargs:
+            "grab_width": float
+            "hand_size": list [width, height, length]
+            "left_size": list [thick, width, length]
+            "right_size": list [thick, width, length]
+            thick of gripper finger, width of gripper finger, length of gripper finger
+    output:
+    """
+    if expect_orientation is None:
+        expect_orientation = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
+
+    refine = np.zeros(3)
+    grab_width = kwargs.get('grab_width', 0.10)
+    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
+    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
+    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
+
+    left_volume = left_size[0] * left_size[1] * left_size[2] / (voxel_size**3)
+    right_volume = right_size[0] * right_size[1] * right_size[2] / (voxel_size**3)
+
+    points = points - target_position[None, :] # (n, 3) - (1, 3) = (n, 3)
+    points = expect_orientation.T @ points.T # (3, 3) @ (3, n) = (3, n)
+
+    points = points[:,
+        (points[2] < left_size[2]) & (points[2] > -(0.1 + hand_size[2]))
+        & (points[0] < (0.05 + hand_size[1]/2)) & (points[0] > -(0.05 + hand_size[1]/2))
+        & (points[1] < (0.05 + grab_width/2 + left_size[0]))
+        & (points[1] > -(0.05 + grab_width/2 + right_size[0]))
+    ]
+
+    frist_sign = False
+    y_n = y_p = False
+    left_last = right_last = False
+    hand_num = left_num = right_num = 0
+    while hand_num < 20 and left_num < 10 and right_num < 10:
+        collision_code = collision_detector(
+            points, refine, volume=[left_volume, right_volume], **kwargs
+        )
+
+        if collision_code == 0:
+            if y_p and not y_n:
+                refine[1] += 0.004
+                y_p = y_n = False
+                left_last = True
+                right_last = False
+                continue
+            if not y_p and y_n:
+                refine[1] -= 0.004
+                y_p = y_n = False
+                left_last = False
+                right_last = True
+                continue
+            if search_mode:
+                break
+            position = target_position + (expect_orientation @ refine.T).T
+            rmat = tfs.affines.compose(
+                np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
+            )
+            return rmat, True
+        if collision_code == 6:
+            return 1202, False
+
+        if collision_code == 1:
+            refine[2] -= 0.008
+            hand_num += 1
+            print("z + 0.008")
+            continue
+
+        if collision_code == 2:
+            if frist_sign:
+                print("z + 0.004")
+                refine[2] -= 0.004
+                frist_sign = True
+                continue
+            refine[0] -= 0.002
+            hand_num += 1
+            print("x - 0.002")
+            continue
+
+        if collision_code == 3:
+            if frist_sign:
+                print("z + 0.004")
+                refine[2] -= 0.004
+                frist_sign = True
+                continue
+            refine[0] += 0.002
+            hand_num += 1
+            print("x + 0.002")
+            continue
+
+        if collision_code == 4:
+            y_p = True
+            y_n = False
+            refine[1] += 0.004
+            left_num += 1
+            print("y + 0.004")
+            continue
+
+        if collision_code == 5:
+            y_p = False
+            y_n = True
+            refine[1] -= 0.004
+            right_num += 1
+            print("y - 0.004")
+            continue
+    else:
+        return 1202, False
+
+    if search_mode:
+        right_num = left_num = 0
+        if left_last and not right_last:
+            y_min = refine[1]
+            while left_num < 10:
+                left_num += 1
+                refine[1] += 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] - 0.004 + y_min) / 2
+                    break
+            else:
+                refine[1] = y_min + 0.2
+            print(f"left_num = {left_num}")
+
+        elif not left_last and right_last:
+            y_max = refine[1]
+            while right_num < 10:
+                right_num += 1
+                refine[1] -= 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] + 0.004 + y_max) / 2
+                    break
+            else:
+                refine[1] = y_max - 0.2
+            print(f"right_num = {right_num}")
+
+        elif not left_last and not right_last:
+            y_cur = refine[1]
+            while left_num < 6:
+                left_num += 1
+                refine[1] += 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    y_max = refine[1] - 0.004
+                    break
+            else:
+                y_max = refine[1] + 0.024
+            print(f"left_num = {left_num}")
+
+            refine[1] = y_cur
+            while right_num < 6:
+                right_num += 1
+                refine[1] -= 0.004
+                collision_code = collision_detector(
+                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
+                )
+                if collision_code:
+                    refine[1] = (refine[1] + 0.004 + y_max) / 2
+                    break
+            else:
+                refine[1] = (y_cur - 0.024 + y_max) / 2
+            print(f"right_num = {right_num}")
+
+        position = target_position + (expect_orientation @ refine.T).T
+        rmat = tfs.affines.compose(
+            np.squeeze(np.asarray(position)),
+            expect_orientation,
+            [1, 1, 1]
+        )
+        return rmat, True
+    return 1202, False
--- a/vision_detect/vision_detect/VisionDetect/utils/image_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/image_tools.py
@@ -177,8 +177,8 @@ def save_img(img: np.ndarray, save_name, save_dir: str | None = None, mark_cur_t
        save_path = os.path.join(home_path, save_name)

    if mark_cur_time:
-        cur_time = time.time() * 1000
-        path, ext = os.path.splitext(save_name)
+        cur_time = int(time.time() * 1000)
+        path, ext = os.path.splitext(save_path)
        save_path = path + f'_{cur_time}' + ext

    cv2.imwrite(save_path, img)
--- a/vision_detect/vision_detect/VisionDetect/utils/pointclouds_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/pointclouds_tools.py
@@ -104,16 +104,17 @@ def create_o3d_pcd(depth_img, camera_size, k, **kwargs):
    orign_point_clouds = orign_point_clouds.voxel_down_sample(
        kwargs.get("voxel_size", 0.01)
    )
-    orign_point_clouds, _ = orign_point_clouds.remove_radius_outlier(
-        nb_points=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
-        radius=kwargs.get("voxel_size", 0.01) * 10
-    )
-
-    # 统计滤波
-    orign_point_clouds, _ = orign_point_clouds.remove_statistical_outlier(
-        nb_neighbors=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
-        std_ratio=2.0
-    )
+    # # 半径滤波
+    # orign_point_clouds, _ = orign_point_clouds.remove_radius_outlier(
+    #     nb_points=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
+    #     radius=kwargs.get("voxel_size", 0.01) * 10
+    # )
+    #
+    # # 统计滤波
+    # orign_point_clouds, _ = orign_point_clouds.remove_statistical_outlier(
+    #     nb_neighbors=int(round(20 * kwargs.get("voxel_size", 0.01) / 0.005)),
+    #     std_ratio=2.0
+    # )
    orign_points = np.asarray(orign_point_clouds.points)

    return orign_points