53 Commits

Author SHA1 Message Date
liangyuxuan
cc520c349e compliete initbase 2026-01-20 19:58:14 +08:00
liangyuxuan
52c10dcb3c Merge branch 'feature' into feature_cpp_test 2026-01-19 14:09:29 +08:00
liangyuxuan
611b529075 修改抓取位置微调 2026-01-19 14:06:09 +08:00
liangyuxuan
039bb2d165 添加抓取位置微调、检测图片和原图片保存 2026-01-15 17:39:45 +08:00
liangyuxuan
1c9af20786 Merge branch 'feature' into feature_cpp_test 2026-01-12 09:58:05 +08:00
liangyuxuan
a59776e6c2 增加end2end方式计算位姿 2026-01-12 09:57:30 +08:00
liangyuxuan
f937329e4b 增加end2end方式计算位姿 2026-01-12 09:48:58 +08:00
liangyuxuan
6fbf13e232 增加end2end方式计算位姿 2026-01-12 09:48:54 +08:00
liangyuxuan
cd4bbdc5f3 1 2025-12-30 17:34:23 +08:00
liangyuxuan
97c142f6e3 更改去噪参数 2025-12-30 13:22:00 +08:00
liangyuxuan
eeedd4bfc6 1 2025-12-28 16:24:16 +08:00
liangyuxuan
08319d9eba 1 2025-12-28 16:19:49 +08:00
liangyuxuan
7d3424b7e5 Merge branch 'feature' into feature_cpp_test 2025-12-28 13:58:17 +08:00
liangyuxuan
3025b5a23a 采集的深度数据更改为.png存储 2025-12-28 13:57:37 +08:00
liangyuxuan
563737932b Merge branch 'feature' into feature_cpp_test 2025-12-27 16:49:25 +08:00
liangyuxuan
93f3a8d45e 移动文件位置 2025-12-27 16:47:46 +08:00
liangyuxuan
12fed65d8a 更改深度raw的保存方式为.zip 2025-12-27 16:45:55 +08:00
liangyuxuan
87a5697e75 迁移数据采集节点至vision_tools,修改点下采样参数 2025-12-27 11:08:53 +08:00
liangyuxuan
8279687f4c 修正数据采集节点 2025-12-25 15:39:13 +08:00
liangyuxuan
aff813b107 检测物体排序修正 2025-12-24 19:48:16 +08:00
liangyuxuan
a89e525e7a Merge branch 'feature' into feature_cpp_test 2025-12-24 17:52:42 +08:00
liangyuxuan
a5f340bc35 添加相机数据采集服务客户端 2025-12-24 17:51:56 +08:00
liangyuxuan
d730325a0d 添加相机数据采集服务客户端 2025-12-24 17:51:03 +08:00
liangyuxuan
2e4cb18fa6 添加相机数据采集服务客户端 2025-12-24 17:33:26 +08:00
liangyuxuan
facd43f54f 添加相机数据采集节点 2025-12-24 16:44:06 +08:00
liangyuxuan
59b90201e4 设置检测输出顺序为:左->右,下->上 2025-12-24 10:42:44 +08:00
liangyuxuan
db330ec33c 1 2025-12-23 18:25:22 +08:00
liangyuxuan
610f488d1e Merge branch 'feature' into feature_cpp_test 2025-12-23 15:14:50 +08:00
liangyuxuan
d62faaa9cc 1 2025-12-23 15:13:01 +08:00
liangyuxuan
eec02de5a7 删除多余文件 2025-12-23 09:57:52 +08:00
liangyuxuan
389bd2b50e Merge branch 'feature' into feature_cpp_test 2025-12-19 15:51:38 +08:00
liangyuxuan
b8444c82b9 头部相机标定文件 2025-12-19 15:50:59 +08:00
liangyuxuan
44b2cc43e0 add onnxruntime inference(no post-processing) 2025-12-18 18:52:49 +08:00
liangyuxuan
4f8ae43239 update test.cpp CMakeLists.txt 2025-12-18 09:07:09 +08:00
liangyuxuan
78c129da83 Merge branch 'feature' into feature_cpp_test 2025-12-17 14:07:04 +08:00
liangyuxuan
0f27c5cd3c Merge branch 'feature' into feature_cpp_test 2025-12-16 17:26:29 +08:00
liangyuxuan
0c41be3154 1 2025-12-12 18:41:45 +08:00
liangyuxuan
6dafd77bcc 1 2025-12-12 16:03:12 +08:00
liangyuxuan
9fd8951fae 1 2025-12-12 15:47:41 +08:00
liangyuxuan
c5a4b5afed 1 2025-12-12 15:07:28 +08:00
liangyuxuan
6ddf95f737 1 2025-12-12 10:07:30 +08:00
liangyuxuan
360372f681 Merge branch 'feature' into feature_cpp_test 2025-12-12 10:05:49 +08:00
liangyuxuan
b264bbd6bb 1 2025-12-11 18:36:15 +08:00
liangyuxuan
ae8f7d370e add CMakeLists 2025-12-11 18:17:47 +08:00
liangyuxuan
e13deca6b7 update 2025-12-11 15:59:32 +08:00
liangyuxuan
f8d4167542 Merge branch 'feature' into feature_cpp_test 2025-12-10 14:50:48 +08:00
liangyuxuan
8c7c38eaf4 ConfigBase类完成-CPP 2025-12-05 18:48:09 +08:00
liangyuxuan
250e2600f8 Merge branch 'feature' into feature_cpp_test 2025-12-05 14:28:49 +08:00
liangyuxuan
98483772c2 Merge branch 'feature' into feature_cpp_test 2025-12-04 11:16:19 +08:00
liangyuxuan
ed4136b36d Merge branch 'feature' into feature_cpp_test 2025-12-04 11:12:23 +08:00
liangyuxuan
0af9c10189 Merge remote-tracking branch 'origin/feature_cpp_test' into feature_cpp_test 2025-12-03 18:30:58 +08:00
liangyuxuan
36077ceb19 Merge branch 'feature' into feature_cpp_test 2025-12-03 18:10:44 +08:00
liangyuxuan
8e13d777ff add cpp 2025-12-03 13:58:18 +08:00
1952 changed files with 405801 additions and 3578 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@
**/build/
**/install/
**/log/
**/VisionDetect/**/**.so

View File

@@ -0,0 +1,28 @@
{
"T": [
[
-0.029312906184646676,
0.9941492398859674,
0.10396173510077301,
-0.0809046635564155
],
[
-0.9993360095995784,
-0.02689542239474161,
-0.024579995358005674,
0.036195409684615196
],
[
-0.021640088923136416,
-0.10461321660460321,
0.9942775173275502,
0.04913468358915477
],
[
0.0,
0.0,
0.0,
1.0
]
]
}

View File

@@ -1,22 +1,22 @@
{
"T": [
[
0.013635791720580838,
0.9992765703636767,
-0.03550212819481636,
-0.08645650535173793
0.02515190926712163,
0.9984434279780899,
-0.049780544267610596,
-0.08312977955463981
],
[
-0.9998865276218899,
0.013399556063222661,
-0.006883587549256321,
0.036196137264974
-0.9996337211449028,
0.025617085950903107,
0.008728599966714646,
0.059044674332170574
],
[
-0.006402895000908794,
0.03559196285001416,
0.999345893630474,
0.014407883180676354
0.00999024575340213,
0.04954276975245833,
0.9987220378839358,
0.017378234075134728
],
[
0.0,

View File

@@ -0,0 +1,28 @@
{
"T": [
[
0.01868075138315295,
0.8881359544145228,
0.45920099738999437,
-0.19284748723617093
],
[
0.002854261818715398,
-0.4593266423892484,
0.8882628489252997,
0.057154511710361816
],
[
0.9998214254141742,
-0.01528273756969839,
-0.011115539354645598,
-0.04187423675125192
],
[
0.0,
0.0,
0.0,
1.0
]
]
}

View File

@@ -192,13 +192,13 @@ class _Calibration:
np.linalg.inv(
self._function(
self._hand[i], self._hand[i + 1], self._hand[i + 2],
self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
)
)
if self._mode == 'eye_to_hand' else
self._function(
self._hand[i], self._hand[i + 1], self._hand[i + 2],
self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
)
)
self._Hcs.append(

View File

@@ -52,7 +52,7 @@ def calculate(mode, hand_path, camera_path, hand_eye_path):
for i in range(sum_):
hand = [hand_all[i*7], hand_all[i*7+1], hand_all[i*7+2], hand_all[i*7+3], hand_all[i*7+4], hand_all[i*7+5], hand_all[i*7+6]]
camera = [camera_all[i*7], camera_all[i*7+1], camera_all[i*7+2], camera_all[i*7+3], camera_all[i*7+4], camera_all[i*7+5], camera_all[i*7+6]]
T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[3], hand[4], hand[5], hand[6])
T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[6], hand[3], hand[4], hand[5])
T_cal_in_camera = get_matrix_quat(camera[0], camera[1], camera[2], camera[3], camera[4], camera[5], camera[6])
if mode == "eye_to_hand":

24
tools/pt2vino.py Normal file
View File

@@ -0,0 +1,24 @@
import argparse
from ultralytics import YOLO
def main(checkpoint_path):
    """Export a YOLO checkpoint to an OpenVINO model directory.

    The export targets CPU, uses FP16 weights, a fixed 1280x720 base image
    size with dynamic shapes enabled, and batch size 1.
    """
    export_options = dict(
        format="openvino",
        imgsz=(1280, 720),
        dynamic=True,
        simplify=True,
        half=True,
        workspace=0.8,
        batch=1,
        device="cpu",
    )
    YOLO(checkpoint_path).export(**export_options)
if __name__ == "__main__":
    # CLI entry point: a single positional argument naming the .pt checkpoint.
    cli = argparse.ArgumentParser()
    cli.add_argument("checkpoint_path", type=str)
    cli_args = cli.parse_args()
    main(cli_args.checkpoint_path)
    # main(checkpoint_path="checkpoint/medical_sense-seg.pt")

View File

@@ -0,0 +1,29 @@
{
"info": {},
"warring": {
"0000": "Success",
"1000": "Detected object count is 0",
"1001": "Depth crop is None",
"1003": "Failed to detect a valid pose",
"1100": "Object point cloud contains excessive noise",
"1101": "The point cloud is empty",
"1200": "The number of points is insufficient to compute an OBB",
"1201": "PCA output vector is None",
"1202": "This pose cannot be grasped, and position refinement failed",
"1300": "E2E model input data 'coors' are fewer than 128",
"1301": "E2E model input data 'point_clouds' are fewer than 128",
"1302": "The 'true num' of points is 0; No graspable points are available",
"1303": "The model returned no predictions",
"1304": "All rotation vector processing failed; no valid rotation matrix was generated"
},
"error": {},
"fatal": {}
}

View File

@@ -48,5 +48,12 @@
"ransac_voxel_size": 0.005,
"icp_voxel_radius": [0.004, 0.002, 0.001],
"icp_max_iter": [50, 30, 14]
},
"E2E_configs": {
"checkpoint_path": "checkpoints/posenet.pt",
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010,
"collision_thresh": 0.01
}
}

View File

@@ -1,5 +1,5 @@
{
"node_name": "bottle_detect_service",
"node_name": "medical_sense_service",
"output_boxes": "True",
"output_masks": "False",
@@ -17,7 +17,7 @@
"detect_mode": "Detect",
"Detect_configs": {
"checkpoint_path": "checkpoints/medical_sense-seg.pt",
"confidence": 0.50,
"confidence": 0.75,
"classes": []
},
@@ -25,6 +25,13 @@
"PCA_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010
"voxel_size": 0.005
},
"E2E_configs": {
"checkpoint_path": "checkpoints/posenet.pt",
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.0005,
"collision_thresh": 0.01
}
}

View File

@@ -1,47 +0,0 @@
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.actions import DeclareLaunchArgument, OpaqueFunction
from launch.substitutions import LaunchConfiguration
import os
import ast
import json
from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
def generate_launch_description():
    """Build the launch description for the crossboard detection node.

    Declares the output-flag and topic-name launch arguments, then spawns
    the ``crossboard_detect_node`` from the ``vision_detect`` package with
    those values passed as ROS parameters.
    """
    args_detect = [
        DeclareLaunchArgument('output_boxes', default_value='True'),
        DeclareLaunchArgument('output_masks', default_value='False'),
        DeclareLaunchArgument('color_image_topic', default_value='/camera/color/image_raw'),
        DeclareLaunchArgument('depth_image_topic', default_value='/camera/depth/image_raw'),
        DeclareLaunchArgument('camera_info_topic', default_value='/camera/color/camera_info'),
    ]

    def create_detect_node(context):
        # Resolve the launch-argument substitutions into plain strings.
        output_boxes = LaunchConfiguration('output_boxes').perform(context)
        output_masks = LaunchConfiguration('output_masks').perform(context)
        color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
        depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
        camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
        return [
            Node(
                package='vision_detect',
                executable='crossboard_detect_node',
                parameters=[{
                    # Boolean flags arrive as strings; compare case-insensitively.
                    'output_boxes': output_boxes.lower() == 'true',
                    'output_masks': output_masks.lower() == 'true',
                    'color_image_topic': color_image_topic,
                    'depth_image_topic': depth_image_topic,
                    'camera_info_topic': camera_info_topic,
                }]
            )
        ]

    return LaunchDescription(args_detect + [
        OpaqueFunction(function=create_detect_node),
    ])

View File

@@ -1,49 +0,0 @@
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.actions import DeclareLaunchArgument, OpaqueFunction
from launch.substitutions import LaunchConfiguration
import os
import ast
import json
from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/detect_service.json')
with open(config_dir, "r") as f:
configs = json.load(f)
def generate_launch_description():
    """Build the launch description for the detection service node.

    Argument defaults come from ``detect_service.json`` (loaded into
    ``configs`` at module import); each value can still be overridden on
    the launch command line.
    """
    args_detect = [
        DeclareLaunchArgument('checkpoint_name', default_value=configs['checkpoint_name']),
        DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
        DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
        DeclareLaunchArgument('set_confidence', default_value=configs['set_confidence']),
        DeclareLaunchArgument('classes', default_value=configs['classes']),
    ]

    def create_detect_node(context):
        # Resolve the launch-argument substitutions into plain strings.
        checkpoint = LaunchConfiguration('checkpoint_name').perform(context)
        output_boxes = LaunchConfiguration('output_boxes').perform(context)
        output_masks = LaunchConfiguration('output_masks').perform(context)
        conf = LaunchConfiguration('set_confidence').perform(context)
        classes = LaunchConfiguration('classes').perform(context)
        return [
            Node(
                package='vision_detect',
                executable='detect_service_node',
                parameters=[{
                    'checkpoint_name': checkpoint,
                    # Boolean flags arrive as strings; compare case-insensitively.
                    'output_boxes': output_boxes.lower() == 'true',
                    'output_masks': output_masks.lower() == 'true',
                    'set_confidence': float(conf),
                    'classes': classes,
                }]
            )
        ]

    return LaunchDescription(args_detect + [
        OpaqueFunction(function=create_detect_node),
    ])

View File

@@ -26,6 +26,7 @@ def generate_launch_description():
package='vision_detect',
executable='detect_node',
name=configs['node_name'],
output="screen",
parameters=[{
'configs_path': configs_path,
}]

View File

@@ -1,26 +1,47 @@
import os
from glob import glob
from setuptools import find_packages, setup
package_name = 'vision_detect'
openvino_files = []
for model_dir in glob("checkpoints/*_openvino_model"):
model_name = os.path.basename(model_dir)
model_files = glob(os.path.join(model_dir, '*'))
openvino_files.append(
('share/' + package_name + '/checkpoints/' + model_name, model_files)
)
data_files = [
('share/ament_index/resource_index/packages', ['resource/' + package_name]),
('share/' + package_name, ['package.xml']),
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
('share/' + package_name + '/configs', glob('configs/*.json')),
('share/' + package_name + '/configs/flexiv_configs',
glob('configs/flexiv_configs/*.json')),
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
('share/' + package_name + '/configs/launch_configs',
glob('configs/launch_configs/*.json')),
('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
]
data_files.extend(openvino_files)
setup(
name=package_name,
version='0.0.0',
packages=find_packages(exclude=['test']),
data_files=[
('share/ament_index/resource_index/packages', ['resource/' + package_name]),
('share/' + package_name, ['package.xml']),
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
('share/' + package_name + '/configs', glob('configs/*.json')),
('share/' + package_name + '/configs/flexiv_configs', glob('configs/flexiv_configs/*.json')),
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
('share/' + package_name + '/configs/launch_configs', glob('configs/launch_configs/*.json')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
],
data_files=data_files,
install_requires=['setuptools'],
zip_safe=True,
include_package_data=True,
@@ -31,10 +52,6 @@ setup(
tests_require=['pytest'],
entry_points={
'console_scripts': [
# 'detect_service_node = vision_detect.detect_service:main',
# 'detect_topic_node = vision_detect.detect_topic:main',
# 'box_detect_service_node = vision_detect.detect_box_service:main',
# 'red_detect_topic_node = vision_detect.detect_red_topic:main',
# 'red_detect_service_node = vision_detect.detect_red_service:main',
@@ -42,11 +59,8 @@ setup(
'flexiv_detect_service_node = vision_detect.flexivaidk_detect_service:main',
'sub_pose_node = vision_detect.sub_pose:main',
# 'calibration_node = vision_detect.hand_eye_calibration:main',
# 'crossboard_detect_node = vision_detect.crossboard_detect:main',
'service_client_node = vision_detect.service_client:main',
# 'get_camera_pose_node = vision_detect.get_camera_pose:main',
# 'calculate_node = vision_detect.calculate:main',
'get_camera_pose_node = vision_detect.get_camera_pose:main',
'detect_node = vision_detect.detect_node:main',
],

View File

@@ -0,0 +1 @@
from .gsnet import *

View File

@@ -0,0 +1,7 @@
from .models.graspnet import *
from .utils.collision_detector import *
__all__ = [
"GraspNet", "pred_decode", "ModelFreeCollisionDetector"
]

View File

@@ -0,0 +1,132 @@
import os
import sys
import numpy as np
import argparse
from PIL import Image
import torch
import open3d as o3d
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(ROOT_DIR)
from models.graspnet import GraspNet, pred_decode
import collections.abc as container_abcs
import MinkowskiEngine as ME
from utils.collision_detector import ModelFreeCollisionDetector
# Command-line configuration shared by every function in this demo script.
# NOTE(review): parse_args() runs at import time, so importing this module
# from another program will consume that program's argv — confirm intended.
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_path', default='checkpoints/minkuresunet_realsense.tar')
parser.add_argument('--dump_dir', help='Dump dir to save outputs', default='results/')
parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
parser.add_argument('--collision_thresh', type=float, default=0.01,
                    help='Collision Threshold in collision detection [default: 0.01]')
parser.add_argument('--voxel_size_cd', type=float, default=0.01,
                    help='Voxel Size for collision detection')
# NOTE(review): 'store_true' combined with default=True means these two
# flags are always True regardless of the command line.
parser.add_argument('--infer', action='store_true', default=True)
parser.add_argument('--vis', action='store_true', default=True)
cfgs = parser.parse_args()
# ------------------------------------------------------------------------- GLOBAL CONFIG BEG
# if not os.path.exists(cfgs.dump_dir):
#     os.mkdir(cfgs.dump_dir)
def minkowski_collate_fn(list_data):
    """Collate a list of sample dicts into a MinkowskiEngine sparse batch.

    'coors'/'feats' are batched and voxel-quantized by MinkowskiEngine;
    'quantize2original' maps the quantized rows back to the original
    points.  Any other keys in the samples are converted recursively to
    stacked torch tensors by the nested helper and merged into the same
    result dict.
    """
    coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
                                                                [d["feats"] for d in list_data])
    # sparse_quantize expects integer, C-contiguous coordinates.
    coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
        coordinates_batch, features_batch, return_index=True, return_inverse=True)
    res = {
        "coors": coordinates_batch,
        "feats": features_batch,
        "quantize2original": quantize2original
    }

    def collate_fn_(batch):
        # Recursively convert numpy arrays into stacked torch tensors.  For
        # mappings it writes into the shared `res` dict from the enclosing
        # scope, skipping the already-quantized 'coors'/'feats' keys.
        if type(batch[0]).__module__ == 'numpy':
            return torch.stack([torch.from_numpy(b) for b in batch], 0)
        elif isinstance(batch[0], container_abcs.Sequence):
            return [[torch.from_numpy(sample) for sample in b] for b in batch]
        elif isinstance(batch[0], container_abcs.Mapping):
            for key in batch[0]:
                if key == 'coors' or key == 'feats':
                    continue
                res[key] = collate_fn_([d[key] for d in batch])
            return res

    res = collate_fn_(list_data)
    return res
def data_process(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
    """Transform the cloud into the table frame and build the network input.

    Returns a dict with float32 'point_clouds' (N x 3), voxel 'coors'
    (points divided by ``voxel_size``) and all-ones 'feats'.
    """
    cam_index = 0
    cam_poses = np.array([np.eye(4)])  # camera poses
    table_align = np.eye(4)  # pose of Camera_0 relative to the table surface
    world_transform = np.dot(table_align, cam_poses[int(cam_index)])
    pcd.transform(world_transform)
    xyz = np.asarray(pcd.points).astype(np.float32)
    return {
        'point_clouds': xyz,
        'coors': xyz / voxel_size,
        'feats': np.ones_like(xyz),
    }
def get_grasp_dict(preds):
    """Split an (N, 17) grasp prediction array into named attributes.

    Column layout: score, width, height, depth, 9 rotation entries
    (reshaped to N x 3 x 3), 3 translation entries, object id.
    """
    rotation_mats = preds[:, 4:13].reshape(-1, 3, 3)
    translations = preds[:, 13:16].reshape(-1, 3)
    return {
        "score": preds[:, 0],
        "width": preds[:, 1],
        "height": preds[:, 2],
        "depth": preds[:, 3],
        "rotation": rotation_mats,
        "translation": translations,
        "object_id": preds[:, 16],
    }
def inference(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
    """Run GraspNet on a point cloud and return the best grasp predictions.

    The cloud is preprocessed by ``data_process``/``minkowski_collate_fn``,
    scored by the network, the 10 highest-scoring grasps are kept, and
    grasps colliding with the scene cloud are filtered out.

    Returns an (N, 17) numpy array of grasp predictions, N <= 10.
    """
    data_input = data_process(pcd, voxel_size)
    batch_data = minkowski_collate_fn([data_input])
    net = GraspNet(seed_feat_dim=512, is_training=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    net.to(device)
    # Load checkpoint.  Bug fix: load_state_dict() expects a state dict, not
    # a file path -- deserialize the checkpoint from disk first.  GraspNet
    # checkpoints commonly wrap the weights under 'model_state_dict';
    # fall back to the raw object otherwise.
    checkpoint_path = os.path.join(ROOT_DIR, cfgs.checkpoint_path)
    checkpoint = torch.load(checkpoint_path, map_location=device)
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        checkpoint = checkpoint['model_state_dict']
    net.load_state_dict(checkpoint)
    net.eval()
    for key in batch_data:
        batch_data[key] = batch_data[key].to(device)
    # Forward pass
    with torch.no_grad():
        end_points = net(batch_data)
        grasp_preds = pred_decode(end_points)
    preds = grasp_preds[0].detach().cpu().numpy()
    # Keep the 10 highest-scoring grasps (column 0 is the score).
    sorted_index = np.argsort(-preds[:, 0])
    preds = preds[sorted_index]
    preds = preds[:10]
    # collision detection
    if cfgs.collision_thresh > 0:
        cloud = data_input['point_clouds']
        mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)
        collision_mask = mfcdetector.detect(get_grasp_dict(preds), approach_dist=0.05,
                                            collision_thresh=cfgs.collision_thresh)
        preds = preds[~collision_mask]
    return preds
if __name__ == '__main__':
    # Bug fix: the original called data_process() without its required point
    # cloud argument and then passed the resulting dict to inference(), which
    # expects the raw open3d point cloud (it runs data_process internally).
    # Load a cloud from disk and hand it to inference() directly.
    pcd = o3d.io.read_point_cloud(os.path.join(ROOT_DIR, 'example.pcd'))  # TODO(review): point at a real capture
    preds = inference(pcd)
    print(preds)

View File

@@ -0,0 +1,224 @@
import MinkowskiEngine as ME
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
from .resnet import ResNetBase
class MinkUNetBase(ResNetBase):
    """U-Net-style sparse-convolution backbone built on MinkowskiEngine.

    Four stride-2 encoder stages (conv + residual blocks) followed by four
    transposed-conv decoder stages; each decoder stage concatenates the
    encoder features of the matching resolution (skip connections) before
    its residual blocks.  Subclasses select BLOCK/LAYERS/PLANES.
    """
    BLOCK = None
    # NOTE(review): this PLANES = None is immediately shadowed by the tuple
    # assignment a few lines below; subclasses override PLANES again.
    PLANES = None
    DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1)
    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)
    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)
    INIT_DIM = 32
    OUT_TENSOR_STRIDE = 1

    # To use the model, must call initialize_coords before forward pass.
    # Once data is processed, call clear to reset the model before calling
    # initialize_coords
    def __init__(self, in_channels, out_channels, D=3):
        # D is the spatial dimension of the sparse tensors (3 for point clouds).
        ResNetBase.__init__(self, in_channels, out_channels, D)

    def network_initialization(self, in_channels, out_channels, D):
        """Instantiate all encoder/decoder layers (called by ResNetBase)."""
        # Output of the first conv concated to conv6
        self.inplanes = self.INIT_DIM
        # Stem: full-resolution conv, no stride.
        self.conv0p1s1 = ME.MinkowskiConvolution(
            in_channels, self.inplanes, kernel_size=5, dimension=D)
        self.bn0 = ME.MinkowskiBatchNorm(self.inplanes)
        # Encoder stage 1 (tensor stride 2).
        self.conv1p1s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn1 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block1 = self._make_layer(self.BLOCK, self.PLANES[0],
                                       self.LAYERS[0])
        # Encoder stage 2 (tensor stride 4).
        self.conv2p2s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn2 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block2 = self._make_layer(self.BLOCK, self.PLANES[1],
                                       self.LAYERS[1])
        # Encoder stage 3 (tensor stride 8).
        self.conv3p4s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn3 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block3 = self._make_layer(self.BLOCK, self.PLANES[2],
                                       self.LAYERS[2])
        # Encoder stage 4 (tensor stride 16, bottleneck).
        self.conv4p8s2 = ME.MinkowskiConvolution(
            self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
        self.bn4 = ME.MinkowskiBatchNorm(self.inplanes)
        self.block4 = self._make_layer(self.BLOCK, self.PLANES[3],
                                       self.LAYERS[3])
        # Decoder stage 1: upsample to stride 8, concat encoder stage-3 output.
        self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=D)
        self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4])
        self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion
        self.block5 = self._make_layer(self.BLOCK, self.PLANES[4],
                                       self.LAYERS[4])
        # Decoder stage 2: upsample to stride 4, concat encoder stage-2 output.
        self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=D)
        self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5])
        self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion
        self.block6 = self._make_layer(self.BLOCK, self.PLANES[5],
                                       self.LAYERS[5])
        # Decoder stage 3: upsample to stride 2, concat encoder stage-1 output.
        self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=D)
        self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6])
        self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion
        self.block7 = self._make_layer(self.BLOCK, self.PLANES[6],
                                       self.LAYERS[6])
        # Decoder stage 4: upsample to full resolution, concat stem output.
        self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose(
            self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=D)
        self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7])
        self.inplanes = self.PLANES[7] + self.INIT_DIM
        self.block8 = self._make_layer(self.BLOCK, self.PLANES[7],
                                       self.LAYERS[7])
        # Final 1x1 conv projecting to the requested output channels.
        self.final = ME.MinkowskiConvolution(
            self.PLANES[7] * self.BLOCK.expansion,
            out_channels,
            kernel_size=1,
            bias=True,
            dimension=D)
        self.relu = ME.MinkowskiReLU(inplace=True)

    def forward(self, x):
        """Encode then decode ``x``; returns a sparse tensor at full resolution."""
        out = self.conv0p1s1(x)
        out = self.bn0(out)
        out_p1 = self.relu(out)
        out = self.conv1p1s2(out_p1)
        out = self.bn1(out)
        out = self.relu(out)
        out_b1p2 = self.block1(out)
        out = self.conv2p2s2(out_b1p2)
        out = self.bn2(out)
        out = self.relu(out)
        out_b2p4 = self.block2(out)
        out = self.conv3p4s2(out_b2p4)
        out = self.bn3(out)
        out = self.relu(out)
        out_b3p8 = self.block3(out)
        # tensor_stride=16
        out = self.conv4p8s2(out_b3p8)
        out = self.bn4(out)
        out = self.relu(out)
        out = self.block4(out)
        # tensor_stride=8
        out = self.convtr4p16s2(out)
        out = self.bntr4(out)
        out = self.relu(out)
        out = ME.cat(out, out_b3p8)
        out = self.block5(out)
        # tensor_stride=4
        out = self.convtr5p8s2(out)
        out = self.bntr5(out)
        out = self.relu(out)
        out = ME.cat(out, out_b2p4)
        out = self.block6(out)
        # tensor_stride=2
        out = self.convtr6p4s2(out)
        out = self.bntr6(out)
        out = self.relu(out)
        out = ME.cat(out, out_b1p2)
        out = self.block7(out)
        # tensor_stride=1
        out = self.convtr7p2s2(out)
        out = self.bntr7(out)
        out = self.relu(out)
        out = ME.cat(out, out_p1)
        out = self.block8(out)
        return self.final(out)
# Named variants: depth (14/18/34/50/101) fixes BLOCK/LAYERS; the letter
# suffix (A/B/C/D/E) selects a channel-width (PLANES) configuration.
class MinkUNet14(MinkUNetBase):
    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1, 1, 1, 1, 1)


class MinkUNet18(MinkUNetBase):
    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)


class MinkUNet34(MinkUNetBase):
    BLOCK = BasicBlock
    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)


class MinkUNet50(MinkUNetBase):
    # Deeper variants switch to bottleneck residual blocks.
    BLOCK = Bottleneck
    LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)


class MinkUNet101(MinkUNetBase):
    BLOCK = Bottleneck
    LAYERS = (2, 3, 4, 23, 2, 2, 2, 2)


class MinkUNet14A(MinkUNet14):
    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)


class MinkUNet14B(MinkUNet14):
    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)


class MinkUNet14C(MinkUNet14):
    PLANES = (32, 64, 128, 256, 192, 192, 128, 128)


# NOTE(review): MinkUNet14Dori and MinkUNet14E use identical PLANES;
# MinkUNet14D below differs — confirm which checkpoints expect which.
class MinkUNet14Dori(MinkUNet14):
    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)


class MinkUNet14E(MinkUNet14):
    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)


class MinkUNet14D(MinkUNet14):
    PLANES = (32, 64, 128, 256, 192, 192, 192, 192)


class MinkUNet18A(MinkUNet18):
    PLANES = (32, 64, 128, 256, 128, 128, 96, 96)


class MinkUNet18B(MinkUNet18):
    PLANES = (32, 64, 128, 256, 128, 128, 128, 128)


class MinkUNet18D(MinkUNet18):
    PLANES = (32, 64, 128, 256, 384, 384, 384, 384)


class MinkUNet34A(MinkUNet34):
    PLANES = (32, 64, 128, 256, 256, 128, 64, 64)


class MinkUNet34B(MinkUNet34):
    PLANES = (32, 64, 128, 256, 256, 128, 64, 32)


class MinkUNet34C(MinkUNet34):
    PLANES = (32, 64, 128, 256, 256, 128, 96, 96)

View File

@@ -0,0 +1,128 @@
""" GraspNet baseline model definition.
Author: chenxi-wang
"""
import logging
# import os
# import sys
import numpy as np
import torch
import torch.nn as nn
import MinkowskiEngine as ME
from .backbone_resunet14 import MinkUNet14D
from .modules import ApproachNet, GraspableNet, CloudCrop, SWADNet
from ..utils.loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT
from ..utils.label_generation import (process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix)
from ...pointnet2.pointnet2_utils import furthest_point_sample, gather_operation
class GraspNet(nn.Module):
    """End-to-end grasp pose prediction network.

    Pipeline: sparse MinkUNet backbone -> per-point graspable classification
    -> farthest-point sampling of graspable seeds -> approach-view estimation
    -> cylinder grouping around each seed -> score/width per (angle, depth)
    bin (SWADNet).
    """

    def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True):
        super().__init__()
        self.is_training = is_training
        self.seed_feature_dim = seed_feat_dim
        self.num_depth = NUM_DEPTH
        self.num_angle = NUM_ANGLE
        self.M_points = M_POINT
        self.num_view = NUM_VIEW
        self.backbone = MinkUNet14D(in_channels=3, out_channels=self.seed_feature_dim, D=3)
        self.graspable = GraspableNet(seed_feature_dim=self.seed_feature_dim)
        self.rotation = ApproachNet(self.num_view, seed_feature_dim=self.seed_feature_dim, is_training=self.is_training)
        self.crop = CloudCrop(nsample=16, cylinder_radius=cylinder_radius, seed_feature_dim=self.seed_feature_dim)
        self.swad = SWADNet(num_angle=self.num_angle, num_depth=self.num_depth)

    def forward(self, end_points):
        """Run the full pipeline; mutates and returns ``end_points``.

        Returns None when a batch element has no graspable points at all
        (callers must handle this case).
        """
        seed_xyz = end_points['point_clouds']  # use all sampled point cloud, B*Ns*3
        B, point_num, _ = seed_xyz.shape  # batch _size
        # point-wise features
        coordinates_batch = end_points['coors']
        features_batch = end_points['feats']
        mink_input = ME.SparseTensor(features_batch, coordinates=coordinates_batch)
        seed_features = self.backbone(mink_input).F
        # Map quantized voxels back to the original points, then to B*feat*Ns.
        seed_features = seed_features[end_points['quantize2original']].view(B, point_num, -1).transpose(1, 2)
        end_points = self.graspable(seed_features, end_points)
        seed_features_flipped = seed_features.transpose(1, 2)  # B*Ns*feat_dim
        objectness_score = end_points['objectness_score']
        graspness_score = end_points['graspness_score'].squeeze(1)
        objectness_pred = torch.argmax(objectness_score, 1)
        objectness_mask = (objectness_pred == 1)
        graspness_mask = graspness_score > GRASPNESS_THRESHOLD
        # A point is a grasp candidate when it is on an object AND graspable.
        graspable_mask = objectness_mask & graspness_mask
        seed_features_graspable = []
        seed_xyz_graspable = []
        graspable_num_batch = 0.
        for i in range(B):
            cur_mask = graspable_mask[i]
            graspable_num_batch += cur_mask.sum()
            cur_feat = seed_features_flipped[i][cur_mask]  # Ns*feat_dim
            cur_seed_xyz = seed_xyz[i][cur_mask]  # Ns*3 Ns=0->error
            # Bug fix: these per-element diagnostics were emitted with
            # logging.error (wrong severity, eager f-string formatting);
            # demote to DEBUG with lazy %-args.
            logging.debug("cur_mask: %s", cur_mask.shape)
            logging.debug("true num: %s", torch.sum(cur_mask).item())
            logging.debug("cur_seed_xyz: %s", cur_seed_xyz.shape)
            if torch.sum(cur_mask).item() == 0:
                # No graspable points in this sample; abort the forward pass.
                return None
            cur_seed_xyz = cur_seed_xyz.unsqueeze(0)  # 1*Ns*3
            # Downsample candidates to a fixed M_points via FPS.
            fps_idxs = furthest_point_sample(cur_seed_xyz, self.M_points)
            cur_seed_xyz_flipped = cur_seed_xyz.transpose(1, 2).contiguous()  # 1*3*Ns
            cur_seed_xyz = gather_operation(cur_seed_xyz_flipped, fps_idxs).transpose(1, 2).squeeze(0).contiguous()  # Ns*3
            cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous()  # 1*feat_dim*Ns
            cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous()  # feat_dim*Ns
            seed_features_graspable.append(cur_feat)
            seed_xyz_graspable.append(cur_seed_xyz)
        seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0)  # B*Ns*3
        seed_features_graspable = torch.stack(seed_features_graspable)  # B*feat_dim*Ns
        end_points['xyz_graspable'] = seed_xyz_graspable
        end_points['graspable_count_stage1'] = graspable_num_batch / B
        end_points, res_feat = self.rotation(seed_features_graspable, end_points)
        # Residual feature from the approach-view head.
        seed_features_graspable = seed_features_graspable + res_feat
        if self.is_training:
            end_points = process_grasp_labels(end_points)
            grasp_top_views_rot, end_points = match_grasp_view_and_label(end_points)
        else:
            grasp_top_views_rot = end_points['grasp_top_view_rot']
        group_features = self.crop(seed_xyz_graspable.contiguous(), seed_features_graspable.contiguous(), grasp_top_views_rot)
        end_points = self.swad(group_features, end_points)
        return end_points
def pred_decode(end_points):
    """Decode raw network outputs into per-seed 17-dim grasp predictions.

    For each of the M_POINT seeds, picks the (angle, depth) bin with the
    highest predicted score and assembles
    [score, width, height, depth, rotation(9), translation(3), object_id].
    Returns a list with one (M_POINT, 17) tensor per batch element.
    """
    batch_size = len(end_points['point_clouds'])
    grasp_preds = []
    for i in range(batch_size):
        grasp_center = end_points['xyz_graspable'][i].float()
        grasp_score = end_points['grasp_score_pred'][i].float()
        grasp_score = grasp_score.view(M_POINT, NUM_ANGLE*NUM_DEPTH)
        # Best (angle, depth) bin per seed point.
        grasp_score, grasp_score_inds = torch.max(grasp_score, -1)  # [M_POINT]
        grasp_score = grasp_score.view(-1, 1)
        # Bin index encodes angle (major) and depth (minor).
        # NOTE(review): the pi/12 step assumes 12 angle bins — confirm NUM_ANGLE.
        grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / 12
        # Depth bins are 1 cm apart, starting at 1 cm.
        grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01
        grasp_depth = grasp_depth.view(-1, 1)
        # NOTE(review): widths appear to be predicted at 10x scale with a
        # 1.2 safety margin applied here — confirm against training code.
        grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10.
        grasp_width = grasp_width.view(M_POINT, NUM_ANGLE*NUM_DEPTH)
        grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1))
        grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH)
        # Approach vector points from the view toward the object.
        approaching = -end_points['grasp_top_view_xyz'][i].float()
        grasp_rot = batch_viewpoint_params_to_matrix(approaching, grasp_angle)
        grasp_rot = grasp_rot.view(M_POINT, 9)
        # merge preds
        grasp_height = 0.02 * torch.ones_like(grasp_score)
        # Object id is not predicted; fill with -1.
        obj_ids = -1 * torch.ones_like(grasp_score)
        grasp_preds.append(
            torch.cat([grasp_score, grasp_width, grasp_height, grasp_depth, grasp_rot, grasp_center, obj_ids], axis=-1))
    return grasp_preds

View File

@@ -0,0 +1,80 @@
import torch.nn as nn
import torch
def get_loss(end_points):
    """Aggregate the five training losses into one weighted objective.

    Weights: objectness x1, graspness x10, view x100, score x15, width x10.
    Stores the total under 'loss/overall_loss' and returns (loss, end_points).
    """
    obj_loss, end_points = compute_objectness_loss(end_points)
    graspness_loss_, end_points = compute_graspness_loss(end_points)
    view_loss_, end_points = compute_view_graspness_loss(end_points)
    score_loss_, end_points = compute_score_loss(end_points)
    width_loss_, end_points = compute_width_loss(end_points)
    total = obj_loss + 10 * graspness_loss_ + 100 * view_loss_ + 15 * score_loss_ + 10 * width_loss_
    end_points['loss/overall_loss'] = total
    return total, end_points
def compute_objectness_loss(end_points):
    """Two-class objectness cross-entropy plus accuracy/precision/recall metrics."""
    score = end_points['objectness_score']  # (B, 2, num_seed) logits
    label = end_points['objectness_label']  # (B, num_seed)
    loss = nn.CrossEntropyLoss(reduction='mean')(score, label)
    end_points['loss/stage1_objectness_loss'] = loss

    pred = torch.argmax(score, 1)
    correct = (pred == label.long()).float()
    end_points['stage1_objectness_acc'] = correct.mean()
    # precision: correctness over seeds predicted positive
    end_points['stage1_objectness_prec'] = correct[pred == 1].mean()
    # recall: correctness over seeds labelled positive
    end_points['stage1_objectness_recall'] = correct[label == 1].mean()
    return loss, end_points
def compute_graspness_loss(end_points):
    """Point-wise graspness regression (SmoothL1), restricted to object points."""
    score = end_points['graspness_score'].squeeze(1)
    label = end_points['graspness_label'].squeeze(-1)
    mask = end_points['objectness_label'].bool()
    per_point = nn.SmoothL1Loss(reduction='none')(score, label)
    loss = per_point[mask].mean()
    # rank error: disagreement after bucketing graspness into 20 bins of 0.05
    score_c = torch.clamp(score.detach().clone()[mask], 0., 0.99)
    label_c = torch.clamp(label.detach().clone()[mask], 0., 0.99)
    rank_error = (torch.abs(torch.trunc(score_c * 20) - torch.trunc(label_c * 20)) / 20.).mean()
    end_points['stage1_graspness_acc_rank_error'] = rank_error
    end_points['loss/stage1_graspness_loss'] = loss
    return loss, end_points
def compute_view_graspness_loss(end_points):
    """SmoothL1 loss between predicted and labelled per-view graspness."""
    predicted = end_points['view_score']
    target = end_points['batch_grasp_view_graspness']
    loss = nn.SmoothL1Loss(reduction='mean')(predicted, target)
    end_points['loss/stage2_view_loss'] = loss
    return loss, end_points
def compute_score_loss(end_points):
    """SmoothL1 loss between predicted and labelled grasp scores."""
    predicted = end_points['grasp_score_pred']
    target = end_points['batch_grasp_score']
    loss = nn.SmoothL1Loss(reduction='mean')(predicted, target)
    end_points['loss/stage3_score_loss'] = loss
    return loss, end_points
def compute_width_loss(end_points):
    """SmoothL1 width regression, supervised only where the score label is positive.

    Labels are scaled by 10 to match the network's regression range.
    Returns a zero loss (instead of NaN) when the batch has no positive label.
    """
    criterion = nn.SmoothL1Loss(reduction='none')
    grasp_width_pred = end_points['grasp_width_pred']
    grasp_width_label = end_points['batch_grasp_width'] * 10
    loss = criterion(grasp_width_pred, grasp_width_label)
    loss_mask = end_points['batch_grasp_score'] > 0
    if loss_mask.any():
        loss = loss[loss_mask].mean()
    else:
        # no positive grasp anywhere in the batch: mean over an empty
        # selection would be NaN and poison the overall objective
        loss = loss.sum() * 0.
    end_points['loss/stage3_width_loss'] = loss
    return loss, end_points

View File

@@ -0,0 +1,116 @@
# import os
# import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ROOT_DIR = os.path.dirname(BASE_DIR)
# sys.path.append(ROOT_DIR)
from ...pointnet2 import pytorch_utils as pt_utils
from ...pointnet2.pointnet2_utils import CylinderQueryAndGroup
from ..utils.loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix
class GraspableNet(nn.Module):
    """Predicts per-seed objectness logits and a graspness score from seed features."""

    def __init__(self, seed_feature_dim):
        super().__init__()
        self.in_dim = seed_feature_dim  # default: 512
        # one 1x1 conv emits 3 channels: 2 objectness logits + 1 graspness score
        self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1)

    def forward(self, seed_features, end_points):
        graspable = self.conv_graspable(seed_features)      # (B, 3, num_seed)
        end_points['objectness_score'] = graspable[:, :2]   # (B, 2, num_seed)
        end_points['graspness_score'] = graspable[:, 2]     # (B, num_seed)
        return end_points
class ApproachNet(nn.Module):
    """Scores candidate approach views per seed and selects one per seed.

    During training the view is sampled from the (normalized) score
    distribution for exploration; at inference the argmax view is used.
    Outputs the chosen view direction, its rotation matrix and its index.
    """
    def __init__(self, num_view, seed_feature_dim, is_training=True):
        # num_view: number of candidate approach views per seed point
        # seed_feature_dim: channel width of the incoming seed features
        super().__init__()
        self.num_view = num_view
        self.in_dim = seed_feature_dim
        self.is_training = is_training
        self.conv1 = nn.Conv1d(self.in_dim, self.in_dim, 1)
        self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1)
    def forward(self, seed_features, end_points):
        """Returns updated end_points and the intermediate residual features.

        seed_features: (B, in_dim, num_seed) per-seed feature map.
        """
        B, _, num_seed = seed_features.size()
        res_features = F.relu(self.conv1(seed_features), inplace=True)
        features = self.conv2(res_features)
        view_score = features.transpose(1, 2).contiguous()  # (B, num_seed, num_view)
        end_points['view_score'] = view_score
        if self.is_training:
            # normalize view graspness score to 0~1 so it can be used as
            # (unnormalized) multinomial sampling weights per seed
            view_score_ = view_score.clone().detach()
            view_score_max, _ = torch.max(view_score_, dim=2)
            view_score_min, _ = torch.min(view_score_, dim=2)
            view_score_max = view_score_max.unsqueeze(-1).expand(-1, -1, self.num_view)
            view_score_min = view_score_min.unsqueeze(-1).expand(-1, -1, self.num_view)
            view_score_ = (view_score_ - view_score_min) / (view_score_max - view_score_min + 1e-8)
            # sample one view per seed, independently for each batch element
            top_view_inds = []
            for i in range(B):
                top_view_inds_batch = torch.multinomial(view_score_[i], 1, replacement=False)
                top_view_inds.append(top_view_inds_batch)
            top_view_inds = torch.stack(top_view_inds, dim=0).squeeze(-1)  # B, num_seed
        else:
            # inference: deterministically take the best-scoring view
            _, top_view_inds = torch.max(view_score, dim=2)  # (B, num_seed)
        # gather the xyz direction of each chosen view from the view template
        top_view_inds_ = top_view_inds.view(B, num_seed, 1, 1).expand(-1, -1, -1, 3).contiguous()
        template_views = generate_grasp_views(self.num_view).to(features.device)  # (num_view, 3)
        template_views = template_views.view(1, 1, self.num_view, 3).expand(B, num_seed, -1, -1).contiguous()
        vp_xyz = torch.gather(template_views, 2, top_view_inds_).squeeze(2)  # (B, num_seed, 3)
        vp_xyz_ = vp_xyz.view(-1, 3)
        # in-plane angle is zero here; only the approach direction is fixed
        batch_angle = torch.zeros(vp_xyz_.size(0), dtype=vp_xyz.dtype, device=vp_xyz.device)
        vp_rot = batch_viewpoint_params_to_matrix(-vp_xyz_, batch_angle).view(B, num_seed, 3, 3)
        end_points['grasp_top_view_xyz'] = vp_xyz
        end_points['grasp_top_view_rot'] = vp_rot
        end_points['grasp_top_view_inds'] = top_view_inds
        return end_points, res_features
class CloudCrop(nn.Module):
    """Crops a cylindrical neighborhood around each graspable seed, oriented by
    the predicted approach rotation, and encodes it into one feature per seed."""
    def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04):
        # nsample: number of points grouped inside each cylinder
        # seed_feature_dim: channel width of incoming per-point features
        # cylinder_radius / hmin / hmax: cylinder geometry (presumably meters
        # — consistent with other metric constants in this project; confirm)
        super().__init__()
        self.nsample = nsample
        self.in_dim = seed_feature_dim
        self.cylinder_radius = cylinder_radius
        mlps = [3 + self.in_dim, 256, 256]  # use xyz, so plus 3
        self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample,
                                             use_xyz=True, normalize_xyz=True)
        self.mlps = pt_utils.SharedMLP(mlps, bn=True)
    def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):
        """Group, encode, and max-pool each cylinder into a per-seed feature.

        Returns: (batch_size, 256, M) features, one column per seed.
        """
        grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot,
                                       seed_features_graspable)  # B*3 + feat_dim*M*K
        new_features = self.mlps(grouped_feature)  # (batch_size, mlps[-1], M, K)
        # pool across the K grouped points of each cylinder
        new_features = F.max_pool2d(new_features, kernel_size=[1, new_features.size(3)])  # (batch_size, mlps[-1], M, 1)
        new_features = new_features.squeeze(-1)  # (batch_size, mlps[-1], M)
        return new_features
class SWADNet(nn.Module):
    """Predicts grasp Score and Width for every (Angle, Depth) bin per seed."""

    def __init__(self, num_angle, num_depth):
        super().__init__()
        self.num_angle = num_angle
        self.num_depth = num_depth
        self.conv1 = nn.Conv1d(256, 256, 1)  # input feat dim need to be consistent with CloudCrop module
        self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1)

    def forward(self, vp_features, end_points):
        batch, _, num_seed = vp_features.size()
        hidden = F.relu(self.conv1(vp_features), inplace=True)
        swad = self.conv_swad(hidden)
        # (B, 2*A*D, Ns) -> (B, 2, A, D, Ns) -> (B, 2, Ns, A, D)
        swad = swad.view(batch, 2, self.num_angle, self.num_depth, num_seed)
        swad = swad.permute(0, 1, 4, 2, 3)
        # split prediction: channel 0 -> score, channel 1 -> width
        end_points['grasp_score_pred'] = swad[:, 0]  # B * num_seed * num angle * num_depth
        end_points['grasp_width_pred'] = swad[:, 1]
        return end_points

View File

@@ -0,0 +1,196 @@
import torch.nn as nn
try:
import open3d as o3d
except ImportError:
raise ImportError("Please install open3d with `pip install open3d`.")
import MinkowskiEngine as ME
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
class ResNetBase(nn.Module):
    """Base class for sparse-convolutional ResNets built on MinkowskiEngine.

    Subclasses configure the architecture through the class attributes:
    BLOCK (residual block type), LAYERS (blocks per stage), INIT_DIM
    (stem width) and PLANES (per-stage widths).
    """
    BLOCK = None
    LAYERS = ()
    INIT_DIM = 64
    PLANES = (64, 128, 256, 512)
    def __init__(self, in_channels, out_channels, D=3):
        # D: spatial dimension of the sparse tensors (3 for point clouds)
        nn.Module.__init__(self)
        self.D = D
        assert self.BLOCK is not None
        self.network_initialization(in_channels, out_channels, D)
        self.weight_initialization()
    def network_initialization(self, in_channels, out_channels, D):
        """Builds stem, four residual stages, head conv, pooling and classifier."""
        self.inplanes = self.INIT_DIM
        # stem: strided conv + instance norm + ReLU + max-pool
        self.conv1 = nn.Sequential(
            ME.MinkowskiConvolution(
                in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D
            ),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D),
        )
        # four residual stages, each downsampling by stride 2
        self.layer1 = self._make_layer(
            self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2
        )
        self.layer2 = self._make_layer(
            self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2
        )
        self.layer3 = self._make_layer(
            self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2
        )
        self.layer4 = self._make_layer(
            self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2
        )
        # head conv with dropout before global pooling
        self.conv5 = nn.Sequential(
            ME.MinkowskiDropout(),
            ME.MinkowskiConvolution(
                self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D
            ),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiGELU(),
        )
        self.glob_pool = ME.MinkowskiGlobalMaxPooling()
        self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True)
    def weight_initialization(self):
        """Kaiming init for conv kernels; unit/zero init for batch norms."""
        for m in self.modules():
            if isinstance(m, ME.MinkowskiConvolution):
                ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu")
            if isinstance(m, ME.MinkowskiBatchNorm):
                nn.init.constant_(m.bn.weight, 1)
                nn.init.constant_(m.bn.bias, 0)
    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1):
        """Builds one residual stage of `blocks` blocks.

        A 1x1 strided conv downsample branch is added when the spatial
        stride or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                ME.MinkowskiConvolution(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    dimension=self.D,
                ),
                ME.MinkowskiBatchNorm(planes * block.expansion),
            )
        layers = []
        layers.append(
            block(
                self.inplanes,
                planes,
                stride=stride,
                dilation=dilation,
                downsample=downsample,
                dimension=self.D,
            )
        )
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(
                block(
                    self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D
                )
            )
        return nn.Sequential(*layers)
    def forward(self, x: ME.SparseTensor):
        """Stem -> 4 stages -> head conv -> global max-pool -> linear logits."""
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.conv5(x)
        x = self.glob_pool(x)
        return self.final(x)
class ResNet14(ResNetBase):
    """Shallowest variant: one BasicBlock per stage."""
    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
class ResNet18(ResNetBase):
    """ResNet-18 layout: two BasicBlocks per stage."""
    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
class ResNet34(ResNetBase):
    """ResNet-34 layout: (3, 4, 6, 3) BasicBlocks."""
    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
class ResNet50(ResNetBase):
    """ResNet-50 layout: (3, 4, 6, 3) Bottleneck blocks."""
    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
class ResNet101(ResNetBase):
    """ResNet-101 layout: (3, 4, 23, 3) Bottleneck blocks."""
    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)
class ResFieldNetBase(ResNetBase):
    """ResNet variant operating on ME.TensorField inputs.

    Two sinusoidal embedding stages lift the raw field features before the
    standard ResNet trunk runs on the resulting sparse tensor.
    """
    def network_initialization(self, in_channels, out_channels, D):
        field_ch = 32
        field_ch2 = 64
        # first field embedding: raw input -> 32 channels, then sparsify
        self.field_network = nn.Sequential(
            ME.MinkowskiSinusoidal(in_channels, field_ch),
            ME.MinkowskiBatchNorm(field_ch),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(field_ch, field_ch),
            ME.MinkowskiBatchNorm(field_ch),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )
        # second embedding consumes embedding concatenated with the raw input
        self.field_network2 = nn.Sequential(
            ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2),
            ME.MinkowskiBatchNorm(field_ch2),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(field_ch2, field_ch2),
            ME.MinkowskiBatchNorm(field_ch2),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )
        # trunk now takes the 64-channel embedding as its input
        ResNetBase.network_initialization(self, field_ch2, out_channels, D)
    def forward(self, x: ME.TensorField):
        """Embed the field twice, then run the standard ResNet trunk."""
        otensor = self.field_network(x)
        otensor2 = self.field_network2(otensor.cat_slice(x))
        return ResNetBase.forward(self, otensor2)
class ResFieldNet14(ResFieldNetBase):
    """Field-input variant with one BasicBlock per stage."""
    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
class ResFieldNet18(ResFieldNetBase):
    """Field-input variant with the ResNet-18 layout."""
    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
class ResFieldNet34(ResFieldNetBase):
    """Field-input variant with the ResNet-34 layout."""
    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
class ResFieldNet50(ResFieldNetBase):
    """Field-input variant with the ResNet-50 layout."""
    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
class ResFieldNet101(ResFieldNetBase):
    """Field-input variant with the ResNet-101 layout."""
    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)

View File

@@ -0,0 +1,132 @@
""" Collision detection to remove collided grasp pose predictions.
Author: chenxi-wang
"""
# import os
# import sys
import numpy as np
import open3d as o3d
class ModelFreeCollisionDetector():
    """ Collision detection in scenes without object labels. Current finger width and length are fixed.

    Input:
        scene_points: [numpy.ndarray, (N,3), numpy.float32]
            the scene points to detect
        voxel_size: [float]
            used for downsample

    Example usage:
        mfcdetector = ModelFreeCollisionDetector(scene_points, voxel_size=0.005)
        collision_mask = mfcdetector.detect(grasp_group, approach_dist=0.03)
        collision_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, return_ious=True)
        collision_mask, empty_mask = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,
                            return_empty_grasp=True, empty_thresh=0.01)
        collision_mask, empty_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,
                            return_empty_grasp=True, empty_thresh=0.01, return_ious=True)
    """
    def __init__(self, scene_points, voxel_size=0.005):
        # fixed gripper finger geometry (presumably meters — matches the
        # metric voxel_size default; confirm against the gripper model)
        self.finger_width = 0.005
        self.finger_length = 0.09
        self.voxel_size = voxel_size
        # voxel-downsample the scene once so per-grasp checks are cheaper
        scene_cloud = o3d.geometry.PointCloud()
        scene_cloud.points = o3d.utility.Vector3dVector(scene_points)
        scene_cloud = scene_cloud.voxel_down_sample(voxel_size)
        self.scene_points = np.array(scene_cloud.points)  # (K, 3)
    def detect(self, preds, approach_dist=0.03, collision_thresh=0.05, return_empty_grasp=False, empty_thresh=0.01, return_ious=False):
        """ Detect collision of grasps.

        Input:
            preds: [numpy.ndarray, (M,17)]
                the grasps to check; columns match the decoder output:
                [score, width, height, depth, rotation(9), center(3), obj_id]
            approach_dist: [float]
                the distance for a gripper to move along approaching direction before grasping;
                this shifting space is also required to contain no scene point
            collision_thresh: [float]
                if global collision iou is greater than this threshold,
                a collision is detected
            return_empty_grasp: [bool]
                if True, return a mask to imply whether there are objects in a grasp
            empty_thresh: [float]
                if inner space iou is smaller than this threshold,
                the grasp is regarded as empty;
                only set when [return_empty_grasp] is True
            return_ious: [bool]
                if True, return global collision iou and part collision ious
        Output:
            collision_mask: [numpy.ndarray, (M,), bool]
                True implies collision
            [optional] empty_mask: [numpy.ndarray, (M,), bool]
                True implies empty grasp
                only returned when [return_empty_grasp] is True
            [optional] iou_list: list of [numpy.ndarray, (M,), numpy.float32]
                global and part collision ious, containing
                [global_iou, left_iou, right_iou, bottom_iou, shifting_iou]
                only returned when [return_ious] is True
        """
        approach_dist = max(approach_dist, self.finger_width)
        T = preds[:, 13:16].reshape(-1, 3)  # (N, 3) grasp centers
        R = preds[:, 4:13].reshape(-1, 3, 3)  # (N, 3, 3) grasp rotations
        heights = preds[:, 2][:,np.newaxis]  # (N,) -> (N, 1)
        depths = preds[:, 3][:,np.newaxis]  # (N,) -> (N, 1)
        widths = preds[:, 1][:,np.newaxis]  # (N,) -> (N, 1)
        # use a different offset for each grasp point:
        # express scene points in every gripper's local frame
        targets = self.scene_points[np.newaxis,:,:] - T[:,np.newaxis,:]  # (1, K, 3) - (N, 1, 3) = (N, K, 3)
        targets = np.matmul(targets, R)  # (N, K, 3) * (N, 3, 3) = (N, K, 3)
        ## collision detection
        # height mask
        mask1 = ((targets[:,:,2] > -heights/2) & (targets[:,:,2] < heights/2))  # (N, K) * (N, 3) = (N, K)
        # left finger mask
        mask2 = ((targets[:,:,0] > depths - self.finger_length) & (targets[:,:,0] < depths))
        mask3 = (targets[:,:,1] > -(widths/2 + self.finger_width))
        mask4 = (targets[:,:,1] < -widths/2)
        # right finger mask
        mask5 = (targets[:,:,1] < (widths/2 + self.finger_width))
        mask6 = (targets[:,:,1] > widths/2)
        # bottom mask
        mask7 = ((targets[:,:,0] <= depths - self.finger_length)\
                & (targets[:,:,0] > depths - self.finger_length - self.finger_width))
        # shifting mask
        mask8 = ((targets[:,:,0] <= depths - self.finger_length - self.finger_width)\
                & (targets[:,:,0] > depths - self.finger_length - self.finger_width - approach_dist))
        # get collision mask of each point
        left_mask = (mask1 & mask2 & mask3 & mask4)
        right_mask = (mask1 & mask2 & mask5 & mask6)
        bottom_mask = (mask1 & mask3 & mask5 & mask7)
        shifting_mask = (mask1 & mask3 & mask5 & mask8)
        global_mask = (left_mask | right_mask | bottom_mask | shifting_mask)
        # calculate equivalant volume of each part (in voxel counts, so the
        # point-count / volume ratio behaves like an occupancy iou)
        left_right_volume = (heights * self.finger_length * self.finger_width / (self.voxel_size**3)).reshape(-1)
        bottom_volume = (heights * (widths+2*self.finger_width) * self.finger_width / (self.voxel_size**3)).reshape(-1)
        shifting_volume = (heights * (widths+2*self.finger_width) * approach_dist / (self.voxel_size**3)).reshape(-1)
        volume = left_right_volume*2 + bottom_volume + shifting_volume
        # get collision iou of each part
        global_iou = global_mask.sum(axis=1) / (volume+1e-6)
        # get collison mask
        collision_mask = (global_iou > collision_thresh)
        if not (return_empty_grasp or return_ious):
            return collision_mask
        ret_value = [collision_mask,]
        if return_empty_grasp:
            # inner region between the fingers: too few points => empty grasp
            inner_mask = (mask1 & mask2 & (~mask4) & (~mask6))
            inner_volume = (heights * self.finger_length * widths / (self.voxel_size**3)).reshape(-1)
            empty_mask = (inner_mask.sum(axis=-1)/inner_volume < empty_thresh)
            ret_value.append(empty_mask)
        if return_ious:
            left_iou = left_mask.sum(axis=1) / (left_right_volume+1e-6)
            right_iou = right_mask.sum(axis=1) / (left_right_volume+1e-6)
            bottom_iou = bottom_mask.sum(axis=1) / (bottom_volume+1e-6)
            shifting_iou = shifting_mask.sum(axis=1) / (shifting_volume+1e-6)
            ret_value.append([global_iou, left_iou, right_iou, bottom_iou, shifting_iou])
        return ret_value

View File

@@ -0,0 +1,156 @@
""" Tools for data processing.
Author: chenxi-wang
"""
import numpy as np
class CameraInfo():
    """Pinhole camera intrinsics used for depth-to-point-cloud projection."""
    def __init__(self, width, height, fx, fy, cx, cy, scale):
        # image size in pixels
        self.width, self.height = width, height
        # focal lengths and principal point
        self.fx, self.fy = fx, fy
        self.cx, self.cy = cx, cy
        # factor converting stored depth values to metric depth
        self.scale = scale
def create_point_cloud_from_depth_image(depth, camera, organized=True):
    """ Back-project a depth image into a 3D point cloud.

    Input:
        depth: [numpy.ndarray, (H,W), numpy.float32]
            depth image
        camera: [CameraInfo]
            camera intrinsics
        organized: bool
            whether to keep the cloud in image shape (H,W,3)
    Output:
        cloud: [numpy.ndarray, (H,W,3)/(H*W,3), numpy.float32]
            generated cloud, (H,W,3) for organized=True, (H*W,3) for organized=False
    """
    assert (depth.shape[0] == camera.height and depth.shape[1] == camera.width)
    # pixel grid -> metric coordinates via the pinhole model
    xs, ys = np.meshgrid(np.arange(camera.width), np.arange(camera.height))
    z = depth / camera.scale
    x = (xs - camera.cx) * z / camera.fx
    y = (ys - camera.cy) * z / camera.fy
    cloud = np.stack([x, y, z], axis=-1)
    return cloud if organized else cloud.reshape([-1, 3])
def transform_point_cloud(cloud, transform, format='4x4'):
    """ Transform points to new coordinates with transformation matrix.

    Input:
        cloud: [np.ndarray, (N,3), np.float32]
            points in original coordinates
        transform: [np.ndarray, (3,3)/(3,4)/(4,4), np.float32]
            transformation matrix, could be rotation only or rotation+translation
        format: [string, '3x3'/'3x4'/'4x4']
            the shape of transformation matrix
            '3x3' --> rotation matrix
            '3x4'/'4x4' --> rotation matrix + translation matrix
    Output:
        cloud_transformed: [np.ndarray, (N,3), np.float32]
            points in new coordinates
    """
    if format not in ('3x3', '3x4', '4x4'):
        raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.')
    if format == '3x3':
        # rotation only
        return cloud.dot(transform.T)
    # append homogeneous coordinate, apply, drop it again
    homo = np.concatenate([cloud, np.ones((cloud.shape[0], 1))], axis=1)
    return homo.dot(transform.T)[:, :3]
def compute_point_dists(A, B):
    """ Compute pair-wise point distances in two matrices.

    Input:
        A: [np.ndarray, (N,3), np.float32]
            point cloud A
        B: [np.ndarray, (M,3), np.float32]
            point cloud B
    Output:
        dists: [np.ndarray, (N,M), np.float32]
            distance matrix
    """
    # broadcast (N,1,3) - (1,M,3) -> (N,M,3), then norm along xyz
    diff = A[:, np.newaxis, :] - B[np.newaxis, :, :]
    return np.linalg.norm(diff, axis=-1)
def remove_invisible_grasp_points(cloud, grasp_points, pose, th=0.01):
    """ Remove invisible part of object model according to scene point cloud.

    Input:
        cloud: [np.ndarray, (N,3), np.float32]
            scene point cloud
        grasp_points: [np.ndarray, (M,3), np.float32]
            grasp point label in object coordinates
        pose: [np.ndarray, (4,4), np.float32]
            transformation matrix from object coordinates to world coordinates
        th: [float]
            a grasp point farther than th from every scene point is removed
    Output:
        visible_mask: [np.ndarray, (M,), bool]
            mask to show the visible part of grasp points
    """
    # move grasp labels into the scene frame, then keep only points that
    # have a scene point within th
    transformed = transform_point_cloud(grasp_points, pose)
    min_dists = compute_point_dists(transformed, cloud).min(axis=1)
    return min_dists < th
def get_workspace_mask(cloud, seg, trans=None, organized=True, outlier=0):
    """ Keep points in workspace as input.

    Input:
        cloud: [np.ndarray, (H,W,3), np.float32]
            scene point cloud
        seg: [np.ndarray, (H,W,), np.uint8]
            segmantation label of scene points
        trans: [np.ndarray, (4,4), np.float32]
            transformation matrix for scene points, default: None.
        organized: [bool]
            whether to keep the cloud in image shape (H,W,3)
        outlier: [float]
            margin added around the foreground bounding box
    Output:
        workspace_mask: [np.ndarray, (H,W)/(H*W,), bool]
            mask to indicate whether scene points are in workspace
    """
    if organized:
        h, w, _ = cloud.shape
        cloud = cloud.reshape([h * w, 3])
        seg = seg.reshape(h * w)
    if trans is not None:
        cloud = transform_point_cloud(cloud, trans)
    # workspace = axis-aligned bounding box of labelled points, padded by outlier
    foreground = cloud[seg > 0]
    lower = foreground.min(axis=0) - outlier
    upper = foreground.max(axis=0) + outlier
    workspace_mask = np.all((cloud > lower) & (cloud < upper), axis=1)
    if organized:
        workspace_mask = workspace_mask.reshape([h, w])
    return workspace_mask

View File

@@ -0,0 +1,143 @@
""" Dynamically generate grasp labels during training.
Author: chenxi-wang
"""
import os
import sys
import torch
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ROOT_DIR = os.path.dirname(BASE_DIR)
# sys.path.append(ROOT_DIR)
# sys.path.append(os.path.join(ROOT_DIR, 'knn'))
from ...knn.knn_modules import knn
from ..utils.loss_utils import GRASP_MAX_WIDTH, batch_viewpoint_params_to_matrix, \
transform_point_cloud, generate_grasp_views
def process_grasp_labels(end_points):
    """ Process labels according to scene points and object poses.

    For every seed point: merge the grasp labels of all objects in the scene
    (transformed into scene coordinates), assign each seed to its nearest
    labelled grasp point, and compute a per-seed normalized view-graspness
    target. Results are written back into end_points.
    """
    seed_xyzs = end_points['xyz_graspable']  # (B, M_point, 3)
    batch_size, num_samples, _ = seed_xyzs.size()
    batch_grasp_points = []
    batch_grasp_views_rot = []
    batch_grasp_scores = []
    batch_grasp_widths = []
    for i in range(batch_size):
        seed_xyz = seed_xyzs[i]  # (Ns, 3)
        poses = end_points['object_poses_list'][i]  # [(3, 4),]
        # get merged grasp points for label computation
        grasp_points_merged = []
        grasp_views_rot_merged = []
        grasp_scores_merged = []
        grasp_widths_merged = []
        for obj_idx, pose in enumerate(poses):
            grasp_points = end_points['grasp_points_list'][i][obj_idx]  # (Np, 3)
            grasp_scores = end_points['grasp_scores_list'][i][obj_idx]  # (Np, V, A, D)
            grasp_widths = end_points['grasp_widths_list'][i][obj_idx]  # (Np, V, A, D)
            _, V, A, D = grasp_scores.size()
            num_grasp_points = grasp_points.size(0)
            # generate and transform template grasp views
            grasp_views = generate_grasp_views(V).to(pose.device)  # (V, 3)
            grasp_points_trans = transform_point_cloud(grasp_points, pose, '3x4')
            grasp_views_trans = transform_point_cloud(grasp_views, pose[:3, :3], '3x3')
            # generate and transform template grasp view rotation
            angles = torch.zeros(grasp_views.size(0), dtype=grasp_views.dtype, device=grasp_views.device)
            grasp_views_rot = batch_viewpoint_params_to_matrix(-grasp_views, angles)  # (V, 3, 3)
            grasp_views_rot_trans = torch.matmul(pose[:3, :3], grasp_views_rot)  # (V, 3, 3)
            # assign views: map each rotated view back to its nearest template view
            grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0)
            grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0)
            view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1  # knn indices are 1-based
            grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds)  # (V, 3, 3)
            grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1,
                                                                              -1)  # (Np, V, 3, 3)
            grasp_scores = torch.index_select(grasp_scores, 1, view_inds)  # (Np, V, A, D)
            grasp_widths = torch.index_select(grasp_widths, 1, view_inds)  # (Np, V, A, D)
            # add to list
            grasp_points_merged.append(grasp_points_trans)
            grasp_views_rot_merged.append(grasp_views_rot_trans)
            grasp_scores_merged.append(grasp_scores)
            grasp_widths_merged.append(grasp_widths)
        grasp_points_merged = torch.cat(grasp_points_merged, dim=0)  # (Np', 3)
        grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0)  # (Np', V, 3, 3)
        grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0)  # (Np', V, A, D)
        grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0)  # (Np', V, A, D)
        # compute nearest neighbors
        seed_xyz_ = seed_xyz.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Ns)
        grasp_points_merged_ = grasp_points_merged.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Np')
        nn_inds = knn(grasp_points_merged_, seed_xyz_, k=1).squeeze() - 1  # (Ns)
        # assign anchor points to real points
        grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds)  # (Ns, 3)
        grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds)  # (Ns, V, 3, 3)
        grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds)  # (Ns, V, A, D)
        grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds)  # (Ns, V, A, D)
        # add to batch
        batch_grasp_points.append(grasp_points_merged)
        batch_grasp_views_rot.append(grasp_views_rot_merged)
        batch_grasp_scores.append(grasp_scores_merged)
        batch_grasp_widths.append(grasp_widths_merged)
    batch_grasp_points = torch.stack(batch_grasp_points, 0)  # (B, Ns, 3)
    batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0)  # (B, Ns, V, 3, 3)
    batch_grasp_scores = torch.stack(batch_grasp_scores, 0)  # (B, Ns, V, A, D)
    batch_grasp_widths = torch.stack(batch_grasp_widths, 0)  # (B, Ns, V, A, D)
    # compute view graspness: fraction of "good" (A, D) bins per view
    view_u_threshold = 0.6
    view_grasp_num = 48
    batch_grasp_view_valid_mask = (batch_grasp_scores <= view_u_threshold) & (batch_grasp_scores > 0)  # (B, Ns, V, A, D)
    batch_grasp_view_valid = batch_grasp_view_valid_mask.float()
    batch_grasp_view_graspness = torch.sum(torch.sum(batch_grasp_view_valid, dim=-1), dim=-1) / view_grasp_num  # (B, Ns, V)
    view_graspness_min, _ = torch.min(batch_grasp_view_graspness, dim=-1)  # (B, Ns)
    view_graspness_max, _ = torch.max(batch_grasp_view_graspness, dim=-1)
    # normalize per seed to [0, 1]; use the actual view count instead of a
    # hard-coded 300 so label sets with a different V keep working
    num_views = batch_grasp_view_graspness.size(-1)
    view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, num_views)  # (B, Ns, V)
    view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, num_views)
    batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5)
    # process scores: zero out invalid labels (non-positive score or too wide)
    label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH)  # (B, Ns, V, A, D)
    batch_grasp_scores[~label_mask] = 0
    end_points['batch_grasp_point'] = batch_grasp_points
    end_points['batch_grasp_view_rot'] = batch_grasp_views_rot
    end_points['batch_grasp_score'] = batch_grasp_scores
    end_points['batch_grasp_width'] = batch_grasp_widths
    end_points['batch_grasp_view_graspness'] = batch_grasp_view_graspness
    return end_points
def match_grasp_view_and_label(end_points):
    """ Slice grasp labels according to predicted views. """
    top_view_inds = end_points['grasp_top_view_inds']  # (B, Ns)
    template_views_rot = end_points['batch_grasp_view_rot']  # (B, Ns, V, 3, 3)
    grasp_scores = end_points['batch_grasp_score']  # (B, Ns, V, A, D)
    grasp_widths = end_points['batch_grasp_width']  # (B, Ns, V, A, D)
    B, Ns, V, A, D = grasp_scores.size()

    # gather rotation / score / width entries of the chosen view per seed
    inds_rot = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, 3, 3)
    top_template_views_rot = torch.gather(template_views_rot, 2, inds_rot).squeeze(2)
    inds_ad = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, A, D)
    top_view_grasp_scores = torch.gather(grasp_scores, 2, inds_ad).squeeze(2)
    top_view_grasp_widths = torch.gather(grasp_widths, 2, inds_ad).squeeze(2)

    # rescale positive scores into [0, 1] via a log-ratio against the
    # batch max/min (best score maps to ~1, worst positive to ~0)
    po_mask = top_view_grasp_scores > 0
    if torch.sum(po_mask) > 0:
        u_max = top_view_grasp_scores.max()
        u_min = top_view_grasp_scores[po_mask].min()
        top_view_grasp_scores[po_mask] = torch.log(u_max / top_view_grasp_scores[po_mask]) / (torch.log(u_max / u_min) + 1e-6)
    end_points['batch_grasp_score'] = top_view_grasp_scores  # (B, Ns, A, D)
    end_points['batch_grasp_width'] = top_view_grasp_widths  # (B, Ns, A, D)
    return top_template_views_rot, end_points

View File

@@ -0,0 +1,121 @@
""" Tools for loss computation.
Author: chenxi-wang
"""
import torch
import numpy as np
GRASP_MAX_WIDTH = 0.1  # maximum valid gripper opening width (labels above are discarded)
GRASPNESS_THRESHOLD = 0.1  # graspness score threshold
NUM_VIEW = 300  # number of approach views sampled on the unit sphere
NUM_ANGLE = 12  # number of in-plane rotation bins
NUM_DEPTH = 4  # number of gripper depth bins
M_POINT = 1024  # number of graspable candidate points per scene
def transform_point_cloud(cloud, transform, format='4x4'):
    """ Transform points to new coordinates with transformation matrix.

    Input:
        cloud: [torch.FloatTensor, (N,3)]
            points in original coordinates
        transform: [torch.FloatTensor, (3,3)/(3,4)/(4,4)]
            transformation matrix, could be rotation only or rotation+translation
        format: [string, '3x3'/'3x4'/'4x4']
            the shape of transformation matrix
            '3x3' --> rotation matrix
            '3x4'/'4x4' --> rotation matrix + translation matrix
    Output:
        cloud_transformed: [torch.FloatTensor, (N,3)]
            points in new coordinates
    """
    if format not in ('3x3', '3x4', '4x4'):
        raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.')
    if format == '3x3':
        # rotation only
        return torch.matmul(cloud, transform.T)
    # append homogeneous coordinate (same dtype/device as cloud), apply, drop it
    ones = cloud.new_ones(cloud.size(0), 1)
    homogeneous = torch.cat([cloud, ones], dim=1)
    return torch.matmul(homogeneous, transform.T)[:, :3]
def generate_grasp_views(N=300, phi=(np.sqrt(5) - 1) / 2, center=np.zeros(3), r=1):
    """ View sampling on a unit sphere using Fibonacci lattices.
    Ref: https://arxiv.org/abs/0912.4540

    Input:
        N: [int]
            number of sampled views
        phi: [float]
            constant for view coordinate calculation, different phi's bring different distributions, default: (sqrt(5)-1)/2
        center: [np.ndarray, (3,), np.float32]
            sphere center
        r: [float]
            sphere radius
    Output:
        views: [torch.FloatTensor, (N,3)]
            sampled view coordinates
    """
    # vectorized Fibonacci lattice: evenly spaced z, golden-ratio azimuths
    idx = np.arange(N)
    z = (2 * idx + 1) / N - 1
    ring = np.sqrt(1 - z ** 2)
    theta = 2 * idx * np.pi * phi
    views = np.stack([ring * np.cos(theta), ring * np.sin(theta), z], axis=1)
    views = r * views + center
    return torch.from_numpy(views.astype(np.float32))
def batch_viewpoint_params_to_matrix(batch_towards, batch_angle):
    """ Transform approach vectors and in-plane rotation angles to rotation matrices.

    Input:
        batch_towards: [torch.FloatTensor, (N,3)]
            approach vectors in batch
        batch_angle: [torch.FloatTensor, (N,)]
            in-plane rotation angles in batch
    Output:
        batch_matrix: [torch.FloatTensor, (N,3,3)]
            rotation matrices in batch
    """
    axis_x = batch_towards
    ones = torch.ones(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)
    zeros = torch.zeros(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)
    # y axis: perpendicular to x within the horizontal plane
    axis_y = torch.stack([-axis_x[:, 1], axis_x[:, 0], zeros], dim=-1)
    # degenerate case (x parallel to the z axis): fall back to y = (0, 1, 0)
    mask_y = (torch.norm(axis_y, dim=-1) == 0)
    axis_y[mask_y, 1] = 1
    axis_x = axis_x / torch.norm(axis_x, dim=-1, keepdim=True)
    axis_y = axis_y / torch.norm(axis_y, dim=-1, keepdim=True)
    # explicit dim=-1 is required: torch.cross defaults to the FIRST
    # dimension of size 3, which silently crosses along the batch axis
    # whenever N == 3
    axis_z = torch.cross(axis_x, axis_y, dim=-1)
    sin = torch.sin(batch_angle)
    cos = torch.cos(batch_angle)
    # R1: in-plane rotation about the approach (x) axis
    R1 = torch.stack([ones, zeros, zeros, zeros, cos, -sin, zeros, sin, cos], dim=-1)
    R1 = R1.reshape([-1, 3, 3])
    # R2: frame whose columns are (x, y, z)
    R2 = torch.stack([axis_x, axis_y, axis_z], dim=-1)
    batch_matrix = torch.matmul(R2, R1)
    return batch_matrix
def huber_loss(error, delta=1.0):
    """Element-wise Huber loss.

    With x = error (pred - gt or dist(pred, gt)):
        0.5 * |x|^2                    if |x| <= delta
        0.5 * delta^2 + delta*(|x|-delta)  if |x| >  delta

    Args:
        error: torch tensor of any shape (d1, d2, ..., dk).
        delta (float): transition point between the quadratic and linear regimes.

    Returns:
        torch tensor of the same shape as `error`.

    Author: Charles R. Qi
    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    abs_err = error.abs()
    quad = abs_err.clamp(max=delta)   # quadratic part, capped at delta
    lin = abs_err - quad              # remainder handled linearly
    return 0.5 * quad.pow(2) + delta * lin

View File

@@ -0,0 +1,20 @@
# import unittest
# import gc
# import operator as op
# import functools
import torch
# from torch.autograd import Variable, Function
# from knn_pytorch import knn_pytorch
# import knn_pytorch
from . import knn_pytorch
def knn(ref, query, k=1):
    """Compute the k nearest reference points for each query point.

    Args:
        ref: (B, dim, n_ref) tensor of reference points.
        query: (B, dim, n_query) tensor of query points; moved to ref's device.
        k (int): number of neighbours to return.

    Returns:
        LongTensor of shape (B, k, n_query), filled in place by the
        knn_pytorch C/CUDA extension.
    """
    device = ref.device
    ref, query = ref.float().to(device), query.float().to(device)
    batch, n_query = query.shape[0], query.shape[2]
    inds = torch.empty(batch, k, n_query).long().to(device)
    knn_pytorch.knn(ref, query, inds)
    return inds

View File

@@ -0,0 +1 @@
from .knn_pytorch import *

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python
import glob
import os
import torch
from setuptools import find_packages
from setuptools import setup
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
requirements = ["torch", "torchvision"]
def get_extensions():
    """Collect the C++/CUDA sources and configure the knn_pytorch extension.

    The CPU sources are always compiled; when torch reports an available CUDA
    device and CUDA_HOME is set, the .cu kernels are added, WITH_CUDA is
    defined and nvcc flags are supplied.

    Returns:
        list: a single-element list holding the configured extension object.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "src")

    # Entry-point .cpp first, then the CPU implementation files.
    sources = glob.glob(os.path.join(extensions_dir, "*.cpp"))
    sources += glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
    cuda_sources = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))

    extension_cls = CppExtension
    define_macros = []
    extra_compile_args = {"cxx": []}

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension_cls = CUDAExtension
        sources += cuda_sources
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    # glob already returned absolute paths, so this join is a no-op on POSIX;
    # kept for parity with the original layout.
    sources = [os.path.join(extensions_dir, s) for s in sources]

    return [
        extension_cls(
            "knn_pytorch.knn_pytorch",
            sources,
            include_dirs=[extensions_dir],
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
# Build/installation entry point: compiles the collected C++/CUDA sources with
# torch's BuildExtension so the binary module `knn_pytorch.knn_pytorch` can be
# imported by the Python wrapper.
setup(
    name="knn_pytorch",
    version="0.1",
    author="foolyc",
    url="https://github.com/foolyc/torchKNN",
    description="KNN implement in Pytorch 1.0 including both cpu version and gpu version",
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)

View File

@@ -0,0 +1,56 @@
#include "cpu/vision.h"
// Brute-force CPU k-NN for a single batch element.
//   ref_dev   : (height x ref_width) reference points, one dimension per row
//   query_dev : (height x query_width) query points, one dimension per row
//   dist_dev  : scratch, (query_width x ref_width) squared distances
//   ind_dev   : output, (k x query_width); NOTE indices are written 1-based
//               (ind_buf is seeded with i+1)
//   ind_buf   : scratch of ref_width entries, reused for every query
void knn_cpu(float* ref_dev, int ref_width, float* query_dev, int query_width,
             int height, int k, float* dist_dev, long* ind_dev, long* ind_buf)
{
    // Compute all the squared distances (no sqrt; ordering is unchanged)
    for(int query_idx = 0;query_idx<query_width;query_idx++)
    {
        for(int ref_idx = 0;ref_idx < ref_width;ref_idx++)
        {
            dist_dev[query_idx * ref_width + ref_idx] = 0;
            for(int hi=0;hi<height;hi++)
                dist_dev[query_idx * ref_width + ref_idx] += (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]) * (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]);
        }
    }

    float temp_value;
    long temp_idx;
    // Bubble-sort each query's distance row (O(ref_width^2)), carrying the
    // 1-based indices along in ind_buf; then emit the first k indices
    // column-major: ind_dev[query_idx + i * query_width] = (i+1)-th neighbour.
    for(int query_idx = 0;query_idx<query_width;query_idx++)
    {
        for(int i = 0;i < ref_width;i++)
        {
            ind_buf[i] = i+1;   // 1-based index convention
        }
        for(int i = 0;i < ref_width;i++)
            for(int j = 0;j < ref_width - i - 1;j++)
            {
                if(dist_dev[query_idx * ref_width + j] > dist_dev[query_idx * ref_width + j + 1])
                {
                    // swap distance and its index together
                    temp_value = dist_dev[query_idx * ref_width + j];
                    dist_dev[query_idx * ref_width + j] = dist_dev[query_idx * ref_width + j + 1];
                    dist_dev[query_idx * ref_width + j + 1] = temp_value;
                    temp_idx = ind_buf[j];
                    ind_buf[j] = ind_buf[j + 1];
                    ind_buf[j + 1] = temp_idx;
                }
            }
        for(int i = 0;i < k;i++)
            ind_dev[query_idx + i * query_width] = ind_buf[i];
#if DEBUG
        for(int i = 0;i < ref_width;i++)
            printf("%d, ", ind_buf[i]);
        printf("\n");
#endif
    }
}

View File

@@ -0,0 +1,6 @@
#pragma once
#include <torch/extension.h>
// CPU brute-force k-NN: fills ind_dev with the 1-based indices of the k
// nearest reference points for every query point. dist_dev and ind_buf are
// caller-provided scratch buffers.
void knn_cpu(float* ref_dev, int ref_width,
             float* query_dev, int query_width,
             int height, int k, float* dist_dev, long* ind_dev, long* ind_buf);

View File

@@ -0,0 +1,269 @@
/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA
* The modifications are
* removed texture memory usage
* removed split query KNN computation
* added feature extraction with bilinear interpolation
*
* Last modified by Christopher B. Choy <chrischoy@ai.stanford.edu> 12/23/2016
*/
// Includes
#include <cstdio>
#include "cuda.h"
#define IDX2D(i, j, dj) (dj * i + j)
#define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk))
#define BLOCK 512
#define MAX_STREAMS 512
// Constants used by the program
#define BLOCK_DIM 16
#define DEBUG 0
/**
* Computes the distance between two matrix A (reference points) and
* B (query points) containing respectively wA and wB points.
*
* @param A pointer on the matrix A
* @param wA width of the matrix A = number of points in A
* @param B pointer on the matrix B
* @param wB width of the matrix B = number of points in B
* @param dim dimension of points = height of matrices A and B
* @param AB pointer on the matrix containing the wA*wB distances computed
*/
// Tiled computation of all pairwise *squared* Euclidean distances between the
// wA reference points (columns of A) and the wB query points (columns of B).
// Output AB is wA rows x wB columns; no square root is taken here.
__global__ void cuComputeDistanceGlobal( float* A, int wA,
    float* B, int wB, int dim,  float* AB){

  // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B
  __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM];
  __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM];

  // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step)
  __shared__ int begin_A;
  __shared__ int begin_B;
  __shared__ int step_A;
  __shared__ int step_B;
  __shared__ int end_A;

  // Thread index
  int tx = threadIdx.x;
  int ty = threadIdx.y;

  // Other variables
  float tmp;
  float ssd = 0;  // running sum of squared differences for this (ref, query) pair

  // Loop parameters
  begin_A = BLOCK_DIM * blockIdx.y;
  begin_B = BLOCK_DIM * blockIdx.x;
  step_A = BLOCK_DIM * wA;
  step_B = BLOCK_DIM * wB;
  end_A = begin_A + (dim-1) * wA;

  // Conditions
  int cond0 = (begin_A + tx < wA); // used to write in shared memory
  int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix
  int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix

  // Loop over all the sub-matrices of A and B required to compute the block sub-matrix
  for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) {
    // Load the matrices from device memory to shared memory; each thread loads one element of each matrix
    if (a/wA + ty < dim){
      shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0;
      shared_B[ty][tx] = (cond1)? B[b + wB * ty + tx] : 0;
    }
    else{
      shared_A[ty][tx] = 0;
      shared_B[ty][tx] = 0;
    }

    // Synchronize to make sure the matrices are loaded
    __syncthreads();

    // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix
    if (cond2 && cond1){
      for (int k = 0; k < BLOCK_DIM; ++k){
        tmp = shared_A[k][ty] - shared_B[k][tx];
        ssd += tmp*tmp;
      }
    }

    // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration
    __syncthreads();
  }

  // Write the block sub-matrix to device memory; each thread writes one element
  if (cond2 && cond1)
    AB[(begin_A + ty) * wB + begin_B + tx] = ssd;
}
/**
* Gathers k-th smallest distances for each column of the distance matrix in the top.
*
* @param dist distance matrix
* @param ind index matrix
* @param width width of the distance matrix and of the index matrix
* @param height height of the distance matrix and of the index matrix
* @param k number of neighbors to consider
*/
// Per-column partial insertion sort of the distance matrix. One thread per
// query column: afterwards the first k rows of each column hold that query's
// k smallest distances in ascending order, with the matching reference
// indices in `ind`. NOTE: indices are written 1-based (p_ind[0] = 1, l+1).
__global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){

  // Variables
  int l, i, j;
  float *p_dist;
  long *p_ind;
  float curr_dist, max_dist;
  long curr_row, max_row;
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;  // query column id

  if (xIndex<width){
    // Pointer shift, initialization, and max value
    p_dist = dist + xIndex;
    p_ind = ind + xIndex;
    max_dist = p_dist[0];
    p_ind[0] = 1;

    // Part 1 : sort the first k elements
    for (l=1; l<k; l++){
      curr_row = l * width;
      curr_dist = p_dist[curr_row];
      if (curr_dist<max_dist){
        // find insertion position within the sorted prefix
        i=l-1;
        for (int a=0; a<l-1; a++){
          if (p_dist[a*width]>curr_dist){
            i=a;
            break;
          }
        }
        // shift the tail down one row and insert
        for (j=l; j>i; j--){
          p_dist[j*width] = p_dist[(j-1)*width];
          p_ind[j*width] = p_ind[(j-1)*width];
        }
        p_dist[i*width] = curr_dist;
        p_ind[i*width] = l+1;  // 1-based index
      } else {
        p_ind[l*width] = l+1;
      }
      max_dist = p_dist[curr_row];
    }

    // Part 2 : insert each remaining element into the k-th first lines
    max_row = (k-1)*width;
    for (l=k; l<height; l++){
      curr_dist = p_dist[l*width];
      if (curr_dist<max_dist){
        i=k-1;
        for (int a=0; a<k-1; a++){
          if (p_dist[a*width]>curr_dist){
            i=a;
            break;
          }
        }
        for (j=k-1; j>i; j--){
          p_dist[j*width] = p_dist[(j-1)*width];
          p_ind[j*width] = p_ind[(j-1)*width];
        }
        p_dist[i*width] = curr_dist;
        p_ind[i*width] = l+1;  // 1-based index
        max_dist = p_dist[max_row];
      }
    }
  }
}
/**
* Computes the square root of the first line (width-th first element)
* of the distance matrix.
*
* @param dist distance matrix
* @param width width of the distance matrix
* @param k number of neighbors to consider
*/
// Element-wise sqrt over the first k rows of the distance matrix, converting
// squared distances to Euclidean ones. (Its launch in knn_device is commented
// out, so the pipeline currently leaves distances squared; ordering — and
// therefore the indices — is unaffected.)
__global__ void cuParallelSqrt(float *dist, int width, int k){
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
  if (xIndex<width && yIndex<k)
    dist[yIndex*width + xIndex] = sqrt(dist[yIndex*width + xIndex]);
}
//-----------------------------------------------------------------------------------------------//
// K-th NEAREST NEIGHBORS //
//-----------------------------------------------------------------------------------------------//
/**
* K nearest neighbor algorithm
* - Initialize CUDA
* - Allocate device memory
* - Copy point sets (reference and query points) from host to device memory
* - Compute the distances + indexes to the k nearest neighbors for each query point
* - Copy distances from device to host memory
*
* @param ref_host reference points ; pointer to linear matrix
* @param ref_nb number of reference points ; width of the matrix
* @param query_host query points ; pointer to linear matrix
* @param query_nb number of query points ; width of the matrix
* @param dim dimension of points ; height of the matrices
* @param k number of neighbor to consider
* @param dist_host distances to k nearest neighbors ; pointer to linear matrix
* @param dist_host indexes of the k nearest neighbors ; pointer to linear matrix
*
*/
// Runs the k-NN pipeline for one batch element on `stream`:
//   dist_dev : (ref_nb x query_nb) scratch matrix of squared distances
//   ind_dev  : (k x query_nb) output of 1-based reference indices
void knn_device(float* ref_dev, int ref_nb, float* query_dev, int query_nb,
    int dim, int k, float* dist_dev, long* ind_dev, cudaStream_t stream){

  // Grids and threads: 16x16 tiles for the distance kernel, 256-wide 1-D
  // blocks for the per-column sort; grids rounded up for ragged sizes.
  dim3 g_16x16(query_nb/16, ref_nb/16, 1);
  dim3 t_16x16(16, 16, 1);
  if (query_nb%16 != 0) g_16x16.x += 1;
  if (ref_nb %16 != 0) g_16x16.y += 1;
  //
  dim3 g_256x1(query_nb/256, 1, 1);
  dim3 t_256x1(256, 1, 1);
  if (query_nb%256 != 0) g_256x1.x += 1;

  dim3 g_k_16x16(query_nb/16, k/16, 1);
  dim3 t_k_16x16(16, 16, 1);
  if (query_nb%16 != 0) g_k_16x16.x += 1;
  if (k  %16 != 0) g_k_16x16.y += 1;

  // Kernel 1: Compute all the distances
  cuComputeDistanceGlobal<<<g_16x16, t_16x16, 0, stream>>>(ref_dev, ref_nb, query_dev, query_nb, dim, dist_dev);

  // Kernel 2: Sort each column
  cuInsertionSort<<<g_256x1, t_256x1, 0, stream>>>(dist_dev, ind_dev, query_nb, ref_nb, k);

  // Kernel 3 (disabled): would convert squared distances to Euclidean ones;
  // ordering is unaffected, so the reported indices are correct either way.
  // cuParallelSqrt<<<g_k_16x16,t_k_16x16, 0, stream>>>(dist_dev, query_nb, k);

#if DEBUG
  // NOTE(review): debug-only host copies; dist_host / idx_host are allocated
  // with new[] and never freed — a leak, tolerated only on this debug path.
  unsigned int size_of_float = sizeof(float);
  unsigned long size_of_long = sizeof(long);

  float* dist_host = new float[query_nb * k];
  long* idx_host = new long[query_nb * k];

  // Memory copy of output from device to host
  cudaMemcpy(&dist_host[0], dist_dev,
      query_nb * k *size_of_float, cudaMemcpyDeviceToHost);

  cudaMemcpy(&idx_host[0], ind_dev,
      query_nb * k * size_of_long, cudaMemcpyDeviceToHost);

  int i = 0;
  for(i = 0; i < 100; i++){
    printf("IDX[%d]: %d\n", i, (int)idx_host[i]);
  }
#endif
}

View File

@@ -0,0 +1,8 @@
#pragma once
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
void knn_device(float* ref_dev, int ref_width,
float* query_dev, int query_width,
int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream);

View File

@@ -0,0 +1,81 @@
#pragma once
#include "cpu/vision.h"
#ifdef WITH_CUDA
#include "cuda/vision.h"
//#include <THC/THC.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
//extern THCState *state;
#endif
// Batched k-nearest-neighbour search, dispatched on ref's device.
//   ref   : (batch, dim, ref_nb)   float tensor of reference points
//   query : (batch, dim, query_nb) float tensor of query points
//   idx   : (batch, k, query_nb)   long tensor, filled with neighbour indices
// Returns 1 on success.
int knn(at::Tensor& ref, at::Tensor& query, at::Tensor& idx)
{
    // NOTE: check dimensions
    long batch, ref_nb, query_nb, dim, k;
    batch = ref.size(0);
    dim = ref.size(1);
    k = idx.size(1);
    ref_nb = ref.size(2);
    query_nb = query.size(2);

    float *ref_dev = ref.data<float>();
    float *query_dev = query.data<float>();
    long *idx_dev = idx.data<long>();

    if (ref.type().is_cuda()) {
#ifdef WITH_CUDA
        // Scratch buffer holding the full (ref_nb x query_nb) distance matrix.
        float *dist_dev = nullptr;
        cudaError_t err = cudaMalloc((void**)&dist_dev, ref_nb * query_nb * sizeof(float));
        if (err != cudaSuccess) {
            fprintf(stderr, "cudaMalloc failed\n");
            exit(1);
        }
        // BUGFIX: dist_dev used to be cudaFree'd right here, *before* the
        // knn_device loop used it — every kernel launch then wrote into freed
        // memory, and the free after the loop was a double free. The single
        // cudaFree now happens after the loop only.
        for (int b = 0; b < batch; b++)
        {
            knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
            dist_dev, idx_dev + b * k * query_nb, c10::cuda::getCurrentCUDAStream());
        }
        cudaFree(dist_dev);

        err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            printf("error in knn: %s\n", cudaGetErrorString(err));
            fprintf(stderr, "aborting\n");
            exit(1);
        }
        return 1;
#else
        AT_ERROR("Not compiled with GPU support");
#endif
    }

    // CPU fallback: dist_dev holds one query-major distance matrix, ind_buf is
    // the per-query index scratch consumed by knn_cpu's bubble sort.
    float *dist_dev = (float*)malloc(ref_nb * query_nb * sizeof(float));
    long *ind_buf = (long*)malloc(ref_nb * sizeof(long));
    for (int b = 0; b < batch; b++) {
        knn_cpu(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
        dist_dev, idx_dev + b * k * query_nb, ind_buf);
    }

    free(dist_dev);
    free(ind_buf);

    return 1;
}

View File

@@ -0,0 +1,5 @@
#include "knn.h"
// Python binding: exposes knn(ref, query, idx) -> int on the extension module.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("knn", &knn, "k-nearest neighbors");
}

View File

@@ -0,0 +1,11 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
// Returns an int32 index tensor of shape (B, m, nsample): for every centroid
// in new_xyz, up to nsample indices of the points in xyz within `radius`.
at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
                      const int nsample);

View File

@@ -0,0 +1,46 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cmath>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#define TOTAL_THREADS 512
// Largest power of two <= work_size, clamped to [1, TOTAL_THREADS]; used to
// choose a 1-D thread-block size.
inline int opt_n_threads(int work_size) {
  const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);

  return max(min(1 << pow_2, TOTAL_THREADS), 1);
}

// 2-D block configuration: x gets its optimal width first, y receives what is
// left of the TOTAL_THREADS budget.
inline dim3 opt_block_config(int x, int y) {
  const int x_threads = opt_n_threads(x);
  const int y_threads =
      max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
  dim3 block_config(x_threads, y_threads, 1);

  return block_config;
}

// Abort with a diagnostic if the most recent CUDA call / kernel launch failed.
#define CUDA_CHECK_ERRORS()                                           \
  do {                                                                \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
      fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
              cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
              __FILE__);                                              \
      exit(-1);                                                       \
    }                                                                 \
  } while (0)
#endif

View File

@@ -0,0 +1,7 @@
// Author: chenxi-wang
#pragma once
#include <torch/extension.h>
// Returns an int32 index tensor of shape (B, m, nsample): for every center in
// new_xyz (with per-center 3x3 rotation rot), up to nsample indices of points
// in xyz inside the cylinder of radius `radius` spanning (hmin, hmax) along
// the rotated axis.
at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,
                          const int nsample);

View File

@@ -0,0 +1,10 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
// Gather point features by index (forward) and scatter-accumulate the
// gradient back onto the n original points (backward).
at::Tensor group_points(at::Tensor points, at::Tensor idx);
at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);

View File

@@ -0,0 +1,15 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
#include <vector>
// three_nn: find the 3 closest `knows` points for each unknown point.
// three_interpolate: blend features of those neighbours with the given
// weights; three_interpolate_grad propagates gradients back to m known points.
std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
                             at::Tensor weight);
at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
                                  at::Tensor weight, const int m);

View File

@@ -0,0 +1,11 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
// gather_points / gather_points_grad: index-select point features and the
// matching backward pass; furthest_point_sampling returns nsamples indices
// chosen by iterative farthest-point sampling.
at::Tensor gather_points(at::Tensor points, at::Tensor idx);
at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);

View File

@@ -0,0 +1,32 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#ifdef WITH_CUDA
#include <ATen/cuda/CUDAContext.h>
#endif
#include <torch/extension.h>
// Argument-validation macros shared by the extension entry points; each one
// raises a torch error (via TORCH_CHECK) naming the offending tensor.

// x must live on a CUDA device.
#define CHECK_CUDA(x)                                          \
  do {                                                         \
    TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor");     \
  } while (0)

// x must be contiguous in memory.
#define CHECK_CONTIGUOUS(x)                                         \
  do {                                                              \
    TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \
  } while (0)

// x must have dtype int32.
#define CHECK_IS_INT(x)                                       \
  do {                                                        \
    TORCH_CHECK(x.scalar_type() == at::ScalarType::Int,       \
                #x " must be an int tensor");                 \
  } while (0)

// x must have dtype float32.
#define CHECK_IS_FLOAT(x)                                     \
  do {                                                        \
    TORCH_CHECK(x.scalar_type() == at::ScalarType::Float,     \
                #x " must be a float tensor");                \
  } while (0)

View File

@@ -0,0 +1,113 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include<iostream>
#include "ball_query.h"
#include "utils.h"
#ifdef WITH_CUDA
void query_ball_point_kernel_wrapper(
int b, int n, int m, float radius,
int nsample,
const float *new_xyz,
const float *xyz,
int *idx
);
#endif
void query_ball_point_cpu(
int b, int n, int m, float radius,
int nsample,
const float *new_xyz,
const float *xyz,
int *idx
);
#ifdef DEBUG
// Pretty-printer for the ball_query arguments (compiled in DEBUG builds only).
void ball_query_debug(const torch::Tensor& new_xyz, const torch::Tensor& xyz,
                      float radius, int nsample) {
  std::cout << "===== ball_query.cpp 调试信息 =====" << std::endl;
  // print the sizes of new_xyz
  std::cout << "new_xyz.size(): (";
  for (auto s : new_xyz.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;
  // print the sizes of xyz
  std::cout << "xyz.size(): (";
  for (auto s : xyz.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;
  // print the search radius and the sample count
  std::cout << "radius: " << radius << std::endl;
  std::cout << "nsample: " << nsample << std::endl;
  std::cout << "====================================" << std::endl;
}
#endif
// For every centroid in new_xyz (B, m, 3), collect up to nsample indices of
// the points in xyz (B, n, 3) lying within `radius`. Returns an int32 index
// tensor of shape (B, m, nsample); dispatches to CUDA or CPU on the device
// of new_xyz.
at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
                      const int nsample) {
  CHECK_CONTIGUOUS(new_xyz);
  CHECK_CONTIGUOUS(xyz);
  CHECK_IS_FLOAT(new_xyz);
  CHECK_IS_FLOAT(xyz);

#ifdef WITH_CUDA
  if (new_xyz.is_cuda()) {
    CHECK_CUDA(xyz);
    // NOTE: a cudaDeviceSynchronize() and duplicate contiguity TORCH_CHECKs
    // were removed here — CHECK_CONTIGUOUS above already enforces the layout,
    // and a full-device sync before every launch is pure overhead: torch
    // allocations are ordered on the current stream the kernel runs on.
  }
#endif

#ifdef DEBUG
  ball_query_debug(new_xyz, xyz, radius, nsample);
#endif

  at::Tensor idx =
      torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
                   at::device(new_xyz.device()).dtype(at::ScalarType::Int));

#ifdef WITH_CUDA
  if (new_xyz.is_cuda()) {
    // Inputs are already contiguous (checked above), so the raw pointers can
    // be handed to the kernel directly.
    query_ball_point_kernel_wrapper(
        xyz.size(0),
        xyz.size(1),
        new_xyz.size(1),
        radius, nsample,
        new_xyz.data_ptr<float>(),
        xyz.data_ptr<float>(),
        idx.data_ptr<int>()
    );
  } else {
#endif
    query_ball_point_cpu(
        xyz.size(0),
        xyz.size(1),
        new_xyz.size(1),
        radius,
        nsample,
        new_xyz.data_ptr<float>(),
        xyz.data_ptr<float>(),
        idx.data_ptr<int>()
    );
#ifdef WITH_CUDA
  }
#endif
  return idx;
}

View File

@@ -0,0 +1,44 @@
// CPU reference implementation of ball query: for every centroid, gather up
// to `nsample` indices of points within `radius`. The first hit pre-fills the
// whole output row, so slots beyond the points actually found stay valid.
void query_ball_point_cpu(
    int b, int n, int m, float radius,
    int nsample,
    const float* new_xyz,
    const float* xyz,
    int* idx
) {
    const float r2 = radius * radius;  // compare squared distances, no sqrt
    for (int bi = 0; bi < b; ++bi) {
        // Slices for this batch element.
        const float* pts = xyz + bi * n * 3;
        const float* centers = new_xyz + bi * m * 3;
        int* out = idx + bi * m * nsample;
        for (int ci = 0; ci < m; ++ci) {
            const float cx = centers[ci * 3 + 0];
            const float cy = centers[ci * 3 + 1];
            const float cz = centers[ci * 3 + 2];
            int found = 0;
            for (int pi = 0; pi < n && found < nsample; ++pi) {
                const float dx = cx - pts[pi * 3 + 0];
                const float dy = cy - pts[pi * 3 + 1];
                const float dz = cz - pts[pi * 3 + 2];
                if (dx * dx + dy * dy + dz * dz >= r2) continue;
                if (found == 0) {
                    // First neighbour: fill every slot with it as the default.
                    for (int s = 0; s < nsample; ++s) {
                        out[ci * nsample + s] = pi;
                    }
                }
                out[ci * nsample + found] = pi;
                ++found;
            }
        }
    }
}

View File

@@ -0,0 +1,59 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
// input: new_xyz(b, m, 3) xyz(b, n, 3)
// output: idx(b, m, nsample)
// One block per batch element; threads stride over the m centroids. For each
// centroid the first nsample points within `radius` are recorded; the first
// hit pre-fills the whole row so unused slots stay valid.
__global__ void query_ball_point_kernel(int b, int n, int m, float radius,
                                        int nsample,
                                        const float *__restrict__ new_xyz,
                                        const float *__restrict__ xyz,
                                        int *__restrict__ idx) {
  int batch_index = blockIdx.x;
  // Shift the pointers to this batch element's slice.
  xyz += batch_index * n * 3;
  new_xyz += batch_index * m * 3;
  idx += m * nsample * batch_index;

  int index = threadIdx.x;
  int stride = blockDim.x;

  float radius2 = radius * radius;  // compare squared distances, avoids sqrt
  for (int j = index; j < m; j += stride) {
    float new_x = new_xyz[j * 3 + 0];
    float new_y = new_xyz[j * 3 + 1];
    float new_z = new_xyz[j * 3 + 2];
    for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
      float x = xyz[k * 3 + 0];
      float y = xyz[k * 3 + 1];
      float z = xyz[k * 3 + 2];
      float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
                 (new_z - z) * (new_z - z);
      if (d2 < radius2) {
        if (cnt == 0) {
          // First neighbour found: fill every slot with it as the default.
          for (int l = 0; l < nsample; ++l) {
            idx[j * nsample + l] = k;
          }
        }
        idx[j * nsample + cnt] = k;
        ++cnt;
      }
    }
  }
}

// Host-side launcher: one block per batch element on torch's current stream.
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
                                     int nsample, const float *new_xyz,
                                     const float *xyz, int *idx) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
      b, n, m, radius, nsample, new_xyz, xyz, idx);

  CUDA_CHECK_ERRORS();
}

View File

@@ -0,0 +1,27 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "ball_query.h"
#include "group_points.h"
#include "interpolate.h"
#include "sampling.h"
#include "cylinder_query.h"
// Python bindings for the pointnet2 CPU/CUDA operators.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("gather_points", &gather_points);
  m.def("gather_points_grad", &gather_points_grad);
  m.def("furthest_point_sampling", &furthest_point_sampling);

  m.def("three_nn", &three_nn);
  m.def("three_interpolate", &three_interpolate);
  m.def("three_interpolate_grad", &three_interpolate_grad);

  m.def("ball_query", &ball_query);

  m.def("group_points", &group_points);
  m.def("group_points_grad", &group_points_grad);

  m.def("cylinder_query", &cylinder_query);
}

View File

@@ -0,0 +1,95 @@
// Author: chenxi-wang
#include "cylinder_query.h"
#include "utils.h"
#ifdef WITH_CUDA
void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,
int nsample, const float *new_xyz,
const float *xyz, const float *rot, int *idx);
#endif
void query_cylinder_point_cpu(
int b, int n, int m, float radius, float hmin, float hmax,
int nsample, const float *new_xyz,
const float *xyz, const float *rot, int *idx);
#ifdef DEBUG
// Pretty-printer for the cylinder_query arguments (DEBUG builds only).
void cylinder_query_debug(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot,
                          const float radius, const float hmin, const float hmax,
                          const int nsample) {
  std::cout << "===== cylinder_query.cpp 调试信息 =====" << std::endl;
  // print the sizes of new_xyz
  std::cout << "new_xyz.size(): (";
  for (auto s : new_xyz.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;
  // print the sizes of xyz
  std::cout << "xyz.size(): (";
  for (auto s : xyz.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;
  // print the sizes of rot
  std::cout << "rot.size(): (";
  for (auto s : rot.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;
  // print the cylinder parameters and sample count
  std::cout << "radius: " << radius << std::endl;
  std::cout << "hmin: " << hmin << std::endl;
  std::cout << "hmax: " << hmax << std::endl;
  std::cout << "nsample: " << nsample << std::endl;
  std::cout << "====================================" << std::endl;
}
#endif
// For each center in new_xyz (B, m, 3) with per-center rotation rot (B, m, 9),
// collect up to nsample indices of points in xyz (B, n, 3) falling inside the
// cylinder of radius `radius` spanning (hmin, hmax) along the rotated axis.
// Returns int32 idx of shape (B, m, nsample); dispatches to CUDA or CPU.
at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,
                          const int nsample) {
  CHECK_CONTIGUOUS(new_xyz);
  CHECK_CONTIGUOUS(xyz);
  CHECK_CONTIGUOUS(rot);
  CHECK_IS_FLOAT(new_xyz);
  CHECK_IS_FLOAT(xyz);
  CHECK_IS_FLOAT(rot);

#ifdef WITH_CUDA
  if (new_xyz.is_cuda()) {
    CHECK_CUDA(xyz);
    CHECK_CUDA(rot);
  }
#endif

#ifdef DEBUG
  cylinder_query_debug(new_xyz, xyz, rot, radius, hmin, hmax, nsample);
#endif

  at::Tensor idx =
      torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
                   at::device(new_xyz.device()).dtype(at::ScalarType::Int));

#ifdef WITH_CUDA
  if (new_xyz.is_cuda()) {
    query_cylinder_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
                                        radius, hmin, hmax, nsample, new_xyz.data_ptr<float>(),
                                        xyz.data_ptr<float>(), rot.data_ptr<float>(), idx.data_ptr<int>());
  } else {
#endif
    query_cylinder_point_cpu(xyz.size(0), xyz.size(1), new_xyz.size(1),
                             radius, hmin, hmax, nsample, new_xyz.data_ptr<float>(),
                             xyz.data_ptr<float>(), rot.data_ptr<float>(), idx.data_ptr<int>());
#ifdef WITH_CUDA
  }
#endif
  return idx;
}

View File

@@ -0,0 +1,56 @@
// CPU reference implementation of cylinder query. For every centroid j the
// offset (x, y, z) of each point is expressed in the centroid's local frame:
// with rot row-major (r0..r8), x_rot/y_rot/z_rot are dot products with the
// matrix *columns*. A point is inside the cylinder when its radial distance
// in the (y_rot, z_rot) plane is below `radius` and its coordinate x_rot lies
// in (hmin, hmax). Up to nsample hits are recorded per centroid; the first
// hit pre-fills the output row so unused slots stay valid.
void query_cylinder_point_cpu(
    int b, int n, int m,
    float radius,
    float hmin, float hmax,
    int nsample,
    const float *new_xyz,
    const float *xyz,
    const float *rot,
    int *idx
) {
    float radius2 = radius * radius;  // compare squared radial distances
    for (int batch_index = 0; batch_index < b; ++batch_index) {
        // Slices for this batch element.
        const float *xyz_b = xyz + batch_index * n * 3;
        const float *new_xyz_b = new_xyz + batch_index * m * 3;
        const float *rot_b = rot + batch_index * m * 9;
        int *idx_b = idx + batch_index * m * nsample;
        for (int j = 0; j < m; ++j) {
            float nx = new_xyz_b[j * 3 + 0];
            float ny = new_xyz_b[j * 3 + 1];
            float nz = new_xyz_b[j * 3 + 2];
            // Per-centroid 3x3 rotation, row-major.
            float r0 = rot_b[j * 9 + 0];
            float r1 = rot_b[j * 9 + 1];
            float r2 = rot_b[j * 9 + 2];
            float r3 = rot_b[j * 9 + 3];
            float r4 = rot_b[j * 9 + 4];
            float r5 = rot_b[j * 9 + 5];
            float r6 = rot_b[j * 9 + 6];
            float r7 = rot_b[j * 9 + 7];
            float r8 = rot_b[j * 9 + 8];
            for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
                // Offset from the centroid, then rotate into the local frame.
                float x = xyz_b[k * 3 + 0] - nx;
                float y = xyz_b[k * 3 + 1] - ny;
                float z = xyz_b[k * 3 + 2] - nz;
                float x_rot = r0 * x + r3 * y + r6 * z;
                float y_rot = r1 * x + r4 * y + r7 * z;
                float z_rot = r2 * x + r5 * y + r8 * z;
                float d2 = y_rot * y_rot + z_rot * z_rot;
                if (d2 < radius2 && x_rot > hmin && x_rot < hmax) {
                    if (cnt == 0) {
                        // First hit pre-fills the row with a valid default.
                        for (int l = 0; l < nsample; ++l) {
                            idx_b[j * nsample + l] = k;
                        }
                    }
                    idx_b[j * nsample + cnt] = k;
                    ++cnt;
                }
            }
        }
    }
}

View File

@@ -0,0 +1,67 @@
// Author: chenxi-wang
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
// CUDA version of cylinder query: one block per batch element, threads stride
// over the m centroids. Point offsets are rotated into the centroid's local
// frame (dot products with the columns of the row-major 3x3 rot); a point is
// kept when its radial (y_rot, z_rot) distance is below `radius` and x_rot
// lies in (hmin, hmax). The first hit pre-fills the output row.
__global__ void query_cylinder_point_kernel(int b, int n, int m, float radius, float hmin, float hmax,
                                            int nsample,
                                            const float *__restrict__ new_xyz,
                                            const float *__restrict__ xyz,
                                            const float *__restrict__ rot,
                                            int *__restrict__ idx) {
  int batch_index = blockIdx.x;
  // Shift the pointers to this batch element's slice.
  xyz += batch_index * n * 3;
  new_xyz += batch_index * m * 3;
  rot += batch_index * m * 9;
  idx += m * nsample * batch_index;

  int index = threadIdx.x;
  int stride = blockDim.x;

  float radius2 = radius * radius;  // compare squared radial distances
  for (int j = index; j < m; j += stride) {
    float new_x = new_xyz[j * 3 + 0];
    float new_y = new_xyz[j * 3 + 1];
    float new_z = new_xyz[j * 3 + 2];
    // Per-centroid 3x3 rotation, row-major.
    float r0 = rot[j * 9 + 0];
    float r1 = rot[j * 9 + 1];
    float r2 = rot[j * 9 + 2];
    float r3 = rot[j * 9 + 3];
    float r4 = rot[j * 9 + 4];
    float r5 = rot[j * 9 + 5];
    float r6 = rot[j * 9 + 6];
    float r7 = rot[j * 9 + 7];
    float r8 = rot[j * 9 + 8];
    for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
      float x = xyz[k * 3 + 0] - new_x;
      float y = xyz[k * 3 + 1] - new_y;
      float z = xyz[k * 3 + 2] - new_z;
      float x_rot = r0 * x + r3 * y + r6 * z;
      float y_rot = r1 * x + r4 * y + r7 * z;
      float z_rot = r2 * x + r5 * y + r8 * z;
      float d2 = y_rot * y_rot + z_rot * z_rot;
      if (d2 < radius2 && x_rot > hmin && x_rot < hmax) {
        if (cnt == 0) {
          // First hit pre-fills the row with a valid default.
          for (int l = 0; l < nsample; ++l) {
            idx[j * nsample + l] = k;
          }
        }
        idx[j * nsample + cnt] = k;
        ++cnt;
      }
    }
  }
}

// Host-side launcher: one block per batch element on torch's current stream.
void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,
                                         int nsample, const float *new_xyz,
                                         const float *xyz, const float *rot, int *idx) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  query_cylinder_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
      b, n, m, radius, hmin, hmax, nsample, new_xyz, xyz, rot, idx);

  CUDA_CHECK_ERRORS();
}

View File

@@ -0,0 +1,153 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "group_points.h"
#include "utils.h"
#ifdef WITH_CUDA
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
const float *points, const int *idx,
float *out);
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
int nsample, const float *grad_out,
const int *idx, float *grad_points);
#endif
void group_points_cpu(int b, int c, int n, int npoints, int nsample,
const float *points, const int *idx,
float *out);
void group_points_grad_cpu(int b, int c, int n, int npoints,
int nsample, const float *grad_out,
const int *idx, float *grad_points);
#ifdef DEBUG
// Pretty-printer for the group_points arguments (DEBUG builds only).
void group_points_debug(at::Tensor points, at::Tensor idx) {
  std::cout << "===== group_points.cpp 调试信息 =====" << std::endl;
  std::cout << "===== group points 函数调试信息 =====" << std::endl;
  // print the sizes of points
  std::cout << "points.size(): (";
  for (auto s : points.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;
  // print the sizes of idx
  std::cout << "idx.size(): (";
  for (auto s : idx.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;
  std::cout << "====================================" << std::endl;
}
// Pretty-printer for the group_points_grad arguments (DEBUG builds only).
void group_points_grad_debug(at::Tensor grad_out, at::Tensor idx, const int n) {
  std::cout << "===== group_points.cpp 调试信息 =====" << std::endl;
  std::cout << "===== group_points_grad 函数调试信息 =====" << std::endl;
  // print the sizes of grad_out
  std::cout << "grad_out.size(): (";
  for (auto s : grad_out.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;
  // print the sizes of idx
  std::cout << "idx.size(): (";
  for (auto s : idx.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;
  // print the number of original points the gradient scatters back onto
  std::cout << "n: " << n << std::endl;
  std::cout << "====================================" << std::endl;
}
#endif
// Gather features by group indices: points (B, C, n) and idx
// (B, npoints, nsample) -> output (B, C, npoints, nsample), float32.
// Dispatches to CUDA or CPU on the device of `points`.
at::Tensor group_points(at::Tensor points, at::Tensor idx) {
  CHECK_CONTIGUOUS(points);
  CHECK_CONTIGUOUS(idx);
  CHECK_IS_FLOAT(points);
  CHECK_IS_INT(idx);

#ifdef WITH_CUDA
  if (points.is_cuda()) {
    CHECK_CUDA(idx);
  }
#endif

#ifdef DEBUG
  group_points_debug(points, idx);
#endif

  at::Tensor output =
      torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},
                   at::device(points.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (points.is_cuda()) {
    group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
                                idx.size(1), idx.size(2), points.data_ptr<float>(),
                                idx.data_ptr<int>(), output.data_ptr<float>());
  } else {
#endif
    // CPU fallback.
    group_points_cpu(points.size(0), points.size(1), points.size(2),
                     idx.size(1), idx.size(2), points.data_ptr<float>(),
                     idx.data_ptr<int>(), output.data_ptr<float>());
#ifdef WITH_CUDA
  }
#endif
  return output;
}
// Backward of group_points: scatters grad_out(b, c, npoints, nsample) back
// through idx(b, npoints, nsample) into grad_points(b, c, n); `n` is the
// point count of the original (pre-grouping) feature tensor.
at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {
CHECK_CONTIGUOUS(grad_out);
CHECK_CONTIGUOUS(idx);
CHECK_IS_FLOAT(grad_out);
CHECK_IS_INT(idx);
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
CHECK_CUDA(idx);
}
#endif
#ifdef DEBUG
group_points_grad_debug(grad_out, idx, n);
#endif
// Gradients accumulate into a zero-initialized buffer.
at::Tensor output =
torch::zeros({grad_out.size(0), grad_out.size(1), n},
at::device(grad_out.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
group_points_grad_kernel_wrapper(
grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),
grad_out.data_ptr<float>(), idx.data_ptr<int>(), output.data_ptr<float>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
group_points_grad_cpu(
grad_out.size(0), grad_out.size(1), n,idx.size(1), idx.size(2),
grad_out.data_ptr<float>(), idx.data_ptr<int>(), output.data_ptr<float>());
#ifdef WITH_CUDA
}
#endif
return output;
}

View File

@@ -0,0 +1,54 @@
// CPU reference implementation of the group_points forward pass.
//
// points  : (b, c, n) flattened feature tensor
// idx     : (b, npoints, nsample) gather indices into the n axis
// out     : (b, c, npoints, nsample) gathered output
//
// Mirrors the CUDA kernel, including its bounds check: an out-of-range
// index yields 0 instead of an out-of-bounds read.
void group_points_cpu(
    int b, int c, int n,
    int npoints,
    int nsample,
    const float *points,
    const int *idx,
    float *out
) {
  for (int batch = 0; batch < b; ++batch) {
    // Per-batch base pointers.
    const float *points_b = points + batch * n * c;
    const int *idx_b = idx + batch * npoints * nsample;
    float *out_b = out + batch * npoints * nsample * c;
    // Flattened loop over (channel l, point j), matching the CUDA kernel.
    for (int i = 0; i < c * npoints; ++i) {
      const int l = i / npoints;
      const int j = i % npoints;
      for (int k = 0; k < nsample; ++k) {
        int ii = idx_b[j * nsample + k];
        // Consistent with the CUDA kernel: invalid index -> 0, not UB.
        if (ii < 0 || ii >= n) {
          out_b[(l * npoints + j) * nsample + k] = 0.0f;
        } else {
          out_b[(l * npoints + j) * nsample + k] = points_b[l * n + ii];
        }
      }
    }
  }
}
// CPU reference implementation of the group_points backward pass.
//
// grad_out    : (b, c, npoints, nsample) incoming gradient
// idx         : (b, npoints, nsample) indices used in the forward pass
// grad_points : (b, c, n) output gradient, zeroed here before accumulation
//
// Out-of-range indices are skipped (they produced 0 in the forward pass,
// so they carry no gradient) instead of writing out of bounds.
void group_points_grad_cpu(
    int b, int c, int n,
    int npoints,
    int nsample,
    const float *grad_out,
    const int *idx,
    float *grad_points
) {
  for (int batch_index = 0; batch_index < b; ++batch_index) {
    // Per-batch base pointers.
    const float *grad_out_b = grad_out + batch_index * npoints * nsample * c;
    const int *idx_b = idx + batch_index * npoints * nsample;
    float *grad_points_b = grad_points + batch_index * n * c;
    // Zero the accumulator so the function is self-contained.
    for (int i = 0; i < n * c; ++i) {
      grad_points_b[i] = 0.0f;
    }
    for (int i = 0; i < c * npoints; ++i) {
      const int l = i / npoints;
      const int j = i % npoints;
      for (int k = 0; k < nsample; ++k) {
        int ii = idx_b[j * nsample + k];
        // Skip invalid indices — they contributed nothing in the forward.
        if (ii < 0 || ii >= n) continue;
        grad_points_b[l * n + ii] += grad_out_b[(l * npoints + j) * nsample + k];
      }
    }
  }
}

View File

@@ -0,0 +1,84 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
// input: points(b, c, n) idx(b, npoints, nsample)
// output: out(b, c, npoints, nsample)
// One block per batch element; the 2-D thread block is flattened and strided
// over the (c * npoints) work items. Each item copies nsample features.
__global__ void group_points_kernel(int b, int c, int n, int npoints,
                                    int nsample,
                                    const float *__restrict__ points,
                                    const int *__restrict__ idx,
                                    float *__restrict__ out) {
int batch_index = blockIdx.x;
// Shift base pointers to this batch's slice.
points += batch_index * n * c;
idx += batch_index * npoints * nsample;
out += batch_index * npoints * nsample * c;
// Flatten the 2-D thread index; stride is the total thread count per block.
const int index = threadIdx.y * blockDim.x + threadIdx.x;
const int stride = blockDim.y * blockDim.x;
for (int i = index; i < c * npoints; i += stride) {
const int l = i / npoints;  // channel
const int j = i % npoints;  // grouped point
for (int k = 0; k < nsample; ++k) {
int ii = idx[j * nsample + k];
// Out-of-range indices yield 0 instead of an out-of-bounds read.
if (ii < 0 || ii >= n) {
out[(l * npoints + j) * nsample + k] = 0;
} else {
out[(l * npoints + j) * nsample + k] = points[l * n + ii];
}
}
}
}
// Host-side launcher: one block per batch, block shape chosen by
// opt_block_config from the (npoints, c) work size; runs on the current
// PyTorch CUDA stream.
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
                                 const float *points, const int *idx,
                                 float *out) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
b, c, n, npoints, nsample, points, idx, out);
CUDA_CHECK_ERRORS();
}
// input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
// output: grad_points(b, c, n)
// Backward of group_points: one block per batch, threads strided over the
// (c * npoints) work items, scattering gradients with atomicAdd (multiple
// samples may reference the same source point).
__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
                                         int nsample,
                                         const float *__restrict__ grad_out,
                                         const int *__restrict__ idx,
                                         float *__restrict__ grad_points) {
  int batch_index = blockIdx.x;
  // Shift base pointers to this batch's slice.
  grad_out += batch_index * npoints * nsample * c;
  idx += batch_index * npoints * nsample;
  grad_points += batch_index * n * c;
  const int index = threadIdx.y * blockDim.x + threadIdx.x;
  const int stride = blockDim.y * blockDim.x;
  for (int i = index; i < c * npoints; i += stride) {
    const int l = i / npoints;  // channel
    const int j = i % npoints;  // grouped point
    for (int k = 0; k < nsample; ++k) {
      int ii = idx[j * nsample + k];
      // Mirror the forward kernel's bounds check: an out-of-range index
      // produced 0 in the forward pass, so it must not scatter a gradient
      // (and would otherwise corrupt memory outside grad_points).
      if (ii < 0 || ii >= n) continue;
      atomicAdd(grad_points + l * n + ii,
                grad_out[(l * npoints + j) * nsample + k]);
    }
  }
}
// Host-side launcher for the grouping backward kernel; same launch
// configuration as the forward wrapper, on the current PyTorch CUDA stream.
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                      int nsample, const float *grad_out,
                                      const int *idx, float *grad_points) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
b, c, n, npoints, nsample, grad_out, idx, grad_points);
CUDA_CHECK_ERRORS();
}

View File

@@ -0,0 +1,241 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "interpolate.h"
#include "utils.h"
#ifdef WITH_CUDA
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx);
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
const float *points, const int *idx,
const float *weight, float *out);
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
const float *grad_out,
const int *idx, const float *weight,
float *grad_points);
#endif
void three_nn_cpu(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx);
void three_interpolate_cpu(int b, int c, int m, int n,
const float *points, const int *idx,
const float *weight, float *out);
void three_interpolate_grad_cpu(int b, int c, int n, int m,
const float *grad_out,
const int *idx, const float *weight,
float *grad_points);
#ifdef DEBUG
// Debug helper: dump the shapes of the inputs to three_nn on stdout.
void three_nn_debug(at::Tensor unknowns, at::Tensor knows) {
// Shared shape printer; output is byte-identical to the hand-rolled loops.
const auto print_shape = [](const char *label, const at::Tensor &t) {
std::cout << label << "(";
for (int64_t d = 0; d < t.dim(); ++d) {
std::cout << t.size(d) << " ";
}
std::cout << ")" << std::endl;
};
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
std::cout << "=========== three_nn ===========" << std::endl;
print_shape("unknowns.size(): ", unknowns);
print_shape("knows.size(): ", knows);
std::cout << "====================================" << std::endl;
}
// Debug helper: dump the shapes of the inputs to three_interpolate on stdout.
void three_interpolate_debug(at::Tensor points, at::Tensor idx,
                             at::Tensor weight) {
// Shared shape printer; output is byte-identical to the hand-rolled loops.
const auto print_shape = [](const char *label, const at::Tensor &t) {
std::cout << label << "(";
for (int64_t d = 0; d < t.dim(); ++d) {
std::cout << t.size(d) << " ";
}
std::cout << ")" << std::endl;
};
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
std::cout << "=========== three_interpolate ===========" << std::endl;
print_shape("points.size(): ", points);
print_shape("idx.size(): ", idx);
print_shape("weight.size(): ", weight);
std::cout << "====================================" << std::endl;
}
// Debug helper: dump the shapes of the inputs to three_interpolate_grad,
// plus the original point count m, on stdout.
void three_interpolate_grad_debug(at::Tensor grad_out, at::Tensor idx,
                                  at::Tensor weight, const int m) {
// Shared shape printer; output is byte-identical to the hand-rolled loops.
const auto print_shape = [](const char *label, const at::Tensor &t) {
std::cout << label << "(";
for (int64_t d = 0; d < t.dim(); ++d) {
std::cout << t.size(d) << " ";
}
std::cout << ")" << std::endl;
};
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
std::cout << "=========== three_interpolate_grad ===========" << std::endl;
print_shape("grad_out.size(): ", grad_out);
print_shape("idx.size(): ", idx);
print_shape("weight.size(): ", weight);
std::cout << "m: " << m << std::endl;
std::cout << "====================================" << std::endl;
}
#endif
// For each point in unknowns(b, n, 3), finds its 3 nearest neighbors among
// knows(b, m, 3). Returns {dist2, idx}: squared distances (b, n, 3) and the
// corresponding indices (b, n, 3), both sorted nearest-first.
std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {
CHECK_CONTIGUOUS(unknowns);
CHECK_CONTIGUOUS(knows);
CHECK_IS_FLOAT(unknowns);
CHECK_IS_FLOAT(knows);
#ifdef WITH_CUDA
if (unknowns.is_cuda()) {
CHECK_CUDA(knows);
}
#endif
#ifdef DEBUG
three_nn_debug(unknowns, knows);
#endif
// Outputs allocated on the same device as the input.
at::Tensor idx =
torch::zeros({unknowns.size(0), unknowns.size(1), 3},
at::device(unknowns.device()).dtype(at::ScalarType::Int));
at::Tensor dist2 =
torch::zeros({unknowns.size(0), unknowns.size(1), 3},
at::device(unknowns.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (unknowns.is_cuda()) {
three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),
unknowns.data_ptr<float>(), knows.data_ptr<float>(),
dist2.data_ptr<float>(), idx.data_ptr<int>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
three_nn_cpu(unknowns.size(0), unknowns.size(1), knows.size(1),
unknowns.data_ptr<float>(), knows.data_ptr<float>(),
dist2.data_ptr<float>(), idx.data_ptr<int>());
#ifdef WITH_CUDA
}
#endif
return {dist2, idx};
}
// Weighted interpolation: for each of the idx(b, n, 3) neighbor triplets,
// blends features from points(b, c, m) with weight(b, n, 3), producing
// out(b, c, n).
at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
                             at::Tensor weight) {
CHECK_CONTIGUOUS(points);
CHECK_CONTIGUOUS(idx);
CHECK_CONTIGUOUS(weight);
CHECK_IS_FLOAT(points);
CHECK_IS_INT(idx);
CHECK_IS_FLOAT(weight);
#ifdef WITH_CUDA
if (points.is_cuda()) {
CHECK_CUDA(idx);
CHECK_CUDA(weight);
}
#endif
#ifdef DEBUG
three_interpolate_debug(points, idx, weight);
#endif
// Output allocated on the same device as the input.
at::Tensor output =
torch::zeros({points.size(0), points.size(1), idx.size(1)},
at::device(points.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (points.is_cuda()) {
three_interpolate_kernel_wrapper(
points.size(0), points.size(1), points.size(2), idx.size(1),
points.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
output.data_ptr<float>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
three_interpolate_cpu(
points.size(0), points.size(1), points.size(2), idx.size(1),
points.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
output.data_ptr<float>());
#ifdef WITH_CUDA
}
#endif
return output;
}
// Backward of three_interpolate: scatters grad_out(b, c, n) through
// idx/weight back into grad_points(b, c, m); `m` is the point count of the
// original (pre-interpolation) feature tensor.
at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
                                  at::Tensor weight, const int m) {
CHECK_CONTIGUOUS(grad_out);
CHECK_CONTIGUOUS(idx);
CHECK_CONTIGUOUS(weight);
CHECK_IS_FLOAT(grad_out);
CHECK_IS_INT(idx);
CHECK_IS_FLOAT(weight);
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
CHECK_CUDA(idx);
CHECK_CUDA(weight);
}
#endif
#ifdef DEBUG
three_interpolate_grad_debug(grad_out, idx, weight, m);
#endif
// Gradients accumulate into a zero-initialized buffer.
at::Tensor output =
torch::zeros({grad_out.size(0), grad_out.size(1), m},
at::device(grad_out.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
three_interpolate_grad_kernel_wrapper(
grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
grad_out.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
output.data_ptr<float>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
three_interpolate_grad_cpu(
grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
grad_out.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
output.data_ptr<float>());
#ifdef WITH_CUDA
}
#endif
return output;
}

View File

@@ -0,0 +1,123 @@
// CPU reference implementation of three_nn.
//
// unknown : (b, n, 3) query points
// known   : (b, m, 3) reference points
// dist2   : (b, n, 3) output squared distances, nearest-first
// idx     : (b, n, 3) output indices of the 3 nearest reference points
void three_nn_cpu(
    int b, int n, int m,
    const float *unknown,
    const float *known,
    float *dist2,
    int *idx
) {
  for (int bi = 0; bi < b; ++bi) {
    // Per-batch base pointers.
    const float *q = unknown + bi * n * 3;
    const float *ref = known + bi * m * 3;
    float *d_out = dist2 + bi * n * 3;
    int *i_out = idx + bi * n * 3;
    for (int j = 0; j < n; ++j) {
      const float ux = q[j * 3 + 0];
      const float uy = q[j * 3 + 1];
      const float uz = q[j * 3 + 2];
      // Running top-3 (smallest squared distance first), seeded with a
      // sentinel larger than any real distance.
      double best[3] = {1e40, 1e40, 1e40};
      int besti[3] = {0, 0, 0};
      for (int k = 0; k < m; ++k) {
        const float dx = ux - ref[k * 3 + 0];
        const float dy = uy - ref[k * 3 + 1];
        const float dz = uz - ref[k * 3 + 2];
        const float d = dx * dx + dy * dy + dz * dz;
        // Insert d into the sorted top-3, shifting worse entries down.
        for (int slot = 0; slot < 3; ++slot) {
          if (d < best[slot]) {
            for (int s = 2; s > slot; --s) {
              best[s] = best[s - 1];
              besti[s] = besti[s - 1];
            }
            best[slot] = d;
            besti[slot] = k;
            break;
          }
        }
      }
      for (int s = 0; s < 3; ++s) {
        d_out[j * 3 + s] = best[s];
        i_out[j * 3 + s] = besti[s];
      }
    }
  }
}
// CPU reference implementation of three_interpolate.
//
// points : (b, c, m) source features
// idx    : (b, n, 3) neighbor indices into the m axis
// weight : (b, n, 3) interpolation weights
// out    : (b, c, n) blended output features
void three_interpolate_cpu(
    int b, int c, int m, int n,
    const float *points,
    const int *idx,
    const float *weight,
    float *out
) {
  for (int bi = 0; bi < b; ++bi) {
    // Per-batch base pointers.
    const float *src = points + bi * m * c;
    const int *nbr = idx + bi * n * 3;
    const float *w = weight + bi * n * 3;
    float *dst = out + bi * n * c;
    // Nested channel/point loops (equivalent to a flattened c*n loop).
    for (int l = 0; l < c; ++l) {
      for (int j = 0; j < n; ++j) {
        const int i1 = nbr[j * 3 + 0];
        const int i2 = nbr[j * 3 + 1];
        const int i3 = nbr[j * 3 + 2];
        dst[l * n + j] = w[j * 3 + 0] * src[l * m + i1] +
                         w[j * 3 + 1] * src[l * m + i2] +
                         w[j * 3 + 2] * src[l * m + i3];
      }
    }
  }
}
// CPU reference implementation of the three_interpolate backward pass.
//
// grad_out    : (b, c, n) incoming gradient
// idx, weight : (b, n, 3) neighbor indices and weights from the forward
// grad_points : (b, c, m) output gradient, zeroed here before accumulation
void three_interpolate_grad_cpu(
    int b, int c, int n, int m,
    const float *grad_out,
    const int *idx, const float *weight,
    float *grad_points
) {
  for (int bi = 0; bi < b; ++bi) {
    // Per-batch base pointers.
    const float *g_in = grad_out + bi * n * c;
    const int *nbr = idx + bi * n * 3;
    const float *w = weight + bi * n * 3;
    float *g_out = grad_points + bi * m * c;
    // Zero the accumulator so the function is self-contained.
    for (int i = 0; i < m * c; ++i) {
      g_out[i] = 0.0f;
    }
    // Nested channel/point loops (equivalent to a flattened c*n loop);
    // each gradient element fans out to its 3 source points.
    for (int l = 0; l < c; ++l) {
      for (int j = 0; j < n; ++j) {
        const float g = g_in[l * n + j];
        g_out[l * m + nbr[j * 3 + 0]] += w[j * 3 + 0] * g;
        g_out[l * m + nbr[j * 3 + 1]] += w[j * 3 + 1] * g;
        g_out[l * m + nbr[j * 3 + 2]] += w[j * 3 + 2] * g;
      }
    }
  }
}

View File

@@ -0,0 +1,159 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
// input: unknown(b, n, 3) known(b, m, 3)
// output: dist2(b, n, 3), idx(b, n, 3)
// One block per batch element; threads stride over the n query points.
// Each thread brute-force scans all m reference points, keeping a running
// top-3 of smallest squared distances via an if/else insertion chain.
__global__ void three_nn_kernel(int b, int n, int m,
                                const float *__restrict__ unknown,
                                const float *__restrict__ known,
                                float *__restrict__ dist2,
                                int *__restrict__ idx) {
int batch_index = blockIdx.x;
// Shift base pointers to this batch's slice.
unknown += batch_index * n * 3;
known += batch_index * m * 3;
dist2 += batch_index * n * 3;
idx += batch_index * n * 3;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < n; j += stride) {
float ux = unknown[j * 3 + 0];
float uy = unknown[j * 3 + 1];
float uz = unknown[j * 3 + 2];
// Sentinel distances larger than any real squared distance.
double best1 = 1e40, best2 = 1e40, best3 = 1e40;
int besti1 = 0, besti2 = 0, besti3 = 0;
for (int k = 0; k < m; ++k) {
float x = known[k * 3 + 0];
float y = known[k * 3 + 1];
float z = known[k * 3 + 2];
float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
// Insert d into the sorted (best1 <= best2 <= best3) triple.
if (d < best1) {
best3 = best2;
besti3 = besti2;
best2 = best1;
besti2 = besti1;
best1 = d;
besti1 = k;
} else if (d < best2) {
best3 = best2;
besti3 = besti2;
best2 = d;
besti2 = k;
} else if (d < best3) {
best3 = d;
besti3 = k;
}
}
dist2[j * 3 + 0] = best1;
dist2[j * 3 + 1] = best2;
dist2[j * 3 + 2] = best3;
idx[j * 3 + 0] = besti1;
idx[j * 3 + 1] = besti2;
idx[j * 3 + 2] = besti3;
}
}
// Host-side launcher: one block per batch, thread count chosen by
// opt_n_threads(n); runs on the current PyTorch CUDA stream.
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
                             const float *known, float *dist2, int *idx) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
                                                    dist2, idx);
CUDA_CHECK_ERRORS();
}
// input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
// output: out(b, c, n)
// One block per batch element; the 2-D thread block is flattened and strided
// over the (c * n) output elements. Each output is the 3-way weighted blend
// of its neighbors' features.
__global__ void three_interpolate_kernel(int b, int c, int m, int n,
                                         const float *__restrict__ points,
                                         const int *__restrict__ idx,
                                         const float *__restrict__ weight,
                                         float *__restrict__ out) {
int batch_index = blockIdx.x;
// Shift base pointers to this batch's slice.
points += batch_index * m * c;
idx += batch_index * n * 3;
weight += batch_index * n * 3;
out += batch_index * n * c;
const int index = threadIdx.y * blockDim.x + threadIdx.x;
const int stride = blockDim.y * blockDim.x;
for (int i = index; i < c * n; i += stride) {
const int l = i / n;  // channel
const int j = i % n;  // output point
float w1 = weight[j * 3 + 0];
float w2 = weight[j * 3 + 1];
float w3 = weight[j * 3 + 2];
int i1 = idx[j * 3 + 0];
int i2 = idx[j * 3 + 1];
int i3 = idx[j * 3 + 2];
out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
         points[l * m + i3] * w3;
}
}
// Host-side launcher: one block per batch, block shape chosen by
// opt_block_config from the (n, c) work size; runs on the current PyTorch
// CUDA stream.
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
                                      const float *points, const int *idx,
                                      const float *weight, float *out) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
b, c, m, n, points, idx, weight, out);
CUDA_CHECK_ERRORS();
}
// input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
// output: grad_points(b, c, m)
// Backward of three_interpolate: each gradient element fans out to its 3
// source points via atomicAdd (several outputs may share a source point).
__global__ void three_interpolate_grad_kernel(
    int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, const float *__restrict__ weight,
    float *__restrict__ grad_points) {
int batch_index = blockIdx.x;
// Shift base pointers to this batch's slice.
grad_out += batch_index * n * c;
idx += batch_index * n * 3;
weight += batch_index * n * 3;
grad_points += batch_index * m * c;
const int index = threadIdx.y * blockDim.x + threadIdx.x;
const int stride = blockDim.y * blockDim.x;
for (int i = index; i < c * n; i += stride) {
const int l = i / n;  // channel
const int j = i % n;  // gradient point
float w1 = weight[j * 3 + 0];
float w2 = weight[j * 3 + 1];
float w3 = weight[j * 3 + 2];
int i1 = idx[j * 3 + 0];
int i2 = idx[j * 3 + 1];
int i3 = idx[j * 3 + 2];
atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);
atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);
atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);
}
}
// Host-side launcher for the interpolation backward kernel; same launch
// configuration as the forward wrapper, on the current PyTorch CUDA stream.
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
                                           const float *grad_out,
                                           const int *idx, const float *weight,
                                           float *grad_points) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
b, c, n, m, grad_out, idx, weight, grad_points);
CUDA_CHECK_ERRORS();
}

View File

@@ -0,0 +1,207 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "sampling.h"
#include "utils.h"
#ifdef WITH_CUDA
void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
const float *points, const int *idx,
float *out);
void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
const float *grad_out, const int *idx,
float *grad_points);
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
const float *dataset, float *temp,
int *idxs);
#endif
void gather_points_cpu(int b, int c, int n, int npoints,
const float *points, const int *idx,
float *out);
void gather_points_grad_cpu(int b, int c, int n, int npoints,
const float *grad_out, const int *idx,
float *grad_points);
void furthest_point_sampling_cpu(int b, int n, int m,
const float *dataset, float *temp,
int *idxs);
#ifdef DEBUG
// Debug helper: dump the shapes of the inputs to gather_points on stdout.
void gather_points_debug(at::Tensor points, at::Tensor idx) {
// Shared shape printer; output is byte-identical to the hand-rolled loops.
const auto print_shape = [](const char *label, const at::Tensor &t) {
std::cout << label << "(";
for (int64_t d = 0; d < t.dim(); ++d) {
std::cout << t.size(d) << " ";
}
std::cout << ")" << std::endl;
};
std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
std::cout << "=========== gather_points ===========" << std::endl;
print_shape("points.size(): ", points);
print_shape("idx.size(): ", idx);
std::cout << "====================================" << std::endl;
}
// Debug helper: dump the shapes of the inputs to gather_points_grad, plus
// the original point count n, on stdout.
void gather_points_grad_debug(at::Tensor grad_out, at::Tensor idx,
                              const int n) {
// Shared shape printer; output is byte-identical to the hand-rolled loops.
const auto print_shape = [](const char *label, const at::Tensor &t) {
std::cout << label << "(";
for (int64_t d = 0; d < t.dim(); ++d) {
std::cout << t.size(d) << " ";
}
std::cout << ")" << std::endl;
};
std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
std::cout << "=========== gather_points_grad ===========" << std::endl;
print_shape("grad_out.size(): ", grad_out);
print_shape("idx.size(): ", idx);
std::cout << "n: " << n << std::endl;
std::cout << "====================================" << std::endl;
}
// Debug helper: dump the input shape and sample count for
// furthest_point_sampling on stdout.
void furthest_point_sampling_debug(at::Tensor points, const int nsamples) {
std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
std::cout << "=========== furthest_point_sampling ===========" << std::endl;
std::cout << "points.size(): (";
// Print each dimension separated by a space, matching the other helpers.
for (int64_t d = 0; d < points.dim(); ++d) {
std::cout << points.size(d) << " ";
}
std::cout << ")" << std::endl;
std::cout << "nsamples: " << nsamples << std::endl;
std::cout << "====================================" << std::endl;
}
#endif
// Gathers feature columns: out(b, c, npoints) = points(b, c, n) indexed by
// idx(b, npoints) along the last axis. Dispatches to CUDA when the input is
// on the GPU (WITH_CUDA builds), otherwise uses the CPU fallback.
at::Tensor gather_points(at::Tensor points, at::Tensor idx) {
CHECK_CONTIGUOUS(points);
CHECK_CONTIGUOUS(idx);
CHECK_IS_FLOAT(points);
CHECK_IS_INT(idx);
#ifdef WITH_CUDA
if (points.is_cuda()) {
CHECK_CUDA(idx);
}
#endif
#ifdef DEBUG
gather_points_debug(points, idx);
#endif
// Output allocated on the same device as the input.
at::Tensor output =
torch::zeros({points.size(0), points.size(1), idx.size(1)},
at::device(points.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (points.is_cuda()) {
gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
idx.size(1), points.data_ptr<float>(),
idx.data_ptr<int>(), output.data_ptr<float>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
gather_points_cpu(points.size(0), points.size(1), points.size(2),
idx.size(1), points.data_ptr<float>(),
idx.data_ptr<int>(), output.data_ptr<float>());
#ifdef WITH_CUDA
}
#endif
return output;
}
// Backward of gather_points: scatters grad_out(b, c, npoints) back through
// idx(b, npoints) into grad_points(b, c, n); `n` is the point count of the
// original (pre-gather) feature tensor.
at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,
                              const int n) {
CHECK_CONTIGUOUS(grad_out);
CHECK_CONTIGUOUS(idx);
CHECK_IS_FLOAT(grad_out);
CHECK_IS_INT(idx);
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
CHECK_CUDA(idx);
}
#endif
#ifdef DEBUG
gather_points_grad_debug(grad_out, idx, n);
#endif
// Gradients accumulate into a zero-initialized buffer.
at::Tensor output =
torch::zeros({grad_out.size(0), grad_out.size(1), n},
at::device(grad_out.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (grad_out.is_cuda()) {
gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,
idx.size(1), grad_out.data_ptr<float>(),
idx.data_ptr<int>(), output.data_ptr<float>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
gather_points_grad_cpu(grad_out.size(0), grad_out.size(1), n,
idx.size(1), grad_out.data_ptr<float>(),
idx.data_ptr<int>(), output.data_ptr<float>());
#ifdef WITH_CUDA
}
#endif
return output;
}
// Iterative farthest point sampling over points(b, n, 3); returns the
// indices (b, nsamples) of the selected subset. `tmp` is scratch space
// holding each point's squared distance to the selected set, seeded with a
// large sentinel (1e10).
at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {
CHECK_CONTIGUOUS(points);
CHECK_IS_FLOAT(points);
#ifdef DEBUG
furthest_point_sampling_debug(points, nsamples);
#endif
at::Tensor output =
torch::zeros({points.size(0), nsamples},
at::device(points.device()).dtype(at::ScalarType::Int));
// Per-point min-distance scratch buffer, initialized to the sentinel.
at::Tensor tmp =
torch::full({points.size(0), points.size(1)}, 1e10,
at::device(points.device()).dtype(at::ScalarType::Float));
#ifdef WITH_CUDA
if (points.is_cuda()) {
furthest_point_sampling_kernel_wrapper(
points.size(0), points.size(1), nsamples, points.data_ptr<float>(),
tmp.data_ptr<float>(), output.data_ptr<int>());
} else {
#endif
// TORCH_CHECK(false, "CPU not supported");
// CPU fallback (same semantics as the CUDA kernel).
furthest_point_sampling_cpu(
points.size(0), points.size(1), nsamples, points.data_ptr<float>(),
tmp.data_ptr<float>(), output.data_ptr<int>());
#ifdef WITH_CUDA
}
#endif
return output;
}

View File

@@ -0,0 +1,98 @@
// CPU reference implementation of gather_points:
// out[i][l][j] = points[i][l][idx[i][j]].
void gather_points_cpu(
    int b, int c, int n, int npoints,
    const float *points,
    const int *idx,
    float *out
) {
  for (int bi = 0; bi < b; ++bi) {
    // Per-batch base pointers.
    const float *points_b = points + bi * c * n;
    const int *idx_b = idx + bi * npoints;
    float *out_b = out + bi * c * npoints;
    for (int l = 0; l < c; ++l) {
      for (int j = 0; j < npoints; ++j) {
        out_b[l * npoints + j] = points_b[l * n + idx_b[j]];
      }
    }
  }
}
// CPU reference implementation of the gather_points backward pass:
// accumulates grad_out[i][l][j] into grad_points[i][l][idx[i][j]].
// grad_points must arrive zero-initialized (the caller allocates it with
// torch::zeros), matching the CUDA atomicAdd semantics.
void gather_points_grad_cpu(
    int b, int c, int n, int npoints,
    const float *grad_out,
    const int *idx,
    float *grad_points
) {
  for (int bi = 0; bi < b; ++bi) {
    // Per-batch base pointers.
    const float *g_in = grad_out + bi * c * npoints;
    const int *idx_b = idx + bi * npoints;
    float *g_out = grad_points + bi * c * n;
    for (int l = 0; l < c; ++l) {
      for (int j = 0; j < npoints; ++j) {
        g_out[l * n + idx_b[j]] += g_in[l * npoints + j];
      }
    }
  }
}
// CPU reference implementation of iterative farthest point sampling.
//
// dataset : (b, n, 3) xyz coordinates
// temp    : (b, n) scratch buffer holding, per point, the squared distance
//           to the closest already-selected point
// idxs    : (b, m) output indices of the selected points
//
// Mirrors the CUDA kernel: the first sample is always index 0, and points
// whose squared norm is <= 1e-3 (near-origin padding/invalid points) are
// never selected.
//
// BUGFIX vs the previous version: the arg-max used to run as a second pass
// over the whole temp buffer, so invalid points — whose temp entry is never
// written and stays at 1e10 — were picked as the "farthest" point. The
// arg-max is now folded into the distance loop (as in the CUDA kernel), so
// only valid points can be selected.
void furthest_point_sampling_cpu(
    int b, int n, int m,
    const float *dataset,
    float *temp,
    int *idxs
) {
  // Guard matches the CUDA kernel's `if (m <= 0) return;`.
  if (m <= 0) return;
  for (int batch_index = 0; batch_index < b; ++batch_index) {
    const float *dataset_batch = dataset + batch_index * n * 3;
    float *temp_batch = temp + batch_index * n;
    int *idxs_batch = idxs + batch_index * m;
    // Seed: the first selected point is index 0, as in the CUDA kernel.
    int old = 0;
    idxs_batch[0] = old;
    // Reset min-distances so the first pass is a plain distance computation
    // (keeps the function self-contained regardless of caller-provided temp).
    for (int k = 0; k < n; ++k) {
      temp_batch[k] = 1e10f;
    }
    for (int j = 1; j < m; ++j) {
      // Coordinates of the most recently selected point.
      float x1 = dataset_batch[old * 3 + 0];
      float y1 = dataset_batch[old * 3 + 1];
      float z1 = dataset_batch[old * 3 + 2];
      // Track the farthest valid point while updating the min-distances.
      float best = -1.0f;
      int besti = 0;
      for (int k = 0; k < n; ++k) {
        float x2 = dataset_batch[k * 3 + 0];
        float y2 = dataset_batch[k * 3 + 1];
        float z2 = dataset_batch[k * 3 + 2];
        // Skip padding points clustered at the origin (same threshold as
        // the CUDA kernel).
        float mag = x2 * x2 + y2 * y2 + z2 * z2;
        if (mag <= 1e-3f) continue;
        float dx = x2 - x1;
        float dy = y2 - y1;
        float dz = z2 - z1;
        float d = dx * dx + dy * dy + dz * dz;
        // Squared distance to the closest already-selected point.
        float d2 = d < temp_batch[k] ? d : temp_batch[k];
        temp_batch[k] = d2;
        if (d2 > best) {
          best = d2;
          besti = k;
        }
      }
      // The farthest remaining valid point becomes the next sample.
      old = besti;
      idxs_batch[j] = old;
    }
  }
}

View File

@@ -0,0 +1,234 @@
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
// input: points(b, c, n) idx(b, m)
// output: out(b, c, m)
__global__ void gather_points_kernel(int b, int c, int n, int m,
const float *__restrict__ points,
const int *__restrict__ idx,
float *__restrict__ out) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int l = blockIdx.y; l < c; l += gridDim.y) {
for (int j = threadIdx.x; j < m; j += blockDim.x) {
int a = idx[i * m + j];
out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
}
}
}
}
// Host-side launcher: a (b, c) grid with opt_n_threads(npoints) threads per
// block, on the current PyTorch CUDA stream.
void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
                                  const float *points, const int *idx,
                                  float *out) {
gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                       at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,
                                                           points, idx, out);
CUDA_CHECK_ERRORS();
}
// input: grad_out(b, c, m) idx(b, m)
// output: grad_points(b, c, n)
// Backward of gather_points: same grid/thread layout as the forward kernel,
// scattering gradients with atomicAdd (idx may contain duplicates).
__global__ void gather_points_grad_kernel(int b, int c, int n, int m,
                                          const float *__restrict__ grad_out,
                                          const int *__restrict__ idx,
                                          float *__restrict__ grad_points) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int l = blockIdx.y; l < c; l += gridDim.y) {
for (int j = threadIdx.x; j < m; j += blockDim.x) {
int a = idx[i * m + j];
atomicAdd(grad_points + (i * c + l) * n + a,
          grad_out[(i * c + l) * m + j]);
}
}
}
}
// Host-side launcher for the gather backward kernel; same launch
// configuration as the forward wrapper, on the current PyTorch CUDA stream.
void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                       const float *grad_out, const int *idx,
                                       float *grad_points) {
gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                            at::cuda::getCurrentCUDAStream()>>>(
b, c, n, npoints, grad_out, idx, grad_points);
CUDA_CHECK_ERRORS();
}
// Reduction step: folds slot idx2 into slot idx1, keeping the larger
// distance and its point index (used by the FPS shared-memory arg-max).
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
const float v1 = dists[idx1], v2 = dists[idx2];
const int i1 = dists_i[idx1], i2 = dists_i[idx2];
dists[idx1] = max(v1, v2);
dists_i[idx1] = v2 > v1 ? i2 : i1;
}
// Input dataset: (b, n, 3), tmp: (b, n)
// Ouput idxs (b, m)
// Iterative farthest point sampling: one block per batch. Each iteration,
// threads stride over the n points updating temp[k] (squared distance to the
// closest already-selected point) and tracking a per-thread arg-max, then a
// shared-memory tree reduction (via __update) finds the global farthest
// point, which becomes the next sample. Points with squared norm <= 1e-3
// never update best/besti and so are never selected.
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
if (m <= 0) return;
// Shared scratch for the block-wide arg-max reduction.
__shared__ float dists[block_size];
__shared__ int dists_i[block_size];
int batch_index = blockIdx.x;
dataset += batch_index * n * 3;
temp += batch_index * n;
idxs += batch_index * m;
int tid = threadIdx.x;
const int stride = block_size;
// The first selected point is always index 0.
int old = 0;
if (threadIdx.x == 0) idxs[0] = old;
__syncthreads();
for (int j = 1; j < m; j++) {
int besti = 0;
float best = -1;
// Coordinates of the most recently selected point.
float x1 = dataset[old * 3 + 0];
float y1 = dataset[old * 3 + 1];
float z1 = dataset[old * 3 + 2];
for (int k = tid; k < n; k += stride) {
float x2, y2, z2;
x2 = dataset[k * 3 + 0];
y2 = dataset[k * 3 + 1];
z2 = dataset[k * 3 + 2];
// Skip near-origin (padding/invalid) points.
float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
if (mag <= 1e-3) continue;
float d =
    (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
// Min-distance to the selected set so far.
float d2 = min(d, temp[k]);
temp[k] = d2;
besti = d2 > best ? k : besti;
best = d2 > best ? d2 : best;
}
dists[tid] = best;
dists_i[tid] = besti;
__syncthreads();
// Unrolled binary tree reduction over the per-thread maxima; the
// block_size guards are resolved at compile time via the template.
if (block_size >= 512) {
if (tid < 256) {
__update(dists, dists_i, tid, tid + 256);
}
__syncthreads();
}
if (block_size >= 256) {
if (tid < 128) {
__update(dists, dists_i, tid, tid + 128);
}
__syncthreads();
}
if (block_size >= 128) {
if (tid < 64) {
__update(dists, dists_i, tid, tid + 64);
}
__syncthreads();
}
if (block_size >= 64) {
if (tid < 32) {
__update(dists, dists_i, tid, tid + 32);
}
__syncthreads();
}
if (block_size >= 32) {
if (tid < 16) {
__update(dists, dists_i, tid, tid + 16);
}
__syncthreads();
}
if (block_size >= 16) {
if (tid < 8) {
__update(dists, dists_i, tid, tid + 8);
}
__syncthreads();
}
if (block_size >= 8) {
if (tid < 4) {
__update(dists, dists_i, tid, tid + 4);
}
__syncthreads();
}
if (block_size >= 4) {
if (tid < 2) {
__update(dists, dists_i, tid, tid + 2);
}
__syncthreads();
}
if (block_size >= 2) {
if (tid < 1) {
__update(dists, dists_i, tid, tid + 1);
}
__syncthreads();
}
// Slot 0 now holds the global farthest point for this iteration.
old = dists_i[0];
if (tid == 0) idxs[j] = old;
}
}
// Host-side launcher: instantiates the FPS kernel template for the
// power-of-two thread count chosen by opt_n_threads(n), one block per batch,
// on the current PyTorch CUDA stream.
// NOTE(review): the default case assumes opt_n_threads never exceeds 512 —
// confirm against cuda_utils.h.
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
                                            const float *dataset, float *temp,
                                            int *idxs) {
unsigned int n_threads = opt_n_threads(n);
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
switch (n_threads) {
case 512:
furthest_point_sampling_kernel<512>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 256:
furthest_point_sampling_kernel<256>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 128:
furthest_point_sampling_kernel<128>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 64:
furthest_point_sampling_kernel<64>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 32:
furthest_point_sampling_kernel<32>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 16:
furthest_point_sampling_kernel<16>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 8:
furthest_point_sampling_kernel<8>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 4:
furthest_point_sampling_kernel<4>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 2:
furthest_point_sampling_kernel<2>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
case 1:
furthest_point_sampling_kernel<1>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
break;
default:
furthest_point_sampling_kernel<512>
    <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
}
CUDA_CHECK_ERRORS();
}

View File

@@ -0,0 +1 @@
from .pointnet2 import *

View File

@@ -0,0 +1,518 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
''' Pointnet2 layers.
Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch
Extended with the following:
1. Uniform sampling in each local region (sample_uniformly)
2. Return sampled points indices to support votenet.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
# import os
# import sys
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(BASE_DIR)
from . import pointnet2_utils
from . import pytorch_utils as pt_utils
from typing import List
class _PointnetSAModuleBase(nn.Module):
    """Shared skeleton of the PointNet++ set-abstraction modules.

    Subclasses fill in ``npoint``, ``groupers`` and ``mlps`` in their
    constructors; this base class provides only the common forward pass.
    """

    def __init__(self):
        super().__init__()
        self.npoint = None
        self.groupers = None
        self.mlps = None

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, N, C) tensor of the descriptors of the features

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        """
        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if self.npoint is None:
            new_xyz = None
        else:
            # Furthest-point-sample the centroids, then gather their coordinates.
            sampled = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
            new_xyz = pointnet2_utils.gather_operation(
                xyz_flipped, sampled
            ).transpose(1, 2).contiguous()

        pooled = []
        for grouper, mlp in zip(self.groupers, self.mlps):
            scale_feats = grouper(xyz, new_xyz, features)  # (B, C, npoint, nsample)
            scale_feats = mlp(scale_feats)                 # (B, mlp[-1], npoint, nsample)
            # Max-pool over each local neighbourhood.
            scale_feats = F.max_pool2d(
                scale_feats, kernel_size=[1, scale_feats.size(3)]
            )                                              # (B, mlp[-1], npoint, 1)
            pooled.append(scale_feats.squeeze(-1))         # (B, mlp[-1], npoint)

        return new_xyz, torch.cat(pooled, dim=1)
class PointnetSAModuleMSG(_PointnetSAModuleBase):
    r"""Pointnet set abstraction layer with multiscale grouping

    Parameters
    ----------
    npoint : int
        Number of features
    radii : list of float32
        list of radii to group with
    nsamples : list of int32
        Number of samples in each ball query
    mlps : list of list of int32
        Spec of the pointnet before the global max_pool for each scale
    bn : bool
        Use batchnorm
    """

    def __init__(
            self,
            *,
            npoint: int,
            radii: List[float],
            nsamples: List[int],
            mlps: List[List[int]],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,
                                              sample_uniformly=sample_uniformly)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            # Bug fix: copy the spec before widening the first channel count.
            # The original aliased the caller's nested list (mlp_spec = mlps[i])
            # and mutated it in place, so constructing two modules from the
            # same ``mlps`` value added 3 twice.
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3  # xyz offsets are concatenated to the features

            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
class PointnetSAModule(PointnetSAModuleMSG):
    r"""Single-scale Pointnet set abstraction layer.

    Parameters
    ----------
    npoint : int
        Number of features
    radius : float
        Radius of ball
    nsample : int
        Number of samples in the ball query
    mlp : list
        Spec of the pointnet before the global max_pool
    bn : bool
        Use batchnorm
    """

    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            radius: float = None,
            nsample: int = None,
            bn: bool = True,
            use_xyz: bool = True
    ):
        # A single-scale module is just the multi-scale one with one entry
        # in every per-scale list.
        super().__init__(
            npoint=npoint,
            radii=[radius],
            nsamples=[nsample],
            mlps=[mlp],
            bn=bn,
            use_xyz=use_xyz
        )
class PointnetSAModuleVotes(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    with extra support for returning point indices for getting their GT votes '''

    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            radius: float = None,
            nsample: int = None,
            bn: bool = True,
            use_xyz: bool = True,
            pooling: str = 'max',
            sigma: float = None,  # for RBF pooling
            normalize_xyz: bool = False,  # normalize local XYZ with radius
            sample_uniformly: bool = False,
            ret_unique_cnt: bool = False
    ):
        super().__init__()

        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.pooling = pooling
        self.mlp_module = None
        self.use_xyz = use_xyz
        self.sigma = sigma
        # Default RBF bandwidth is half the grouping radius.  Bug fix: the
        # original computed ``self.radius / 2`` unconditionally, which raised
        # TypeError when radius is None (the GroupAll, npoint=None case).
        if self.sigma is None and self.radius is not None:
            self.sigma = self.radius / 2
        self.normalize_xyz = normalize_xyz
        self.ret_unique_cnt = ret_unique_cnt

        if npoint is not None:
            self.grouper = pointnet2_utils.QueryAndGroup(
                radius, nsample,
                use_xyz=use_xyz, ret_grouped_xyz=True, normalize_xyz=normalize_xyz,
                sample_uniformly=sample_uniformly, ret_unique_cnt=ret_unique_cnt)
        else:
            self.grouper = pointnet2_utils.GroupAll(use_xyz, ret_grouped_xyz=True)

        # Bug fix: copy the spec so the caller's list is not mutated when the
        # first channel count is widened for the concatenated xyz offsets.
        mlp_spec = list(mlp)
        if use_xyz and len(mlp_spec) > 0:
            mlp_spec[0] += 3
        self.mlp_module = pt_utils.SharedMLP(mlp_spec, bn=bn)

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None,
                inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, C, N) tensor of the descriptors of the features
        inds : torch.Tensor
            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1)

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        inds : torch.Tensor
            (B, npoint) tensor of the inds
        """
        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if inds is None:
            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
        else:
            assert (inds.shape[1] == self.npoint)
        new_xyz = pointnet2_utils.gather_operation(
            xyz_flipped, inds
        ).transpose(1, 2).contiguous() if self.npoint is not None else None

        if not self.ret_unique_cnt:
            grouped_features, grouped_xyz = self.grouper(
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample)
        else:
            grouped_features, grouped_xyz, unique_cnt = self.grouper(
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample), (B, 3, npoint, nsample), (B, npoint)

        new_features = self.mlp_module(
            grouped_features
        )  # (B, mlp[-1], npoint, nsample)

        if self.pooling == 'max':
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
        elif self.pooling == 'avg':
            new_features = F.avg_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
        elif self.pooling == 'rbf':
            # Radial-basis-function weighted sum of features, normalized by
            # nsample and sigma.
            # Ref: https://en.wikipedia.org/wiki/Radial_basis_function_kernel
            rbf = torch.exp(-1 * grouped_xyz.pow(2).sum(1, keepdim=False) / (self.sigma ** 2) / 2)  # (B, npoint, nsample)
            new_features = torch.sum(new_features * rbf.unsqueeze(1), -1, keepdim=True) / float(self.nsample)  # (B, mlp[-1], npoint, 1)
        new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)

        if not self.ret_unique_cnt:
            return new_xyz, new_features, inds
        else:
            return new_xyz, new_features, inds, unique_cnt
class PointnetSAModuleMSGVotes(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    with extra support for returning point indices for getting their GT votes '''

    def __init__(
            self,
            *,
            mlps: List[List[int]],
            npoint: int,
            radii: List[float],
            nsamples: List[int],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert (len(mlps) == len(nsamples) == len(radii))

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,
                                              sample_uniformly=sample_uniformly)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            # Bug fix: copy the spec before widening the first channel count;
            # the original aliased the caller's nested list and mutated it.
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3
            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None, inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, C, N) tensor of the descriptors of the features
        inds : torch.Tensor
            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1)

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        inds : torch.Tensor
            (B, npoint) tensor of the inds
        """
        new_features_list = []

        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if inds is None:
            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
        new_xyz = pointnet2_utils.gather_operation(
            xyz_flipped, inds
        ).transpose(1, 2).contiguous() if self.npoint is not None else None

        for i in range(len(self.groupers)):
            new_features = self.groupers[i](
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample)
            new_features = self.mlps[i](
                new_features
            )  # (B, mlp[-1], npoint, nsample)
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)
            new_features_list.append(new_features)

        return new_xyz, torch.cat(new_features_list, dim=1), inds
class PointnetFPModule(nn.Module):
    r"""Propagates the features of one set to another.

    Parameters
    ----------
    mlp : list
        Pointnet module parameters
    bn : bool
        Use batchnorm
    """

    def __init__(self, *, mlp: List[int], bn: bool = True):
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor,
            unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) tensor of the xyz positions of the unknown features
        known : torch.Tensor
            (B, m, 3) tensor of the xyz positions of the known features
        unknow_feats : torch.Tensor
            (B, C1, n) tensor of the features to be propagated to
        known_feats : torch.Tensor
            (B, C2, m) tensor of features to be propagated

        Returns
        -------
        new_features : torch.Tensor
            (B, mlp[-1], n) tensor of the features of the unknown features
        """
        if known is None:
            # No source coordinates: broadcast the features to all n targets.
            interpolated_feats = known_feats.expand(
                *known_feats.size()[0:2], unknown.size(1)
            )
        else:
            # Inverse-distance weighting over the three nearest known points.
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            dist_recip = 1.0 / (dist + 1e-8)
            weight = dist_recip / torch.sum(dist_recip, dim=2, keepdim=True)
            interpolated_feats = pointnet2_utils.three_interpolate(
                known_feats, idx, weight
            )

        if unknow_feats is None:
            new_features = interpolated_feats
        else:
            # (B, C2 + C1, n)
            new_features = torch.cat([interpolated_feats, unknow_feats], dim=1)

        # SharedMLP consumes 4-D input, so add and strip a trailing axis.
        return self.mlp(new_features.unsqueeze(-1)).squeeze(-1)
class PointnetLFPModuleMSG(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    learnable feature propagation layer.'''

    def __init__(
            self,
            *,
            mlps: List[List[int]],
            radii: List[float],
            nsamples: List[int],
            post_mlp: List[int],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert (len(mlps) == len(nsamples) == len(radii))

        self.post_mlp = pt_utils.SharedMLP(post_mlp, bn=bn)

        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,
                                              sample_uniformly=sample_uniformly)
            )
            # Bug fix: copy the spec before widening the first channel count;
            # the original mutated the caller's nested list in place.
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3
            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))

    def forward(self, xyz2: torch.Tensor, xyz1: torch.Tensor,
                features2: torch.Tensor, features1: torch.Tensor) -> torch.Tensor:
        r""" Propagate features from xyz1 to xyz2.

        Parameters
        ----------
        xyz2 : torch.Tensor
            (B, N2, 3) tensor of the xyz coordinates of the features
        xyz1 : torch.Tensor
            (B, N1, 3) tensor of the xyz coordinates of the features
        features2 : torch.Tensor
            (B, C2, N2) tensor of the descriptors of the features
        features1 : torch.Tensor
            (B, C1, N1) tensor of the descriptors of the features

        Returns
        -------
        new_features1 : torch.Tensor
            (B, \sum_k(mlps[k][-1]), N1) tensor of the new_features descriptors
        """
        new_features_list = []

        for i in range(len(self.groupers)):
            new_features = self.groupers[i](
                xyz1, xyz2, features1
            )  # (B, C1, N2, nsample)
            new_features = self.mlps[i](
                new_features
            )  # (B, mlp[-1], N2, nsample)
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], N2, 1)
            new_features = new_features.squeeze(-1)  # (B, mlp[-1], N2)

            if features2 is not None:
                new_features = torch.cat([new_features, features2],
                                         dim=1)  # (B, mlp[-1] + C2, N2)

            new_features = new_features.unsqueeze(-1)
            new_features = self.post_mlp(new_features)

            new_features_list.append(new_features)

        return torch.cat(new_features_list, dim=1).squeeze(-1)
if __name__ == "__main__":
    # Smoke test: run a tiny multi-scale SA module on random points.
    # Requires a CUDA device (the extension ops are GPU-only here).
    from torch.autograd import Variable

    # Fixed seeds so the printed outputs are reproducible.
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    xyz = Variable(torch.randn(2, 9, 3).cuda(), requires_grad=True)
    xyz_feats = Variable(torch.randn(2, 9, 6).cuda(), requires_grad=True)

    test_module = PointnetSAModuleMSG(
        npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]]
    )
    test_module.cuda()
    print(test_module(xyz, xyz_feats))

    for _ in range(1):
        _, new_features = test_module(xyz, xyz_feats)
        # Backprop a gradient of ones through the module to exercise backward.
        new_features.backward(
            torch.cuda.FloatTensor(*new_features.size()).fill_(1)
        )
        print(new_features)
        print(xyz.grad)

View File

@@ -0,0 +1,554 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
''' Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch '''
from __future__ import (
division,
absolute_import,
with_statement,
print_function,
unicode_literals,
)
import torch
from torch.autograd import Function
import torch.nn as nn
# import pytorch_utils as pt_utils
from . import pytorch_utils as pt_utils
# import sys
try:
import builtins
except:
import __builtin__ as builtins
try:
from . import pointnet2
except ImportError:
if not getattr(builtins, "__POINTNET2_SETUP__", False):
raise ImportError(
"Could not import pointnet2 module.\n"
"Please see the setup instructions in the README: "
"https://github.com/erikwijmans/Pointnet2_PyTorch/blob/master/README.rst"
)
if False:
# Workaround for type hints without depending on the `typing` module
from typing import *
class RandomDropout(nn.Module):
    """Feature dropout whose drop probability is drawn uniformly from [0, p]
    on every forward call."""

    def __init__(self, p=0.5, inplace=False):
        super(RandomDropout, self).__init__()
        self.p = p  # upper bound of the random dropout probability
        self.inplace = inplace

    def forward(self, X):
        # Draw a fresh dropout probability for this call.
        theta = torch.Tensor(1).uniform_(0, self.p)[0]
        # Bug fix: the original passed ``self.train`` — the bound *method*,
        # which is always truthy — instead of the ``self.training`` flag, so
        # dropout was applied even in eval mode.
        return pt_utils.feature_dropout_no_scaling(X, theta, self.training, self.inplace)
class FurthestPointSampling(Function):
    @staticmethod
    def forward(ctx, xyz, npoint):
        # type: (Any, torch.Tensor, int) -> torch.Tensor
        r"""Iterative furthest point sampling.

        Selects a set of npoint features that have the largest minimum
        distance to each other.

        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor where N > npoint
        npoint : int32
            number of features in the sampled set

        Returns
        -------
        torch.Tensor
            (B, npoint) tensor containing the set
        """
        return pointnet2.furthest_point_sampling(xyz, npoint)

    @staticmethod
    def backward(ctx, grad_out=None):
        # Sampling produces indices; there is nothing to differentiate.
        return None, None


furthest_point_sample = FurthestPointSampling.apply
class GatherOperation(Function):
    @staticmethod
    def forward(ctx, features, idx):
        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""Gather the feature columns selected by ``idx``.

        Parameters
        ----------
        features : torch.Tensor
            (B, C, N) tensor
        idx : torch.Tensor
            (B, npoint) tensor of the features to gather

        Returns
        -------
        torch.Tensor
            (B, C, npoint) tensor
        """
        # Keep the indices and input sizes around for the backward scatter.
        ctx.for_backwards = (idx, features.size(1), features.size(2))
        return pointnet2.gather_points(features, idx)

    @staticmethod
    def backward(ctx, grad_out):
        idx, _, num_points = ctx.for_backwards
        grad_features = pointnet2.gather_points_grad(
            grad_out.contiguous(), idx, num_points
        )
        return grad_features, None


gather_operation = GatherOperation.apply
class ThreeNN(Function):
    @staticmethod
    def forward(ctx, unknown, known):
        # type: (Any, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]
        r"""Find the three nearest neighbors of ``unknown`` among ``known``.

        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) tensor of query points
        known : torch.Tensor
            (B, m, 3) tensor of reference points

        Returns
        -------
        dist : torch.Tensor
            (B, n, 3) l2 distance to the three nearest neighbors
        idx : torch.Tensor
            (B, n, 3) index of 3 nearest neighbors
        """
        # The extension op returns *squared* distances; convert to l2.
        sq_dist, idx = pointnet2.three_nn(unknown, known)
        return sq_dist.sqrt(), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        # Nearest-neighbor search is not differentiable.
        return None, None


three_nn = ThreeNN.apply
class ThreeInterpolate(Function):
    @staticmethod
    def forward(ctx, features, idx, weight):
        # type: (Any, torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Performs weighted linear interpolation on 3 features

        Parameters
        ----------
        features : torch.Tensor
            (B, c, m) Features descriptors to be interpolated from
        idx : torch.Tensor
            (B, n, 3) three nearest neighbors of the target features in features
        weight : torch.Tensor
            (B, n, 3) weights

        Returns
        -------
        torch.Tensor
            (B, c, n) tensor of the interpolated features
        """
        # Only the source point count m is needed to size the backward pass;
        # the original's unused B/c/n unpacking has been removed.
        m = features.size(2)
        ctx.three_interpolate_for_backward = (idx, weight, m)
        return pointnet2.three_interpolate(features, idx, weight)

    @staticmethod
    def backward(ctx, grad_out):
        # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
        r"""
        Parameters
        ----------
        grad_out : torch.Tensor
            (B, c, n) tensor with gradients of outputs

        Returns
        -------
        grad_features : torch.Tensor
            (B, c, m) tensor with gradients of features
        None
        None
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        grad_features = pointnet2.three_interpolate_grad(
            grad_out.contiguous(), idx, weight, m
        )
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
class GroupingOperation(Function):
    @staticmethod
    def forward(ctx, features, idx):
        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        features : torch.Tensor
            (B, C, N) tensor of features to group
        idx : torch.Tensor
            (B, npoint, nsample) tensor containing the indicies of features to group with

        Returns
        -------
        torch.Tensor
            (B, C, npoint, nsample) tensor
        """
        # Only the source point count N is needed for the backward scatter;
        # the original's unused B/nfeatures/nsample/C unpacking has been removed.
        N = features.size(2)
        ctx.for_backwards = (idx, N)
        return pointnet2.group_points(features, idx)

    @staticmethod
    def backward(ctx, grad_out):
        # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]
        r"""
        Parameters
        ----------
        grad_out : torch.Tensor
            (B, C, npoint, nsample) tensor of the gradients of the output from forward

        Returns
        -------
        torch.Tensor
            (B, C, N) gradient of the features
        None
        """
        idx, N = ctx.for_backwards
        grad_features = pointnet2.group_points_grad(grad_out.contiguous(), idx, N)
        return grad_features, None


grouping_operation = GroupingOperation.apply
class BallQuery(Function):
    @staticmethod
    def forward(ctx, radius, nsample, xyz, new_xyz):
        # type: (Any, float, int, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        radius : float
            radius of the balls
        nsample : int
            maximum number of features in the balls
        xyz : torch.Tensor
            (B, N, 3) xyz coordinates of the features
        new_xyz : torch.Tensor
            (B, npoint, 3) centers of the ball query

        Returns
        -------
        torch.Tensor
            (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        # Note the extension op's argument order: centers first, then points.
        return pointnet2.ball_query(new_xyz, xyz, radius, nsample)

    @staticmethod
    def backward(ctx, a=None):
        # Index selection is not differentiable w.r.t. any input.
        return None, None, None, None


ball_query = BallQuery.apply
class QueryAndGroup(nn.Module):
    r"""
    Groups with a ball query of radius

    Parameters
    ---------
    radius : float32
        Radius of ball
    nsample : int32
        Maximum number of features to gather in the ball
    """

    def __init__(self, radius, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, sample_uniformly=False, ret_unique_cnt=False):
        # type: (QueryAndGroup, float, int, bool, bool, bool, bool, bool) -> None
        super(QueryAndGroup, self).__init__()
        self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
        self.ret_grouped_xyz = ret_grouped_xyz  # also return the centred xyz of each group
        self.normalize_xyz = normalize_xyz  # divide centred xyz by the query radius
        self.sample_uniformly = sample_uniformly  # re-draw samples uniformly from the unique ball members
        self.ret_unique_cnt = ret_unique_cnt  # also return the unique-point count per ball
        if self.ret_unique_cnt:
            # Unique counts are only computed on the uniform-sampling path.
            assert(self.sample_uniformly)

    def forward(self, xyz, new_xyz, features=None):
        # type: (QueryAndGroup, torch.Tensor, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor]
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            centriods (B, npoint, 3)
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, 3 + C, npoint, nsample) tensor
        """
        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)

        if self.sample_uniformly:
            # Replace the ball query's duplicate padding by resampling
            # uniformly from the *unique* members of each ball.
            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
            for i_batch in range(idx.shape[0]):
                for i_region in range(idx.shape[1]):
                    unique_ind = torch.unique(idx[i_batch, i_region, :])
                    num_unique = unique_ind.shape[0]
                    unique_cnt[i_batch, i_region] = num_unique
                    # Pad back to nsample by drawing from the unique indices.
                    sample_ind = torch.randint(0, num_unique, (self.nsample - num_unique,), dtype=torch.long)
                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))
                    idx[i_batch, i_region, :] = all_ind

        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)  # centre on the query point
        if self.normalize_xyz:
            grouped_xyz /= self.radius

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert (
                self.use_xyz
            ), "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        # Assemble the requested outputs in a fixed order:
        # new_features[, grouped_xyz][, unique_cnt].
        ret = [new_features]
        if self.ret_grouped_xyz:
            ret.append(grouped_xyz)
        if self.ret_unique_cnt:
            ret.append(unique_cnt)
        if len(ret) == 1:
            return ret[0]
        else:
            return tuple(ret)
class GroupAll(nn.Module):
    r"""
    Groups all N input points into a single region (no sampling).

    Parameters
    ---------
    use_xyz : bool
        Concatenate the xyz coordinates to the feature channels
    ret_grouped_xyz : bool
        Also return the grouped xyz tensor from forward()
    """

    def __init__(self, use_xyz=True, ret_grouped_xyz=False):
        # type: (GroupAll, bool, bool) -> None
        super(GroupAll, self).__init__()
        self.use_xyz = use_xyz
        # Bug fix: this flag was accepted but never stored, so forward()
        # raised AttributeError whenever ret_grouped_xyz=True was requested
        # (e.g. by PointnetSAModuleVotes with npoint=None).
        self.ret_grouped_xyz = ret_grouped_xyz

    def forward(self, xyz, new_xyz, features=None):
        # type: (GroupAll, torch.Tensor, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor]
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            Ignored
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, C + 3, 1, N) tensor (followed by grouped_xyz when
            ret_grouped_xyz is set)
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)  # (B, 3, 1, N)
        if features is not None:
            grouped_features = features.unsqueeze(2)  # (B, C, 1, N)
            if self.use_xyz:
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz

        if self.ret_grouped_xyz:
            return new_features, grouped_xyz
        else:
            return new_features
class CylinderQuery(Function):
    @staticmethod
    def forward(ctx, radius, hmin, hmax, nsample, xyz, new_xyz, rot):
        # type: (Any, float, float, float, int, torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        radius : float
            radius of the cylinders
        hmin, hmax : float
            endpoints of cylinder height in x-rotation axis
        nsample : int
            maximum number of features in the cylinders
        xyz : torch.Tensor
            (B, N, 3) xyz coordinates of the features
        new_xyz : torch.Tensor
            (B, npoint, 3) centers of the cylinder query
        rot: torch.Tensor
            (B, npoint, 9) flatten rotation matrices from
            cylinder frame to world frame

        Returns
        -------
        torch.Tensor
            (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        # Note the extension op's argument order: centers, points, rotations,
        # then the scalar query parameters.
        return pointnet2.cylinder_query(new_xyz, xyz, rot, radius, hmin, hmax, nsample)

    @staticmethod
    def backward(ctx, a=None):
        # Index selection is not differentiable w.r.t. any of the 7 inputs.
        return None, None, None, None, None, None, None


cylinder_query = CylinderQuery.apply
class CylinderQueryAndGroup(nn.Module):
    r"""
    Groups with a cylinder query of radius and height

    Parameters
    ---------
    radius : float32
        Radius of cylinder
    hmin, hmax: float32
        endpoints of cylinder height in x-rotation axis
    nsample : int32
        Maximum number of features to gather in the ball
    """

    def __init__(self, radius, hmin, hmax, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, rotate_xyz=True, sample_uniformly=False, ret_unique_cnt=False):
        super(CylinderQueryAndGroup, self).__init__()
        self.radius, self.nsample, self.hmin, self.hmax, = radius, nsample, hmin, hmax
        self.use_xyz = use_xyz  # concatenate xyz offsets to grouped features
        self.ret_grouped_xyz = ret_grouped_xyz  # also return the centred (and possibly rotated) xyz
        self.normalize_xyz = normalize_xyz  # divide centred xyz by the query radius
        self.rotate_xyz = rotate_xyz  # express grouped xyz in each cylinder's local frame
        self.sample_uniformly = sample_uniformly  # re-draw samples uniformly from unique members
        self.ret_unique_cnt = ret_unique_cnt  # also return the unique-point count per region
        if self.ret_unique_cnt:
            # Unique counts are only computed on the uniform-sampling path.
            assert(self.sample_uniformly)

    def forward(self, xyz, new_xyz, rot, features=None):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            centriods (B, npoint, 3)
        rot : torch.Tensor
            rotation matrices (B, npoint, 3, 3)
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, 3 + C, npoint, nsample) tensor
        """
        B, npoint, _ = new_xyz.size()
        # The extension op takes the rotations as flattened 9-vectors.
        idx = cylinder_query(self.radius, self.hmin, self.hmax, self.nsample, xyz, new_xyz, rot.view(B, npoint, 9))

        if self.sample_uniformly:
            # Replace the query's duplicate padding by resampling uniformly
            # from the *unique* members of each cylinder region.
            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
            for i_batch in range(idx.shape[0]):
                for i_region in range(idx.shape[1]):
                    unique_ind = torch.unique(idx[i_batch, i_region, :])
                    num_unique = unique_ind.shape[0]
                    unique_cnt[i_batch, i_region] = num_unique
                    sample_ind = torch.randint(0, num_unique, (self.nsample - num_unique,), dtype=torch.long)
                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))
                    idx[i_batch, i_region, :] = all_ind

        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)  # centre on the query point
        if self.normalize_xyz:
            grouped_xyz /= self.radius
        if self.rotate_xyz:
            # Rotate the centred offsets into each cylinder's local frame.
            grouped_xyz_ = grouped_xyz.permute(0, 2, 3, 1).contiguous()  # (B, npoint, nsample, 3)
            grouped_xyz_ = torch.matmul(grouped_xyz_, rot)
            grouped_xyz = grouped_xyz_.permute(0, 3, 1, 2).contiguous()

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert (
                self.use_xyz
            ), "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        # Assemble the requested outputs in a fixed order:
        # new_features[, grouped_xyz][, unique_cnt].
        ret = [new_features]
        if self.ret_grouped_xyz:
            ret.append(grouped_xyz)
        if self.ret_unique_cnt:
            ret.append(unique_cnt)
        if len(ret) == 1:
            return ret[0]
        else:
            return tuple(ret)

View File

@@ -0,0 +1,298 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch '''
# import torch
import torch.nn as nn
from typing import List, Tuple
class SharedMLP(nn.Sequential):
    """Stack of 1x1 Conv2d blocks applied point-wise (weights shared across
    points).

    ``args`` lists the channel widths; layer i maps args[i] -> args[i+1].
    When both ``first`` and ``preact`` are set, the very first layer skips
    its batch norm and activation.
    """

    def __init__(
            self,
            args: List[int],
            *,
            bn: bool = False,
            activation=nn.ReLU(inplace=True),
            preact: bool = False,
            first: bool = False,
            name: str = ""
    ):
        super().__init__()

        for i, (c_in, c_out) in enumerate(zip(args[:-1], args[1:])):
            # The first pre-activated layer gets neither BN nor activation.
            skip_pre = first and preact and i == 0
            self.add_module(
                name + 'layer{}'.format(i),
                Conv2d(
                    c_in,
                    c_out,
                    bn=bn and not skip_pre,
                    activation=None if skip_pre else activation,
                    preact=preact
                )
            )
class _BNBase(nn.Sequential):
def __init__(self, in_size, batch_norm=None, name=""):
super().__init__()
self.add_module(name + "bn", batch_norm(in_size))
nn.init.constant_(self[0].weight, 1.0)
nn.init.constant_(self[0].bias, 0)
class BatchNorm1d(_BNBase):
    """nn.BatchNorm1d with weight initialised to 1 and bias to 0 (via _BNBase)."""

    def __init__(self, in_size: int, *, name: str = ""):
        super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)
class BatchNorm2d(_BNBase):
    """nn.BatchNorm2d with weight initialised to 1 and bias to 0 (via _BNBase)."""

    def __init__(self, in_size: int, name: str = ""):
        super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)
class BatchNorm3d(_BNBase):
    """nn.BatchNorm3d with weight initialised to 1 and bias to 0 (via _BNBase)."""

    def __init__(self, in_size: int, name: str = ""):
        super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name)
class _ConvBase(nn.Sequential):
    """Conv -> (BN) -> (activation) building block shared by Conv1d/2d/3d.

    With preact=True the order becomes (BN) -> (activation) -> conv instead,
    and the batch norm is sized for the *input* channels.
    """

    def __init__(
            self,
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=None,
            batch_norm=None,
            bias=True,
            preact=False,
            name=""
    ):
        super().__init__()

        # A conv bias is redundant when a batch norm follows it.
        bias = bias and (not bn)
        conv_unit = conv(
            in_size,
            out_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=bias
        )
        init(conv_unit.weight)
        if bias:
            nn.init.constant_(conv_unit.bias, 0)

        if bn:
            # Pre-activation normalises the inputs; post-activation the outputs.
            if not preact:
                bn_unit = batch_norm(out_size)
            else:
                bn_unit = batch_norm(in_size)

        if preact:
            if bn:
                self.add_module(name + 'bn', bn_unit)

            if activation is not None:
                self.add_module(name + 'activation', activation)

        self.add_module(name + 'conv', conv_unit)

        if not preact:
            if bn:
                self.add_module(name + 'bn', bn_unit)

            if activation is not None:
                self.add_module(name + 'activation', activation)
class Conv1d(_ConvBase):
    """1-D convolution block: Conv1d -> (BN) -> (activation); see _ConvBase."""

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: int = 1,
            stride: int = 1,
            padding: int = 0,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal_,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__(
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=nn.Conv1d,
            batch_norm=BatchNorm1d,
            bias=bias,
            preact=preact,
            name=name
        )
class Conv2d(_ConvBase):
    """2-D convolution block: Conv2d -> (BN) -> (activation); see _ConvBase."""

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: Tuple[int, int] = (1, 1),
            stride: Tuple[int, int] = (1, 1),
            padding: Tuple[int, int] = (0, 0),
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal_,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__(
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=nn.Conv2d,
            batch_norm=BatchNorm2d,
            bias=bias,
            preact=preact,
            name=name
        )
class Conv3d(_ConvBase):
    """3-D convolution block: Conv3d -> (BN) -> (activation); see _ConvBase."""

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            kernel_size: Tuple[int, int, int] = (1, 1, 1),
            stride: Tuple[int, int, int] = (1, 1, 1),
            padding: Tuple[int, int, int] = (0, 0, 0),
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=nn.init.kaiming_normal_,
            bias: bool = True,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__(
            in_size,
            out_size,
            kernel_size,
            stride,
            padding,
            activation,
            bn,
            init,
            conv=nn.Conv3d,
            batch_norm=BatchNorm3d,
            bias=bias,
            preact=preact,
            name=name
        )
class FC(nn.Sequential):
    """Fully-connected block: Linear -> (BN) -> (activation).

    With preact=True the (BN) and (activation) are applied *before* the
    linear layer (and the batch norm is sized for the input features).
    When bn is set, the linear layer drops its bias.
    """

    def __init__(
            self,
            in_size: int,
            out_size: int,
            *,
            activation=nn.ReLU(inplace=True),
            bn: bool = False,
            init=None,
            preact: bool = False,
            name: str = ""
    ):
        super().__init__()

        fc = nn.Linear(in_size, out_size, bias=not bn)
        if init is not None:
            init(fc.weight)
        if not bn:
            # Zero-initialise the bias only when it exists.
            nn.init.constant_(fc.bias, 0)

        if preact and bn:
            self.add_module(name + 'bn', BatchNorm1d(in_size))
        if preact and activation is not None:
            self.add_module(name + 'activation', activation)

        self.add_module(name + 'fc', fc)

        if not preact and bn:
            self.add_module(name + 'bn', BatchNorm1d(out_size))
        if not preact and activation is not None:
            self.add_module(name + 'activation', activation)
def set_bn_momentum_default(bn_momentum):
    """Return a callable for ``nn.Module.apply`` that sets ``momentum`` on
    every BatchNorm1d/2d/3d submodule and leaves everything else untouched."""
    def _apply(module):
        if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
            module.momentum = bn_momentum

    return _apply
class BNMomentumScheduler(object):
    """Anneal the momentum of every batch-norm layer in ``model`` per epoch.

    ``bn_lambda(epoch)`` returns the momentum for that epoch; ``setter``
    turns a momentum value into a function suitable for ``model.apply``.
    The constructor immediately applies the momentum for ``last_epoch + 1``.
    """

    def __init__(
            self, model, bn_lambda, last_epoch=-1,
            setter=set_bn_momentum_default
    ):
        if not isinstance(model, nn.Module):
            raise RuntimeError(
                "Class '{}' is not a PyTorch nn Module".format(
                    type(model).__name__
                )
            )

        self.model = model
        self.setter = setter
        self.lmbd = bn_lambda

        self.step(last_epoch + 1)
        # step() already set last_epoch; restore the caller-supplied value so
        # the next parameterless step() advances from it.
        self.last_epoch = last_epoch

    def step(self, epoch=None):
        # Apply the momentum for ``epoch`` (defaults to the next epoch).
        if epoch is None:
            epoch = self.last_epoch + 1

        self.last_epoch = epoch
        self.model.apply(self.setter(self.lmbd(epoch)))

View File

@@ -0,0 +1,61 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
import glob
import os
# Absolute directory of this setup.py; used to build -I include paths.
ROOT = os.path.dirname(os.path.abspath(__file__))
# Extension sources live under <repo>/_ext_src/{src,include}.
_ext_src_root = "_ext_src"
_ext_cpp_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root))
_ext_cu_sources = glob.glob("{}/src/*.cu".format(_ext_src_root))
# CUDA builds compile both the C++ bindings and the .cu kernels.
_ext_cuda_sources = _ext_cpp_sources + _ext_cu_sources
def is_cuda_available():
    """Probe whether a CUDA build of the extension is feasible.

    Returns True only when torch imports cleanly, reports a usable CUDA
    runtime, and exposes the CUDAExtension builder; any failure on the
    way falls back to False so the CPU-only build is selected.
    """
    try:
        import torch
        has_cuda_builder = hasattr(torch.utils.cpp_extension, "CUDAExtension")
        return torch.cuda.is_available() and has_cuda_builder
    except (ImportError, AttributeError, RuntimeError):
        return False
# Pick exactly one extension flavour: CUDA (cpp + cu sources, WITH_CUDA
# define) when a CUDA toolchain is usable, otherwise a CPU-only CppExtension
# built from the .cpp sources alone.
_ext_modules = []
if is_cuda_available():
    _ext_modules.append(
        CUDAExtension(
            name='pointnet2.pointnet2',
            sources=_ext_cuda_sources,
            extra_compile_args={
                "cxx": ["-O2", "-DWITH_CUDA", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
                "nvcc": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
            },
        )
    )
else:
    _ext_modules.append(
        CppExtension(
            name='pointnet2.pointnet2',
            sources=_ext_cpp_sources,
            extra_compile_args={
                "cxx": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
            },
        )
    )

# BuildExtension supplies torch's compiler/ABI flags during build_ext.
setup(
    name='pointnet2',
    ext_modules=_ext_modules,
    cmdclass={
        'build_ext': BuildExtension
    }
)

View File

@@ -0,0 +1,28 @@
import os

# Enable two key CUDA debugging environment variables (must be set before
# torch is imported to take effect).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # force CUDA kernels to run synchronously so errors surface immediately
os.environ["TORCH_USE_CUDA_DSA"] = "1"  # device-side assertions -- NOTE(review): requires a torch build with DSA support, confirm

import torch
import pointnet2

# 1. Build simple point-cloud tensors (avoid empty tensors / abnormal shapes).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
print("使用设备:", device)

# Point-cloud data: (batch_size, 3, num_points)
points = torch.randn(1, 3, 1024, device=device, dtype=torch.float32)
# Query-center data: (batch_size, 3, num_centers)
centers = torch.randn(1, 3, 128, device=device, dtype=torch.float32)
# Radius and neighbour count (values the extension function expects)
radius = 0.1
k = 32

try:
    # Call the compiled extension function.
    idx = pointnet2.ball_query(centers, points, radius, k)
    print("扩展函数执行成功,输出形状:", idx.shape)
    print("扩展函数执行正常,无内存错误!")
except Exception as e:
    print(f"扩展函数执行失败:{e}")

View File

@@ -6,9 +6,7 @@ from rclpy.node import Node
import numpy as np
import open3d as o3d
from ..utils import read_calibration_mat
import yaml
share_dir = get_package_share_directory('vision_detect')
@@ -16,7 +14,14 @@ __all__ = [
"ConfigBase"
]
class ConfigBase(Node):
SHARE_DIR = get_package_share_directory('vision_detect')
with open(os.path.join(
SHARE_DIR, "configs/error_configs/report_logging_define.json"), "r"
) as f:
WARNING_LOG_MAP = json.load(f)["warring"]
def __init__(self, name):
super().__init__(name)
"""init parameter"""
@@ -53,9 +58,13 @@ class ConfigBase(Node):
self.camera_size = None
self.position = None
self.e2e_model = None
self.map1 = self.map2 = None
self.calculate_function = None
self.fx = self.fy = 0.5
self.camera_data = {}
self.distance = 1500
@@ -82,7 +91,7 @@ class ConfigBase(Node):
self.detect_mode = self.configs['detect_mode']
self.calculate_mode = self.configs['calculate_mode']
self._read_calibration_mat()
# self._read_calibration_mat()
if self.get_camera_mode == "Service":
self.service_name = self.configs["Service_configs"]["service_name"]
@@ -126,29 +135,10 @@ class ConfigBase(Node):
os.path.join(share_dir, self.calculate_configs['complete_model_path'])
)
self.calculate_configs["source"] = source
elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
self.calculate_configs = self.configs['E2E_configs']
else:
self.get_logger().warning("Unknown calculate_mode, use PCA")
self.calculate_configs = self.configs['PCA_configs']
self.get_logger().info("Get parameters done")
def _read_calibration_mat(self):
    """Load the three hand-eye calibration matrices from the share dir.

    Reads the left-hand, right-hand and head (eye-to-hand) calibration
    files listed in ``self.configs["calibration"]`` and stores the results
    on the node. A failed read only logs a warning; the first returned
    value is still assigned -- NOTE(review): confirm downstream code
    tolerates a failed-read placeholder matrix.
    """
    eye_in_left_hand_path = os.path.join(share_dir, self.configs["calibration"]["left_hand"])
    eye_in_right_hand_path = os.path.join(share_dir, self.configs["calibration"]["right_hand"])
    eye_to_hand_path = os.path.join(share_dir, self.configs["calibration"]["head"])

    # read_calibration_mat returns (matrix, info, success-flag).
    self.eye_in_left_hand_mat, info, sign = read_calibration_mat(eye_in_left_hand_path)
    if not sign:
        self.get_logger().warning(f"left_mat: {info}")
    self.eye_in_right_hand_mat, info, sign = read_calibration_mat(eye_in_right_hand_path)
    if not sign:
        self.get_logger().warning(f"right_mat: {info}")
    self.eye_to_hand_mat, info, sign = read_calibration_mat(eye_to_hand_path)
    if not sign:
        self.get_logger().warning(f"head_mat: {info}")
    self.get_logger().info("Read calibration mat file done")

View File

@@ -14,10 +14,18 @@ from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
from ..utils import *
from ..utils import distortion_correction, crop_imgs_box_xywh, draw_box, draw_mask, rmat2quat, \
crop_imgs_mask, get_map, create_o3d_pcd, save_img, refine_grasp_pose
from .init_node import InitBase
E2E_SIGN = True
try:
from ..utils import calculate_pose_e2e
except (ImportError, ModuleNotFoundError):
E2E_SIGN = False
class DetectNode(InitBase):
"""Detect Node"""
def __init__(self, name):
@@ -55,10 +63,11 @@ class DetectNode(InitBase):
"""同步回调函数"""
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
img, pose_list = self.function(color_img_cv, depth_img_cv)
img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
if not sign:
self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
# masks为空结束这一帧
if img is None:
@@ -83,7 +92,8 @@ class DetectNode(InitBase):
self.pub_pose_list.publish(pose_list_all)
def _service_callback(self, request, response):
# self.get_logger().info("service callback start")
print(" \n ")
print("========================== < start > ==========================")
response.header.stamp = self.get_clock().now().to_msg()
response.header.frame_id = "camera_detect"
@@ -95,23 +105,24 @@ class DetectNode(InitBase):
response.success = False
response.info = "Camera data have not objects"
response.objects = []
# self.get_logger().info("service callback done")
print("=========================== < end > ===========================")
return response
response.success = False
response.info = f"Name is wrong: {request.camera_position}"
response.objects = []
# self.get_logger().info("service callback done")
print("=========================== < end > ===========================")
return response
if request.camera_position == 'left':
self.hand_eye_mat = self.eye_in_left_hand_mat
self.p = "left"
elif request.camera_position == 'right':
self.hand_eye_mat = self.eye_in_right_hand_mat
self.p = "right"
else:
self.hand_eye_mat = self.eye_to_hand_mat
self.p = "head"
self.camera_size = [color_img_ros.width, color_img_ros.height]
@@ -121,15 +132,15 @@ class DetectNode(InitBase):
map1, map2, self.k = get_map(self.k, d, self.camera_size)
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
img, pose_list = self.function(color_img_cv, depth_img_cv)
img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
if not sign:
self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
# masks为空结束这一帧
if self.output_boxes or self.output_masks:
if img is None:
img = color_img_ros
self.pub_detect_image.publish(img)
response.info = "pose dict is empty"
response.success = False
response.objects = []
if pose_list:
else:
response.info = "Success get pose"
response.success = True
for item in pose_list:
@@ -141,19 +152,23 @@ class DetectNode(InitBase):
grab_width = item["grab_width"]
)
)
else:
response.info = "pose dict is empty"
response.success = False
response.objects = []
# self.get_logger().info("service callback done")
# publish detect image
if self.output_boxes or self.output_masks:
if img is None:
img = color_img_ros
self.pub_detect_image.publish(img)
print("=========================== < end > ===========================")
return response
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
"""Use segmentation model"""
self.get_logger().info('start')
pose_list = []
home = os.path.expanduser("~")
save_dir = os.path.join(home, "images")
save_img(rgb_img.copy(), "orign_image.png", save_dir=save_dir, mark_cur_time=True)
# Get Predict Results
time1 = time.time()
results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
@@ -162,8 +177,7 @@ class DetectNode(InitBase):
# Get masks
if result.masks is None or len(result.masks) == 0:
self.get_logger().info(f"Detect object num: 0")
return None, None
return None, 1000, False
masks = result.masks.data.cpu().numpy()
# Get boxes
@@ -171,81 +185,96 @@ class DetectNode(InitBase):
class_ids = result.boxes.cls.cpu().numpy()
labels = result.names
x_centers, y_centers = boxes[:, 0], boxes[:, 1]
sorted_index = np.lexsort((-y_centers, x_centers))
masks = masks[sorted_index]
boxes = boxes[sorted_index]
class_ids = class_ids[sorted_index]
time3 = time.time()
self.get_logger().info(f"Detect object num: {len(masks)}")
full_points = create_o3d_pcd(
depth_img, self.camera_size, self.k, **self.calculate_configs
)
time_full_points = time.time()
print(f"create full points: {(time_full_points - time3) * 1000}")
if self.calculate_mode == "E2E" and self.detect_mode == 'Detect' and E2E_SIGN:
self.calculate_configs["orign_point_clouds"] = create_o3d_pcd(
depth_img, self.camera_size, self.k, **self.calculate_configs
)
for i, (mask, box) in enumerate(zip(masks, boxes)):
imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_img, mask], box, True)
depth_crop, mask_crop = imgs_crop
if depth_crop is None:
self.get_logger().warning("Depth crop is None")
continue
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
int(self.camera_size[0]), int(self.camera_size[1]),
self.k[0], self.k[4],
self.k[2] - x_min, self.k[5] - y_min
)
rmat, grab_width = self.calculate_function(
mask_crop,
depth_crop,
intrinsics,
calculate_grab_width=True,
**self.calculate_configs
rmat, grab_width, sign = self.calculate_function(
mask_crop, depth_crop, intrinsics,
calculate_grab_width=True, **self.calculate_configs
)
if rmat is None:
self.get_logger().warning("Object point cloud have too many noise")
if not sign:
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
continue
self.get_logger().info(f"grab_width: {grab_width}")
rmat = self.hand_eye_mat @ rmat
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
pose_list.append(
{
"class_id": int(class_ids[i]),
"class_name": labels[class_ids[i]],
"pose": pose,
"grab_width": grab_width
}
time_c = time.time()
if self.p == "left" or self.p == "right":
position = rmat[0:3, 3]
rmat, sign = refine_grasp_pose(
full_points, self.calculate_configs.get("voxel_size"), position,
search_mode=True
)
if not sign:
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
continue
time_e = time.time()
print(f"Refine: {(time_e - time_c) * 1000} ms")
self.get_logger().info(f"grab_width: {grab_width}")
x, y, z, rw, rx, ry, rz = rmat2quat(self.hand_eye_mat @ rmat)
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
self.get_logger().info(f"xyz, wxyz: {x, y, z, rw, rx, ry, rz}")
pose_list.append(
{
"class_id": int(class_ids[i]),
"class_name": labels[class_ids[i]],
"pose": pose,
"grab_width": grab_width
}
)
time4 = time.time()
if not pose_list:
return None, 1003, False
self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
# self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
self.get_logger().info('end')
# mask_img and box_img is or not output
if self.output_boxes and not self.output_masks:
home = os.path.expanduser("~")
save_path = os.path.join(home, "detect_image.png")
draw_box(rgb_img, result, save_path=save_path)
# draw_box(rgb_img, result, save_path=False)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
elif self.output_boxes and self.output_masks:
draw_box(rgb_img, result)
if not self.output_boxes and not self.output_masks:
return None, pose_list, True
if self.output_boxes:
draw_box(rgb_img, result, save_dir=save_dir, mark_time = True)
if self.output_masks:
draw_mask(rgb_img, result)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
elif not self.output_boxes and self.output_masks:
draw_mask(rgb_img, result)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
else:
return None, pose_list
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
def _seg_color(self, rgb_img: np.ndarray, depth_img: np.ndarray):
"""Use segmentation model"""
@@ -268,31 +297,26 @@ class DetectNode(InitBase):
depth_crop = imgs_crop[0]
if depth_crop is None:
self.get_logger().warning("depth_crop is None")
return None, None
self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
return None, 1001, False
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
int(self.camera_size[0]), int(self.camera_size[1]),
self.k[0], self.k[4],
self.k[2] - x_min, self.k[5] - y_min
)
rmat, _ = self.calculate_function(
mask_crop,
depth_crop,
intrinsics,
**self.calculate_configs
rmat, _, sign = self.calculate_function(
mask_crop, depth_crop, intrinsics, **self.calculate_configs
)
if rmat is None:
self.get_logger().warning("Color Area point cloud have too many noise")
return None, None
if not sign:
# self.get_logger().warning("Color Area point cloud have too many noise")
return None, 1100, False
rmat = self.hand_eye_mat @ rmat
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
T = self.hand_eye_mat @ rmat
x, y, z, rw, rx, ry, rz = rmat2quat(T)
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
@@ -307,12 +331,13 @@ class DetectNode(InitBase):
}
)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
def _seg_crossboard(self, rgb_img, depth_img):
pose_list = []
rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
ret, corners = cv2.findChessboardCorners(rgb_img_gray, self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
ret, corners = cv2.findChessboardCorners(rgb_img_gray,
self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
if ret:
# 角点亚像素精确化(提高标定精度)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
@@ -337,64 +362,50 @@ class DetectNode(InitBase):
depth_crop = img_crop[0]
if depth_crop is None:
return None, None
self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
return None, 1001, False
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
int(self.camera_size[0]), int(self.camera_size[1]),
self.k[0], self.k[4],
self.k[2] - x_min, self.k[5] - y_min
)
rmat, _ = self.calculate_function(
mask_crop,
depth_crop,
intrinsics,
**self.calculate_configs
rmat, _, sign = self.calculate_function(
mask_crop, depth_crop, intrinsics, **self.calculate_configs
)
if rmat is None:
self.get_logger().warning("Corssboard point cloud have too many noise")
return None, None
if not sign:
# self.get_logger().warning("Corssboard point cloud have too many noise")
return None, 1100, False
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
if (x, y, z) != (0.0, 0.0, 0.0):
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
pose_list.append(
{
"class_id": int(99),
"class_name": 'crossboard',
"pose": pose,
"grab_width": 0.0
}
)
pose = Pose()
pose.position = Point(x=x, y=y, z=z)
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
pose_list.append(
{
"class_id": int(99),
"class_name": 'crossboard',
"pose": pose,
"grab_width": 0.0
}
)
cv2.putText(
rgb_img,
f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
(0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 255, 0),
2
rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
(0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
)
cv2.putText(
rgb_img,
f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
(0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 255, 0),
2
rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
(0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None, True
if __name__ == '__main__':

View File

@@ -1,3 +1,4 @@
import os
from ament_index_python.packages import get_package_share_directory
import torch
@@ -12,15 +13,22 @@ from sensor_msgs.msg import Image, CameraInfo
from interfaces.msg import PoseArrayClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
from ..utils import calculate_pose_pca, calculate_pose_icp
from ..utils import calculate_pose_pca, calculate_pose_icp, read_calibration_mat
from .config_node import ConfigBase
share_dir = get_package_share_directory('vision_detect')
E2E_SIGN = True
try:
from ..net import GraspNet
from ..utils import calculate_pose_e2e
except (ImportError, ModuleNotFoundError):
E2E_SIGN = False
class InitBase(ConfigBase):
def __init__(self, name):
super().__init__(name)
self.future = Future()
self._read_calibration_mat()
if self.get_camera_mode == "Service":
self._init_service()
@@ -53,6 +61,19 @@ class InitBase(ConfigBase):
self.calculate_function = calculate_pose_pca
elif self.calculate_mode == "ICP" and self.detect_mode == 'Detect':
self.calculate_function = calculate_pose_icp
elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
self.calculate_function = calculate_pose_e2e
self.e2e_model = GraspNet(seed_feat_dim=512, is_training=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.e2e_model.to(device)
self.e2e_model.load_state_dict(
torch.load(
os.path.join(self.SHARE_DIR, self.calculate_configs["checkpoint_path"]),
map_location=device
)
)
self.e2e_model.eval()
self.calculate_configs["model"] = self.e2e_model
else:
self.calculate_function = calculate_pose_pca
@@ -145,6 +166,28 @@ class InitBase(ConfigBase):
self.get_logger().info("Initialize subscriber done")
def _read_calibration_mat(self):
    """Load and log the three hand-eye calibration matrices.

    Paths come from ``self.configs["calibration"]``, resolved against the
    package share directory (``self.SHARE_DIR``). Each matrix is logged
    after reading; a failed read is reported as a warning but the first
    returned value is still assigned.
    """
    eye_in_left_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["left_hand"])
    eye_in_right_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["right_hand"])
    eye_to_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["head"])

    # read_calibration_mat returns (matrix, info, success-flag).
    self.eye_in_left_hand_mat, info, sign = read_calibration_mat(eye_in_left_hand_path)
    self.get_logger().info(f"left_hand_mat: {self.eye_in_left_hand_mat}")
    if not sign:
        self.get_logger().warning(f"left_mat: {info}")
    self.eye_in_right_hand_mat, info, sign = read_calibration_mat(eye_in_right_hand_path)
    self.get_logger().info(f"right_hand_mat: {self.eye_in_right_hand_mat}")
    if not sign:
        self.get_logger().warning(f"right_mat: {info}")
    self.eye_to_hand_mat, info, sign = read_calibration_mat(eye_to_hand_path)
    self.get_logger().info(f"head_mat: {self.eye_to_hand_mat}")
    if not sign:
        self.get_logger().warning(f"head_mat: {info}")
    self.get_logger().info("Read calibration mat file done")
def _camera_info_callback(self, msg: CameraInfo):
    """CameraInfo subscription callback; currently a no-op placeholder."""
    pass

View File

@@ -2,3 +2,5 @@ from .image_tools import *
from .draw_tools import *
from .calculate_tools import *
from .file_tools import *
from .pointclouds_tools import *
from .grasp_refine import *

View File

@@ -1,18 +1,35 @@
"""计算工具"""
import logging
import cv2
# import cv2
import numpy as np
import open3d as o3d
import transforms3d as tfs
from .object_icp import object_icp
from .object_icp import *
from .pointclouds_tools import *
E2E_SIGN = True
try:
import torch
import MinkowskiEngine as ME
import collections.abc as container_abcs
from ..net import pred_decode, ModelFreeCollisionDetector
except (ImportError, OSError, RuntimeError):
logging.warning("ImportError or OSError or RuntimeError")
E2E_SIGN = False
__all__ = [
"calculate_pose_pca", "calculate_pose_icp",
"rmat2quat", "quat2rmat",
"rmat2quat", "quat2rmat", "calculate_pose_e2e"
]
if not E2E_SIGN:
__all__.remove("calculate_pose_e2e")
def pca(data: np.ndarray, sort=True):
"""主成分分析 """
center = np.mean(data, axis=0)
@@ -40,30 +57,52 @@ def calculate_pose_pca(
calculate_grab_width: bool = False,
**kwargs
):
"""点云主成分分析法计算位态"""
"""点云主成分分析法计算位态
----------
input:
mask: np.ndarray
depth_img: np.ndarray
intrinsics: o3d.camera.PinholeCameraIntrinsic
calculate_grab_width: bool
**kwargs:
output:
rmat: np.ndarray (4, 4)
grab_width: list
sign: bool
"""
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
if not sign:
return point_cloud, [], False
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
depth_scale=kwargs.get("depth_scale", 1000.0),
depth_trunc=kwargs.get("depth_trunc", 3.0),
)
# depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
#
# point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
# depth=depth_o3d,
# intrinsic=intrinsics,
# depth_scale=kwargs.get("depth_scale", 1000.0),
# depth_trunc=kwargs.get("depth_trunc", 3.0),
# )
#
# point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
# if not sign:
# return 1100, [], False
#
# if len(point_cloud.points) == 0:
# # logging.warning("point_cloud is empty")
# return 1101, [], False
point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.005))
if point_cloud is None:
return None, [0.0, 0.0, 0.0]
if len(point_cloud.points) == 0:
logging.warning("clean_pcd is empty")
return np.eye(4), [0.0, 0.0, 0.0]
center = point_cloud.get_center()
x, y, z = center
if calculate_grab_width:
if np.asarray(point_cloud.points).shape[0] < 4:
# logging.warning("点数不足,不能算 OBB")
return 1200, [0.0, 0.0, 0.0], False
obb = point_cloud.get_oriented_bounding_box()
x, y, z = obb.center
extent = obb.extent
order = np.argsort(-extent)
@@ -74,20 +113,16 @@ def calculate_pose_pca(
v = v[:, order]
if v is None:
logging.warning("PCA output v is None")
return np.eye(4), [0.0, 0.0, 0.0]
return 1201, [0.0, 0.0, 0.0], False
grab_width = grab_width * 1.05
else:
center = point_cloud.get_center()
x, y, z = center
w, v = pca(np.asarray(point_cloud.points))
if w is None or v is None:
logging.warning("PCA output w or v is None")
return np.eye(4), 0.0
return 1201, 0.0, False
grab_width = [0.0, 0.0, 0.0]
vx, vy, vz = v[:,0], v[:,1], v[:,2]
@@ -105,7 +140,7 @@ def calculate_pose_pca(
# draw(point_cloud_u, rmat)
# draw(point_cloud, rmat)
return rmat, grab_width
return rmat, grab_width, True
def calculate_pose_icp(
@@ -119,22 +154,26 @@ def calculate_pose_icp(
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
if not sign:
return point_cloud, [], False
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
depth_scale=kwargs.get("depth_scale", 1000.0),
depth_trunc=kwargs.get("depth_trunc", 3.0)
)
point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
if point_cloud is None:
return None, [0.0, 0.0, 0.0]
if len(point_cloud.points) == 0:
logging.warning("clean_pcd is empty")
return np.eye(4), [0.0, 0.0, 0.0]
# depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
#
# point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
# depth=depth_o3d,
# intrinsic=intrinsics,
# depth_scale=kwargs.get("depth_scale", 1000.0),
# depth_trunc=kwargs.get("depth_trunc", 3.0)
# )
#
# point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
# if not sign:
# return 1100, [], False
#
# if len(point_cloud.points) == 0:
# # logging.warning("clean_pcd is empty")
# return 1101, [0.0, 0.0, 0.0], False
if calculate_grab_width:
pass
@@ -148,97 +187,190 @@ def calculate_pose_icp(
)
grab_width = [0.0, 0.0, 0.0]
return rmat, grab_width
return rmat, grab_width, True
def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
"""点云去噪"""
point_cloud = point_cloud.remove_non_finite_points()
while True:
down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size * 0.5)
point_num = len(down_pcd.points)
if point_num <= 1000:
break
voxel_size *= 2
if E2E_SIGN:
def minkowski_collate_fn(list_data):
coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
[d["feats"] for d in list_data])
coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
coordinates_batch, features_batch, return_index=True, return_inverse=True)
# logging.fatal("point_cloud_denoising: point_num={}".format(len(point_cloud.points)))
# logging.fatal("point_cloud_denoising: point_num={}".format(point_num))
res = {
"coors": coordinates_batch,
"feats": features_batch,
"quantize2original": quantize2original,
"point_clouds": torch.stack([torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
}
return res
# 半径滤波
clean_pcd, _ = down_pcd.remove_radius_outlier(
nb_points=10,
radius=voxel_size * 5
def calculate_pose_e2e(
mask,
depth_img: np.ndarray,
intrinsics,
calculate_grab_width: bool = False,
**kwargs
):
"""
点云抓取姿态预测模型计算位态
-----
input:
mask: np.ndarray
depth_img: np.ndarray
intrinsics: o3d.camera.PinholeCameraIntrinsic
calculate_grab_width: bool (abandon)
**kwargs:
"depth_scale": float
"depth_trunc": float
"voxel_size": float
"model":
"collision_thresh": float
"""
# logging.error("stage 1")
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
# 点云创建
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
depth_scale=kwargs.get("depth_scale", 1000.0),
depth_trunc=kwargs.get("depth_trunc", 3.0),
)
point_cloud = point_cloud.remove_non_finite_points()
# 点云去噪过程
down_pcd = point_cloud.voxel_down_sample(voxel_size=kwargs.get("voxel_size", 0.002))
clean_pcd, _ = down_pcd.remove_radius_outlier(
nb_points=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3),
radius=kwargs.get("voxel_size", 0.002) * 10
)
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
nb_neighbors=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3),
std_ratio=2.0
)
points = np.asarray(clean_pcd.points)
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
labels = np.array(
clean_pcd.cluster_dbscan(
eps=kwargs.get("voxel_size", 0.002) * 10,
min_points=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3)
)
)
# 统计滤波
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
nb_neighbors=10,
std_ratio=2.0
)
# 点云簇过滤
points = np.asarray(clean_pcd.points)
cluster_label = set(labels)
point_cloud_clusters = []
for label in cluster_label:
if label == -1:
continue
idx = np.where(labels == label)[0]
point_cloud_cluster = clean_pcd.select_by_index(idx)
points_cluster_z = points[idx, 2]
z_avg = np.mean(points_cluster_z)
if z_avg < 0.2:
continue
point_cloud_clusters.append((point_cloud_cluster, z_avg))
# 过滤过近的点
points = np.asarray(clean_pcd.points)
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
point_cloud_clusters.sort(key=lambda x: x[1])
point_cloud = point_cloud_clusters[0][0]
# # 使用数量最大簇判定噪声强度
# _, counts = np.unique(labels[labels >= 0], return_counts=True)
# largest_cluster_ratio = counts.max() / len(labels)
# if largest_cluster_ratio < 0.5:
# return None
# 判断点云是否为空
if len(point_cloud.points) == 0:
return 1101, [0.0, 0.0, 0.0], False
labels = np.array(clean_pcd.cluster_dbscan(eps=voxel_size * 5, min_points=10))
if len(labels[labels >= 0]) == 0:
return clean_pcd
# # 点云补齐15000个点
# points = np.asarray(point_cloud.points)
# if len(points) >= 15000:
# idxs = np.random.choice(len(points), 15000, replace=False)
# else:
# idxs1 = np.arange(len(points))
# idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
# idxs = np.concatenate([idxs1, idxs2], axis=0)
# points = points[idxs]
# 使用距离最近簇作为物体
points = np.asarray(clean_pcd.points)
cluster_label = set(labels)
point_cloud_clusters = []
for label in cluster_label:
if label == -1:
continue
idx = np.where(labels == label)[0]
point_cloud_cluster = clean_pcd.select_by_index(idx)
points_cluster_z = points[idx, 2]
z_avg = np.mean(points_cluster_z)
if z_avg < 0.2:
continue
point_cloud_clusters.append((point_cloud_cluster, z_avg))
# 构建推理数据结构
ret_dict = {
'point_clouds': points.astype(np.float32),
'coors': points.astype(np.float32) / kwargs.get("voxel_size", 0.002),
'feats': np.ones_like(points).astype(np.float32),
}
batch_data = minkowski_collate_fn([ret_dict])
if len(point_cloud_clusters) == 0:
return clean_pcd
# 将数据置于对应的设备上
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for key in batch_data:
batch_data[key] = batch_data[key].to(device)
point_cloud_clusters.sort(key=lambda x: x[1])
clean_pcd = point_cloud_clusters[0][0]
logging.warning(f'points num: {len(points)}')
# 使用最近簇判断噪音强度
largest_cluster_ratio = len(clean_pcd.points) / point_num
if largest_cluster_ratio < 0.20:
return None
# 点云数量判断,是否返回
if batch_data['coors'].shape[0] < 128: # 例如 128 / 256
return 1300, [0.0], False
if batch_data["point_clouds"].shape[1] < 128: # 例如 128 / 256
return 1301, [0.0], False
return clean_pcd
# 梯度置0进入推理
with torch.no_grad():
end_points = kwargs.get("model")(batch_data)
if end_points is None:
return 1302, [0.0, 0.0, 0.0], False
grasp_preds = pred_decode(end_points)
# 推理结果后处理
preds = grasp_preds[0].detach().cpu().numpy()
sorted_index = np.argsort(-preds[:, 0])
preds = preds[sorted_index]
preds = preds[:10]
if kwargs.get("collision_thresh", 0.01) > 0:
cloud = kwargs.get("orign_point_clouds")
if cloud is not None and len(preds) > 0:
mfcdetector = ModelFreeCollisionDetector(
cloud,
voxel_size=kwargs.get("voxel_size", 0.002)
)
collision_mask = mfcdetector.detect(
preds, approach_dist=0.05,
collision_thresh=kwargs.get("collision_thresh", 0.01)
)
preds = preds[~collision_mask]
# logging.error("stage 8")
Rs = preds[:, 4:13].reshape(-1, 3, 3)
centers = preds[:, 13:16].reshape(-1, 3)
grab_width = preds[:, 1]
if not len(Rs):
return 1303, [0.0, 0.0, 0.0], False
# logging.error("stage 9")
rmat = []
for r, center in zip(Rs, centers):
vz, vx, vy = r[:, 0], r[:, 1], r[:, 2]
if vx[0] < 0:
vx = -vx
if vz[2] < 0:
vz = -vz
if not np.allclose(np.cross(vx, vy), vz):
vy = -vy
R = np.column_stack((vx, vy, vz))
rmat.append(tfs.affines.compose(np.squeeze(np.asarray(center)), R, [1, 1, 1]))
if len(rmat) == 0:
return 1304, [0.0, 0.0, 0.0], False
return rmat[0], [grab_width[0]], True
# def calculate_grab_width(mask, k, depth):
# """计算重心宽度"""
# mask = mask.astype(np.uint8) * 255
# contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#
# if not contours:
# return 0.0
#
# c = max(contours, key=cv2.contourArea)
# box = cv2.boxPoints(cv2.minAreaRect(c))
# if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
# point_diff = box[1] - box[0]
# else:
# point_diff = box[1] - box[2]
#
# grab_width = depth * np.sqrt(
# point_diff[0]**2 / k[0]**2 + point_diff[1]**2 / k[4]**2
# )
# return grab_width
def rmat2quat(rmat):
"""Convert rotation matrix to quaternion."""

View File

@@ -6,6 +6,7 @@ import cv2
import numpy as np
import open3d as o3d
from .image_tools import save_img
__all__ = [
@@ -16,7 +17,8 @@ def draw_box(
rgb_img: np.ndarray,
segmentation_result,
put_text: bool = True,
save_path: Union[bool, str] = False
save_dir: Union[bool, str] = False,
mark_time: bool = False
):
"""
绘制目标检测边界框
@@ -26,6 +28,12 @@ def draw_box(
class_ids = segmentation_result.boxes.cls.cpu().numpy()
labels = segmentation_result.names
x_centers, y_centers = boxes[:, 0], boxes[:, 1]
sorted_index = np.lexsort((-y_centers, x_centers))
boxes = boxes[sorted_index]
class_ids = class_ids[sorted_index]
confidences = confidences[sorted_index]
for i, box in enumerate(boxes):
x_center, y_center, width, height = box[:4]
@@ -41,15 +49,8 @@ def draw_box(
cv2.putText(rgb_img, f"{i}", (p1[0] + 15, p1[1] + 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
if save_path:
if isinstance(save_path, str):
dir_path = os.path.dirname(save_path)
if dir_path:
os.makedirs(dir_path, exist_ok=True)
else:
home_path = os.path.expanduser("~")
save_path = os.path.join(home_path, "detect_color_img.png")
cv2.imwrite(save_path, rgb_img)
if save_dir:
save_img(rgb_img, "detect_color_img.png", save_dir, mark_time)
def draw_mask(rgb_img: np.ndarray, segmentation_result):

View File

@@ -0,0 +1,282 @@
import numpy as np
import transforms3d as tfs
# import open3d as o3d
__all__ = ["refine_grasp_pose"]
def collision_detector(
    points: np.ndarray,
    refine: np.ndarray,
    volume: list[float],
    iou: float = 0.001,
    search_mode: bool = False,
    **kwargs
) -> int:
    """
    Check a candidate grasp offset against the scene cloud.

    input:
        points: np.ndarray (3, n), scene points in the grasp frame
        refine: np.ndarray (3, ), candidate offset of the grasp frame origin
        volume: list [left, right], expected voxel counts of full finger boxes
        iou: float, occupancy ratio above which a finger counts as colliding
        search_mode: bool, when True skip the palm (hand-body) checks
        **kwargs:
            "grab_width": float
            "hand_size": list [width, height, length]
            "left_size": list [thick, width, length]
            "right_size": list [thick, width, length]
    output:
        collision_code: int
            0 free, 1 palm blocked (both sides or centre), 2 palm bottom,
            3 palm top, 4 left finger, 5 right finger, 6 both fingers.
    """
    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
    # Slightly shrunk opening so borderline contacts are not flagged.
    grab_width = kwargs.get('grab_width', 0.10) * 0.95
    x, y, z = refine

    def box_mask(x_lo, x_hi, y_lo, y_hi, z_lo, z_hi):
        # Boolean mask of points strictly inside the open axis-aligned box.
        return (
            (points[0] > x_lo) & (points[0] < x_hi)
            & (points[1] > y_lo) & (points[1] < y_hi)
            & (points[2] > z_lo) & (points[2] < z_hi)
        )

    if not search_mode:
        palm_z = (z - hand_size[2], z)
        palm_y = (y - hand_size[0] / 2, y + hand_size[0] / 2)
        # Palm split into top / centre / bottom quarters along x.
        top_box = box_mask(x - hand_size[1] / 2, x - hand_size[1] / 4,
                           palm_y[0], palm_y[1], palm_z[0], palm_z[1])
        center_box = box_mask(x - hand_size[1] / 4, x + hand_size[1] / 4,
                              palm_y[0], palm_y[1], palm_z[0], palm_z[1])
        bottom_box = box_mask(x + hand_size[1] / 4, x + hand_size[1] / 2,
                              palm_y[0], palm_y[1], palm_z[0], palm_z[1])
        if (top_box.any() and bottom_box.any()) or center_box.any():
            return 1
        if bottom_box.any():
            return 2
        if top_box.any():
            return 3

    # Finger sweep volumes just outside the grasp opening.
    left_box = box_mask(
        x - left_size[1] / 2, x + left_size[1] / 2,
        y + grab_width / 2, y + grab_width / 2 + left_size[0],
        z - 0.05, z + left_size[2],
    )
    right_box = box_mask(
        x - right_size[1] / 2, x + right_size[1] / 2,
        y - (grab_width / 2 + right_size[0]), y - grab_width / 2,
        z - 0.05, z + right_size[2],
    )
    left_iou = left_box.sum() / (volume[0] + 1e-6)
    right_iou = right_box.sum() / (volume[1] + 1e-6)
    if left_iou > iou and right_iou > iou:
        return 6
    if left_iou > iou:
        return 4
    if right_iou > iou:
        return 5
    return 0
def refine_grasp_pose(
    points: np.ndarray,
    voxel_size: float,
    target_position: np.ndarray,
    expect_orientation: np.ndarray | None = None,
    search_mode: bool = False,
    **kwargs
):
    """
    Iteratively nudge a grasp pose out of collision with the scene cloud.

    The cloud is expressed in the grasp frame centred at target_position;
    `refine` accumulates a translational offset that is adjusted step by
    step until collision_detector reports no contact.

    input:
        points: np.ndarray (n, 3), already complete voxel downsample
        voxel_size: float, voxel edge of the downsample (scales finger volumes)
        target_position: np.ndarray (3, ), grab target position
        expect_orientation: np.ndarray (3, 3), expect grab target orientation
        search_mode: bool, Default False; when True, additionally widen the
            collision-free span along y and centre the grasp inside it
        **kwargs:
            "grab_width": float
            "hand_size": list [width, height, length]
            "left_size": list [thick, width, length]
            "right_size": list [thick, width, length]
            thick of gripper finger, width of gripper finger, length of gripper finger
    output:
        (4x4 homogeneous pose matrix, True) on success, (1202, False) on failure.
    """
    if expect_orientation is None:
        # Default fixed grasp frame.
        expect_orientation = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    refine = np.zeros(3)  # accumulated offset, expressed in the grasp frame
    grab_width = kwargs.get('grab_width', 0.10)
    hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
    left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
    right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
    # Expected voxel count of a fully occupied finger volume.
    left_volume = left_size[0] * left_size[1] * left_size[2] / (voxel_size**3)
    right_volume = right_size[0] * right_size[1] * right_size[2] / (voxel_size**3)
    # Express the cloud in the grasp frame: translate, then rotate -> (3, n).
    points = points - target_position[None, :]  # (n, 3) - (1, 3) = (n, 3)
    points = expect_orientation.T @ points.T  # (3, 3) @ (3, n) = (3, n)
    # Crop to the gripper workspace (plus margin) to keep the loop cheap.
    points = points[:,
        (points[2] < left_size[2]) & (points[2] > -(0.1 + hand_size[2]))
        & (points[0] < (0.05 + hand_size[1]/2)) & (points[0] > -(0.05 + hand_size[1]/2))
        & (points[1] < (0.05 + grab_width/2 + left_size[0]))
        & (points[1] > -(0.05 + grab_width/2 + right_size[0]))
    ]
    # NOTE(review): `frist_sign` (sic) is only assigned True inside branches
    # guarded by `if frist_sign:`, so those z-retreat branches appear
    # unreachable — confirm the intended logic (possibly `if not frist_sign`).
    frist_sign = False
    y_n = y_p = False  # last adjustment moved -y / +y
    left_last = right_last = False
    hand_num = left_num = right_num = 0  # step budgets per adjustment kind
    while hand_num < 20 and left_num < 10 and right_num < 10:
        collision_code = collision_detector(
            points, refine, volume=[left_volume, right_volume], **kwargs
        )
        if collision_code == 0:
            # Take one extra step in the last successful y-direction for margin.
            if y_p and not y_n:
                refine[1] += 0.004
                y_p = y_n = False
                left_last = True
                right_last = False
                continue
            if not y_p and y_n:
                refine[1] -= 0.004
                y_p = y_n = False
                left_last = False
                right_last = True
                continue
            if search_mode:
                break  # widen the free span below before composing the pose
            position = target_position + (expect_orientation @ refine.T).T
            rmat = tfs.affines.compose(
                np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
            )
            return rmat, True
        if collision_code == 6:
            # Both fingers collide: no feasible grasp here.
            return 1202, False
        if collision_code == 1:
            # Palm blocked: retreat along z.
            refine[2] -= 0.008
            hand_num += 1
            print("z + 0.008")
            continue
        if collision_code == 2:
            # Palm bottom quarter collides: shift -x.
            if frist_sign:
                print("z + 0.004")
                refine[2] -= 0.004
                frist_sign = True
                continue
            refine[0] -= 0.002
            hand_num += 1
            print("x - 0.002")
            continue
        if collision_code == 3:
            # Palm top quarter collides: shift +x.
            if frist_sign:
                print("z + 0.004")
                refine[2] -= 0.004
                frist_sign = True
                continue
            refine[0] += 0.002
            hand_num += 1
            print("x + 0.002")
            continue
        if collision_code == 4:
            # Left finger collides: shift +y.
            y_p = True
            y_n = False
            refine[1] += 0.004
            left_num += 1
            print("y + 0.004")
            continue
        if collision_code == 5:
            # Right finger collides: shift -y.
            y_p = False
            y_n = True
            refine[1] -= 0.004
            right_num += 1
            print("y - 0.004")
            continue
        else:
            # Unknown collision code.
            return 1202, False
    if search_mode:
        # Measure the collision-free span along y, then centre the grasp in it.
        right_num = left_num = 0
        if left_last and not right_last:
            y_min = refine[1]
            while left_num < 10:
                left_num += 1
                refine[1] += 0.004
                collision_code = collision_detector(
                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
                )
                if collision_code:
                    refine[1] = (refine[1] - 0.004 + y_min) / 2
                    break
            else:
                # No collision within budget: assume a wide free span.
                # NOTE(review): 0.2 m exceeds the probed 0.04 m range — confirm.
                refine[1] = y_min + 0.2
            print(f"left_num = {left_num}")
        elif not left_last and right_last:
            y_max = refine[1]
            while right_num < 10:
                right_num += 1
                refine[1] -= 0.004
                collision_code = collision_detector(
                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
                )
                if collision_code:
                    refine[1] = (refine[1] + 0.004 + y_max) / 2
                    break
            else:
                refine[1] = y_max - 0.2
            print(f"right_num = {right_num}")
        elif not left_last and not right_last:
            # Never collided along y: probe both directions from the start.
            y_cur = refine[1]
            while left_num < 6:
                left_num += 1
                refine[1] += 0.004
                collision_code = collision_detector(
                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
                )
                if collision_code:
                    y_max = refine[1] - 0.004
                    break
            else:
                y_max = refine[1] + 0.024
            print(f"left_num = {left_num}")
            refine[1] = y_cur
            while right_num < 6:
                right_num += 1
                refine[1] -= 0.004
                collision_code = collision_detector(
                    points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
                )
                if collision_code:
                    refine[1] = (refine[1] + 0.004 + y_max) / 2
                    break
            else:
                refine[1] = (y_cur - 0.024 + y_max) / 2
            print(f"right_num = {right_num}")
        position = target_position + (expect_orientation @ refine.T).T
        rmat = tfs.affines.compose(
            np.squeeze(np.asarray(position)),
            expect_orientation,
            [1, 1, 1]
        )
        return rmat, True
    # Step budget exhausted without finding a collision-free pose.
    return 1202, False

View File

@@ -1,3 +1,6 @@
import os
import time
import cv2
import logging
import numpy as np
@@ -5,7 +8,7 @@ import numpy as np
__all__ = [
"crop_imgs_box_xywh", "crop_imgs_box_xyxy", "crop_imgs_mask", "get_map",
"distortion_correction",
"distortion_correction", "save_img"
]
def crop_imgs_box_xywh(imgs: list, box, same_sign: bool = False):
@@ -154,3 +157,28 @@ def distortion_correction(
return undistorted_color, undistorted_depth
def save_img(img: np.ndarray, save_name, save_dir: str | None = None, mark_cur_time: bool = False):
    """
    Save an image to disk.

    input:
        img: np.ndarray
            - uint8, BGR (H, W, 3)
            - uint16 single-channel
        save_name: str, file name including extension
        save_dir: str | None, target directory (created if missing); when
            None the user's home directory is used
        mark_cur_time: bool, append a millisecond timestamp to the file stem
    """
    if isinstance(save_dir, str):
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, save_name)
    else:
        # No directory given: fall back to the home directory.
        home_path = os.path.expanduser("~")
        save_path = os.path.join(home_path, save_name)
    if mark_cur_time:
        # e.g. foo.png -> foo_1700000000000.png
        cur_time = int(time.time() * 1000)
        path, ext = os.path.splitext(save_path)
        save_path = path + f'_{cur_time}' + ext
    cv2.imwrite(save_path, img)

View File

@@ -0,0 +1,152 @@
# import logging
#
# import cv2
import numpy as np
import open3d as o3d
__all__ = ["create_o3d_pcd", "create_o3d_denoised_pcd"]
def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
    """
    Denoise a point cloud and keep the cluster nearest to the camera.

    input:
        point_cloud: o3d.geometry.PointCloud
        voxel_size: float, voxel edge for downsampling; filter parameters are
            scaled relative to a 5 mm reference voxel
    output:
        (clean_pcd, True) on success,
        (1100, False) when the nearest cluster is too small (too noisy).
    """
    point_cloud = point_cloud.remove_non_finite_points()
    down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
    point_num = len(down_pcd.points)
    # Radius filter
    clean_pcd, _ = down_pcd.remove_radius_outlier(
        nb_points=max(int(round(10 * voxel_size / 0.005)), 3),
        radius=voxel_size * 10
    )
    # Statistical filter
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(
        nb_neighbors=max(int(round(10 * voxel_size / 0.005)), 3),
        std_ratio=2.0
    )
    # Drop points closer than 0.2 m to the camera.
    points = np.asarray(clean_pcd.points)
    clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
    labels = np.array(
        clean_pcd.cluster_dbscan(
            eps=voxel_size * 10,
            min_points=max(int(round(10 * voxel_size / 0.005)), 3)
        )
    )
    if len(labels[labels >= 0]) == 0:
        # No cluster found: return the filtered cloud as-is.
        return clean_pcd, True
    # Keep the cluster with the smallest mean depth (nearest object).
    points = np.asarray(clean_pcd.points)
    cluster_label = set(labels)
    point_cloud_clusters = []
    for label in cluster_label:
        if label == -1:
            continue
        idx = np.where(labels == label)[0]
        point_cloud_cluster = clean_pcd.select_by_index(idx)
        points_cluster_z = points[idx, 2]
        z_avg = np.mean(points_cluster_z)
        if z_avg < 0.2:
            continue
        point_cloud_clusters.append((point_cloud_cluster, z_avg))
    if len(point_cloud_clusters) == 0:
        # BUG FIX: this path previously returned a bare point cloud, which
        # broke callers that unpack (pcd, sign).
        return clean_pcd, True
    point_cloud_clusters.sort(key=lambda x: x[1])
    clean_pcd = point_cloud_clusters[0][0]
    # Use the nearest-cluster point ratio to judge noise strength.
    largest_cluster_ratio = len(clean_pcd.points) / point_num
    if largest_cluster_ratio < 0.05:
        return 1100, False
    return clean_pcd, True
def create_o3d_pcd(depth_img, camera_size, k, **kwargs):
    """
    Build a downsampled 3-D point array from a depth image.
    --------
    input:
        depth_img: np.ndarray, depth image (cast to uint16)
        camera_size: list, [width, height] in pixels
        k: np.ndarray | list, flattened 3x3 intrinsic matrix (row-major)
        **kwargs:
            "depth_scale": float, depth units per metre (default 1000.0)
            "depth_trunc": float, max depth kept, metres (default 3.0)
            "voxel_size": float, downsample voxel edge (default 0.01)
    output:
        orign_points: np.ndarray (n, 3) — NOTE: a numpy array, not an
        o3d.geometry.PointCloud.
    """
    intrinsics = o3d.camera.PinholeCameraIntrinsic(
        int(camera_size[0]),
        int(camera_size[1]),
        k[0], k[4], k[2], k[5]  # fx, fy, cx, cy from the flattened matrix
    )
    depth_o3d = o3d.geometry.Image(depth_img.astype(np.uint16))
    orign_point_clouds = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=kwargs.get("depth_scale", 1000.0),
        depth_trunc=kwargs.get("depth_trunc", 3.0),
    )
    orign_point_clouds = orign_point_clouds.voxel_down_sample(
        kwargs.get("voxel_size", 0.01)
    )
    # NOTE(review): radius/statistical outlier filtering was deliberately
    # disabled here; denoising happens elsewhere (point_cloud_denoising).
    orign_points = np.asarray(orign_point_clouds.points)
    return orign_points
def create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs):
    """
    Build a point cloud from a masked depth image and denoise it.
    --------
    input:
        depth_img_mask: np.ndarray, depth image with non-object pixels zeroed
        intrinsics: o3d.camera.PinholeCameraIntrinsic
        **kwargs:
            "depth_scale": float (default 1000.0)
            "depth_trunc": float (default 3.0)
            "voxel_size": float (default 0.002)
    output:
        (point_cloud, True) on success, (error_code, False) on failure.
    """
    raw_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=o3d.geometry.Image(depth_img_mask.astype(np.uint16)),
        intrinsic=intrinsics,
        depth_scale=kwargs.get("depth_scale", 1000.0),
        depth_trunc=kwargs.get("depth_trunc", 3.0),
    )
    denoised, ok = point_cloud_denoising(raw_cloud, kwargs.get("voxel_size", 0.002))
    if not ok:
        return 1100, False
    if not len(denoised.points):
        return 1101, False
    return denoised, True

View File

@@ -1,260 +0,0 @@
#!/usr/bin/env python
# coding: utf-8
import transforms3d as tfs
import numpy as np
import math
import json
import rclpy
from rclpy.node import Node
def get_matrix_quat(x, y, z, rw, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and a unit quaternion."""
    rotation = tfs.quaternions.quat2mat([rw, rx, ry, rz])
    translation = np.squeeze(np.asarray((x, y, z)))
    return tfs.affines.compose(translation, rotation, [1, 1, 1])
def get_matrix_eular_radu(x, y, z, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and Euler angles (radians)."""
    rotation = tfs.euler.euler2mat(rx, ry, rz)
    translation = np.squeeze(np.asarray((x, y, z)))
    return tfs.affines.compose(translation, rotation, [1, 1, 1])
def get_matrix_rotvector(x, y, z, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and a rotation vector."""
    rvec = np.array([rx, ry, rz])
    angle = np.linalg.norm(rvec)
    if angle < 1e-8:
        # Near-zero rotation: use the identity to avoid dividing by ~0.
        rotation = np.eye(3)
    else:
        rotation = tfs.axangles.axangle2mat(rvec / angle, angle)
    translation = np.squeeze(np.asarray((x, y, z)))
    return tfs.affines.compose(translation, rotation, [1, 1, 1])
def skew(v):
    """Return the 3x3 skew-symmetric (cross-product) matrix of a 3-vector."""
    a, b, c = v[0], v[1], v[2]
    return np.array([
        [0, -c, b],
        [c, 0, -a],
        [-b, a, 0],
    ])
def R2P(T):
    """Rotation matrix -> modified Rodrigues vector (Tsai-Lenz parameterisation)."""
    axis, angle = tfs.axangles.mat2axangle(T[0:3, 0:3])
    return 2 * math.sin(angle / 2) * axis
def P2R(P):
    """Modified Rodrigues vector -> rotation matrix (inverse of R2P)."""
    sq_norm = np.dot(P.T, P)
    outer = np.dot(P, P.T)
    cross = np.sqrt(4 - sq_norm) * skew(P.flatten().T)
    return (1 - sq_norm / 2) * np.eye(3) + (outer + cross) / 2
def calculate(Hgs, Hcs):
    """
    Tsai-Lenz linear hand-eye calibration.

    Solves AX = XB from pose lists: first the rotation via a least-squares
    system on modified Rodrigues vectors, then the translation with the
    rotation fixed.

    input:
        Hgs: list of 4x4 gripper-side homogeneous transforms
        Hcs: list of 4x4 camera-side homogeneous transforms
    output:
        4x4 homogeneous calibration matrix.
    """
    # Naming: upper-case H* are matrices, lower-case would be scalars.
    Hgijs = []
    Hcijs = []
    A = []
    B = []
    size = 0
    # Stage 1: accumulate the rotation system over every pose pair (i, j).
    for i in range(len(Hgs)):
        for j in range(i + 1, len(Hgs)):
            size += 1
            Hgij = np.dot(np.linalg.inv(Hgs[j]), Hgs[i])  # relative gripper motion
            Hgijs.append(Hgij)
            Pgij = np.dot(2, R2P(Hgij))
            Hcij = np.dot(Hcs[j], np.linalg.inv(Hcs[i]))  # relative camera motion
            Hcijs.append(Hcij)
            Pcij = np.dot(2, R2P(Hcij))
            # skew(Pgij + Pcij) * Pcg' = Pcij - Pgij
            A.append(skew(np.add(Pgij, Pcij)))
            B.append(np.subtract(Pcij, Pgij).reshape(3, 1))
    MA = np.vstack(A)
    MB = np.vstack(B)
    Pcg_ = np.dot(np.linalg.pinv(MA), MB)
    # Recover the rotation Rcg from the modified Rodrigues solution.
    pcg = np.sqrt(np.add(1, np.dot(Pcg_.T, Pcg_)))
    Pcg = np.dot(np.dot(2, Pcg_), np.linalg.inv(pcg))
    Rcg = P2R(Pcg).reshape(3, 3)
    # Stage 2: with Rcg fixed, solve a second least-squares system for Tcg.
    A = []
    B = []
    id = 0  # NOTE(review): shadows the builtin `id`
    for i in range(len(Hgs)):
        for j in range(i + 1, len(Hgs)):
            Hgij = Hgijs[id]
            Hcij = Hcijs[id]
            A.append(np.subtract(Hgij[0:3, 0:3], np.eye(3, 3)))
            B.append(np.subtract(np.dot(Rcg, Hcij[0:3, 3:4]), Hgij[0:3, 3:4]))
            id += 1
    MA = np.asarray(A).reshape(size * 3, 3)
    MB = np.asarray(B).reshape(size * 3, 1)
    Tcg = np.dot(np.linalg.pinv(MA), MB).reshape(3, )
    return tfs.affines.compose(Tcg, np.squeeze(Rcg), [1, 1, 1])
class Calibration(Node):
    """
    One-shot hand-eye calibration node.

    Reads recorded gripper and camera pose lists from JSON files, runs the
    Tsai-Lenz solver (`calculate`) and writes the resulting 4x4 matrix to
    `<matrix_name>_matrix.json`.
    """

    def __init__(self, name):
        super(Calibration, self).__init__(name)
        self.sync_subscriber = None
        self.sub_camera_pose = None
        self.sub_hand_pose = None
        # Homogeneous transforms fed to the solver.
        self.Hgs, self.Hcs = [], []
        # Raw flat pose value lists loaded from JSON.
        self.hand, self.camera = [], []
        self.calibration_matrix = None
        self.declare_parameter('matrix_name', 'eye_to_hand')
        self.matrix_name = self.get_parameter('matrix_name').value
        self.declare_parameter('mode', 'eye_to_hand')
        self.mode = self.get_parameter('mode').value.lower()
        if self.mode not in ['eye_in_hand', 'eye_to_hand']:
            raise ValueError("mode must be 'eye_in_hand' or 'eye_to_hand'")
        # Pose encoding of the input files: quaternion / Euler / rotation vector.
        self.declare_parameter('input', 'quat')
        self.input = self.get_parameter('input').value.lower()
        if self.input == 'eular':
            self.function = get_matrix_eular_radu
        elif self.input == 'rotvertor':
            self.function = get_matrix_rotvector
        elif self.input == 'quat':
            self.function = get_matrix_quat
        else:
            raise ValueError("INPUT ERROR")
        self.declare_parameter('camera_pose_path', 'camera_pose_data.json')
        self.declare_parameter('hand_pose_path', 'hand_pose_data.json')
        self.camera_pose_path = self.get_parameter('camera_pose_path').value
        self.hand_pose_path = self.get_parameter('hand_pose_path').value
        self.get_pose()
        # NOTE(review): get_pose() sets self.done = True on success, but this
        # line immediately resets it to False, so main() keeps spinning —
        # confirm the intended initialisation order.
        self.done = False

    def get_pose(self):
        """Load the pose files, solve the calibration and save the result."""
        with open(f'{self.camera_pose_path}', 'r', encoding='utf-8') as f:
            self.camera = json.load(f)
        with open(f'{self.hand_pose_path}', 'r', encoding='utf-8') as f:
            self.hand = json.load(f)
        self.calculate_calibration()
        print(self.hand)
        print(self.camera)
        self.get_logger().info(f"{self.calibration_matrix}")
        hand_eye_result = {
            "T": self.calibration_matrix.tolist()
        }
        with open(f"{self.matrix_name}_matrix.json", "w") as f:
            json.dump(hand_eye_result, f, indent=4)
        self.get_logger().info(f"Save as {self.matrix_name}_matrix.json")
        self.done = True

    def calculate_data(self):
        """Convert the flat pose lists into homogeneous transform lists.

        Quaternion input consumes 7 values per pose, other encodings 6.
        In eye_to_hand mode the gripper poses are inverted before use.
        """
        if self.input == 'quat':
            for i in range(0, len(self.hand), 7):
                self.Hgs.append(
                    np.linalg.inv(
                        self.function(
                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
                        )
                    )
                    if self.mode == 'eye_to_hand' else
                    self.function(
                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
                    )
                )
                self.Hcs.append(
                    self.function(
                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5], self.camera[i + 6]
                    )
                )
        else:
            for i in range(0, len(self.hand), 6):
                self.Hgs.append(
                    np.linalg.inv(
                        self.function(
                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
                        )
                    )
                    if self.mode == 'eye_to_hand' else
                    self.function(
                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
                    )
                )
                self.Hcs.append(
                    self.function(
                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5]
                    )
                )

    def calculate_calibration(self):
        """Build the transform lists and run the Tsai-Lenz solver."""
        self.calculate_data()
        self.calibration_matrix = calculate(self.Hgs, self.Hcs)
def main(args=None):
    """ROS2 entry point: spin the calibration node until it reports done."""
    rclpy.init(args=args)
    calib_node = Calibration('calibration')
    try:
        while rclpy.ok() and not calib_node.done:
            rclpy.spin_once(calib_node)
    except KeyboardInterrupt:
        pass
    finally:
        calib_node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -1,323 +0,0 @@
import os
from collections import defaultdict
from ament_index_python.packages import get_package_share_directory
import cv2
import open3d as o3d
import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
import rclpy
from rclpy.node import Node
from message_filters import ApproximateTimeSynchronizer, Subscriber
from sensor_msgs.msg import Image, CameraInfo
from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
share_dir = get_package_share_directory('vision_detect')
def get_map(K, D, camera_size):
    """Build undistortion remap tables and the new flattened intrinsic matrix."""
    width, height = camera_size
    K_mat = np.array(K).reshape(3, 3)
    D_vec = np.array(D)
    new_K, _ = cv2.getOptimalNewCameraMatrix(K_mat, D_vec, (width, height), 1, (width, height))
    map1, map2 = cv2.initUndistortRectifyMap(K_mat, D_vec, None, new_K, (width, height), cv2.CV_32FC1)
    return map1, map2, new_K.flatten()
def pca(data, sort=True):
    """Principal component analysis via covariance eigendecomposition.

    Returns (eigenvalues, eigenvectors) — columns are components — sorted by
    descending variance when `sort` is True, or (None, None) on failure.
    """
    centered = data - np.mean(data, axis=0)
    try:
        eigenvalues, eigenvectors = np.linalg.eigh(np.cov(centered.T))
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        order = np.argsort(eigenvalues)[::-1]  # descending variance
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]
    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):
    """
    Estimate a 6-DoF pose from the masked region of a depth image.

    Masked depth pixels are back-projected to a point cloud, filtered, and
    the orientation is taken from the cloud's PCA axes.

    input:
        mask: np.ndarray (H, W), nonzero where the object is
        depth_img: np.ndarray (H, W), depth cast to uint16 (mm, per depth_scale)
        intrinsics: o3d.camera.PinholeCameraIntrinsic
    output:
        x, y, z, rw, rx, ry, rz — position (m) and unit quaternion.
        All zeros when no valid points remain; None quaternion components
        (and zero position) when PCA fails.
    """
    # Keep only depth pixels under the mask.
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,
        depth_trunc=2,
    )
    point_cloud = point_cloud.remove_non_finite_points()
    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
    clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    center = clean_pcd.get_center()
    x, y, z = center
    w, v = pca(np.asarray(clean_pcd.points))
    if w is not None and v is not None:
        # Build a right-handed frame from the principal axes with a
        # deterministic sign convention.
        vx, vy, vz = v[:,0], v[:,1], v[:,2]
        if vx[0] < 0:
            vx = -vx
        if vy[1] < 0:
            vy = -vy
        if not np.allclose(np.cross(vx, vy), vz):
            vz = -vz
        R = np.column_stack((vx, vy, vz))
        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
        # (Debug visualisation of the principal axes was removed here.)
        return x, y, z, rw, rx, ry, rz
    else:
        # PCA failed (singular covariance): signal an invalid pose.
        # NOTE(review): the computed centre is discarded here — confirm.
        return 0.0, 0.0, 0.0, None, None, None, None
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a colour/depth image pair using precomputed remap tables."""
    # Bilinear for colour; nearest-neighbour for depth so depths never blend.
    color_out = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR).astype(color_image.dtype)
    depth_out = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST).astype(depth_image.dtype)
    return color_out, depth_out
def crop_mask_bbox(depth_img, mask):
    """
    Crop a depth image and its mask to the mask's bounding box.

    input:
        depth_img: H x W
        mask: H x W (0/1 or bool)
    output:
        depth_crop, mask_crop, (x_min, y_min) — the crops and the top-left
        corner of the bounding box in the original image.
        Returns (None, None, (0, 0)) when the mask is empty (callers test
        `depth_crop is None`).
    """
    high, width = depth_img.shape
    ys, xs = np.where(mask > 0)
    if xs.size == 0:
        # BUG FIX: an empty mask used to raise ValueError on xs.min().
        return None, None, (0, 0)
    x_min, x_max = int(xs.min()), int(xs.max())
    y_min, y_max = int(ys.min()), int(ys.max())
    # Clamp to the image and make the slice end-inclusive.
    y0, y1 = max(0, y_min), min(y_max, high) + 1
    x0, x1 = max(0, x_min), min(x_max, width) + 1
    return depth_img[y0:y1, x0:x1], mask[y0:y1, x0:x1], (x0, y0)
class DetectNode(Node):
    """
    Chessboard-based test detection node.

    Subscribes to synchronized colour/depth images, detects a chessboard,
    estimates its pose from the masked depth, and publishes the result as a
    PoseArrayClassAndID plus an annotated debug image.
    """

    def __init__(self, name):
        super().__init__(name)
        self.output_boxes = None
        self.output_masks = None
        self.K = self.D = None
        self.map1 = self.map2 = None
        self.intrinsics = None
        # Per-frame processing function and pose estimator (swappable hooks).
        self.function = self._test_image
        self.calculate_function = calculate_pose_cpu
        self.cv_bridge = CvBridge()
        # init
        self._init_param()
        self._init_publisher()
        self._init_subscriber()

    def _init_param(self):
        """Declare and read the node parameters."""
        self.declare_parameter('output_boxes', True)
        self.output_boxes = self.get_parameter('output_boxes').value
        self.declare_parameter('output_masks', False)
        self.output_masks = self.get_parameter('output_masks').value
        self.declare_parameter('color_image_topic', '/camera/color/image_raw')
        self.color_image_topic = self.get_parameter('color_image_topic').value
        self.declare_parameter('depth_image_topic', '/camera/depth/image_raw')
        self.depth_image_topic = self.get_parameter('depth_image_topic').value
        self.declare_parameter('camera_info_topic', '/camera/color/camera_info')
        self.camera_info_topic = self.get_parameter('camera_info_topic').value

    def _init_publisher(self):
        """Create the pose publisher and, if enabled, the debug image publisher."""
        self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
        if self.output_boxes or self.output_masks:
            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)

    def _init_subscriber(self):
        """Subscribe to camera info and the synchronized colour/depth pair."""
        self.sub_camera_info = self.create_subscription(
            CameraInfo,
            self.camera_info_topic,
            self._camera_info_callback,
            10
        )
        # Sync colour and depth images within a 0.1 s window.
        self.sub_color_image = Subscriber(self, Image, self.color_image_topic)
        self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic)
        self.sync_subscriber = ApproximateTimeSynchronizer(
            [self.sub_color_image, self.sub_depth_image],
            queue_size=10,
            slop=0.1
        )
        self.sync_subscriber.registerCallback(self._sync_callback)

    def _camera_info_callback(self, msg: CameraInfo):
        """Cache the intrinsics once, build remap tables, then unsubscribe."""
        self.K = msg.k
        self.D = msg.d
        self.camera_size = [msg.width, msg.height]
        if self.K is not None and self.D is not None:
            # self.K becomes the flattened rectified intrinsic matrix.
            self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
            if len(self.D) != 0:
                self.destroy_subscription(self.sub_camera_info)
            else:
                # No distortion supplied: assume all-zero coefficients.
                self.D = [0, 0, 0, 0, 0]
                self.destroy_subscription(self.sub_camera_info)
        else:
            # NOTE(review): raising a string is itself a TypeError — should
            # likely be `raise ValueError("K and D are not defined")`.
            raise "K and D are not defined"

    def _sync_callback(self, color_img_ros, depth_img_ros):
        """Synchronized colour/depth callback: detect, publish image and poses."""
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
        # NOTE(review): self.map1/map2 stay None until the first CameraInfo
        # message arrives; frames received before that will fail here.
        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
        img, pose_dict = self.function(color_img_cv, depth_img_cv)
        # No annotated image produced (empty masks): publish the raw frame.
        if img is None:
            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
        if self.output_boxes or self.output_masks:
            self.pub_detect_image.publish(img)
        if pose_dict:
            pose_list_all = PoseArrayClassAndID()
            for (class_id, class_name), pose_list in pose_dict.items():
                pose_list_all.objects.append(
                    PoseClassAndID(
                        class_name = class_name,
                        class_id = class_id,
                        pose_list = pose_list
                    )
                )
            pose_list_all.header.stamp = self.get_clock().now().to_msg()
            pose_list_all.header.frame_id = "pose_list"
            self.pub_pose_list.publish(pose_list_all)

    def _test_image(self, rgb_img, depth_img):
        """Detect a chessboard and estimate its pose from the depth image.

        Returns (annotated image msg, pose dict) on detection, otherwise
        (raw image msg, None).
        """
        pose_dict = defaultdict(list)
        rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
        pattern_size = (8, 5)
        # pattern_size = (15, 7)
        ret, corners = cv2.findChessboardCorners(rgb_img_gray, pattern_size, cv2.CALIB_CB_FAST_CHECK)
        if ret:
            # Refine corners to sub-pixel accuracy for a better pose.
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
            corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
            corners_subpix = corners_subpix.reshape(pattern_size[1], pattern_size[0], 2)
            # Fill every inner chessboard cell to build the board mask.
            mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
            for i in range(0, pattern_size[1] - 1):
                for j in range(0, pattern_size[0] - 1):
                    pts = np.array([
                        corners_subpix[i, j],
                        corners_subpix[i, j + 1],
                        corners_subpix[i + 1, j + 1],
                        corners_subpix[i + 1, j]
                    ], dtype=np.int32)
                    cv2.fillConvexPoly(mask, pts, 1)
            # Tint the detected board red in the debug image.
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
            orig_shape = rgb_img_gray.shape
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask)
            if depth_crop is None:
                return None, None
            # Shift the principal point into the cropped image frame.
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0]),
                int(self.camera_size[1]),
                self.K[0],
                self.K[4],
                self.K[2] - x_min,
                self.K[5] - y_min
            )
            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics)
            self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
            # An all-zero position marks a failed pose estimate.
            if (x, y, z) != (0.0, 0.0, 0.0):
                pose = Pose()
                pose.position = Point(x=x, y=y, z=z)
                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
                pose_dict[int(99), 'crossboard'].append(pose)
                cv2.putText(rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}', (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
                cv2.putText(rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}', (0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        else:
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
def main(args=None):
    """ROS2 entry point: create the detection node and spin until shutdown."""
    rclpy.init(args=args)
    detect_node = DetectNode('detect')
    try:
        rclpy.spin(detect_node)
    except KeyboardInterrupt:
        pass
    finally:
        detect_node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -1,575 +0,0 @@
import os
import time
import threading
import ast
from collections import defaultdict
from ament_index_python.packages import get_package_share_directory
import cv2
import open3d as o3d
import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
import torch
from ultralytics import YOLO
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
share_dir = get_package_share_directory('vision_detect')
def draw_pointcloud(pcd, T):
    """Visualise point clouds plus axis triads at the origin and at pose T."""
    rotation = T[0:3, 0:3]
    origin = T[0:3, 3:4].flatten()
    ax, ay, az = rotation[:, 0], rotation[:, 1], rotation[:, 2]
    # Two axis triads: one at the world origin, one at the given pose.
    vertices = [
        [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
        origin, origin + ax, origin + ay, origin + az
    ]
    edges = [[0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]]
    edge_colors = [
        [1, 0, 0], [0, 1, 0], [0, 0, 1],
        [1, 0, 0], [0, 1, 0], [0, 0, 1]
    ]
    axes = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(vertices),
        lines=o3d.utility.Vector2iVector(edges)
    )
    axes.colors = o3d.utility.Vector3dVector(edge_colors)
    pcd.append(axes)
    o3d.visualization.draw_geometries(pcd)
def preprocess_point_cloud(pcd, voxel_size):
    """Downsample a cloud and compute normals + FPFH features for registration."""
    pcd_down = pcd.voxel_down_sample(voxel_size)
    # Normals over a 2-voxel radius; FPFH over a 5-voxel radius.
    pcd_down.estimate_normals(
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 2, max_nn=30)
    )
    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        pcd_down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 5, max_nn=100)
    )
    return pcd_down, pcd_fpfh
def prepare_dataset(source, target, voxel_size):
    """Apply an identity initial transform and preprocess both clouds."""
    source.transform(np.identity(4))
    source_down, source_fpfh = preprocess_point_cloud(source, voxel_size)
    target_down, target_fpfh = preprocess_point_cloud(target, voxel_size)
    return source_down, target_down, source_fpfh, target_fpfh
def execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
    """Coarse RANSAC alignment on FPFH feature matches; returns a 4x4 transform."""
    distance_threshold = voxel_size * 1.5
    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
        source_down, target_down, source_fpfh, target_fpfh,
        True,  # mutual feature filter
        distance_threshold,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
        3,  # points sampled per RANSAC iteration
        [
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
        ],
        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999)
    )
    return result.transformation
def object_icp(
    target: o3d.geometry.PointCloud,
    source: o3d.geometry.PointCloud | str,
    ransac_voxel_size: float = 0.005,
    icp_voxel_radius: list[float] | None = None,
    icp_max_iter: list[int] | None = None,
):
    """
    Register `source` onto `target`: global RANSAC alignment followed by
    three ICP refinement passes at decreasing voxel radii.

    input:
        target: observed point cloud
        source: model point cloud, or a file path readable by open3d
        ransac_voxel_size: distance scale for the global registration
        icp_voxel_radius: per-pass downsample radii (coarse -> fine)
        icp_max_iter: per-pass ICP iteration caps
    output:
        4x4 transformation matrix mapping source into target.
    """
    if icp_voxel_radius is None:
        icp_voxel_radius = [0.004, 0.002, 0.001]
    if icp_max_iter is None:
        icp_max_iter = [50, 30, 14]
    if isinstance(source, str):
        source = o3d.io.read_point_cloud(source)
    elif isinstance(source, o3d.geometry.PointCloud):
        pass
    else:
        raise TypeError(f"Unsupported Type {type(source)}")
    voxel_size = 0.005  # means 5mm for this dataset
    source_down, target_down, source_fpfh, target_fpfh = prepare_dataset(source, target, voxel_size)
    # Coarse global alignment used as the ICP seed.
    T = execute_global_registration(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        ransac_voxel_size
    )
    # Multi-scale ICP refinement, coarse to fine.
    for scale in range(3):
        iter = icp_max_iter[scale]  # NOTE(review): shadows the builtin `iter`
        radius = icp_voxel_radius[scale]
        source_down = source.voxel_down_sample(radius)
        target_down = target.voxel_down_sample(radius)
        source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        result_icp = o3d.pipelines.registration.registration_icp(
            source_down,
            target_down,
            radius * 5,  # max correspondence distance for this pass
            T,
            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
            o3d.pipelines.registration.ICPConvergenceCriteria(
                relative_fitness=1e-6,
                relative_rmse=1e-6,
                max_iteration=iter
            )
        )
        T = result_icp.transformation
    # NOTE(review): opens a blocking visualisation window and mutates
    # `source` in place via transform() — looks like debug code; confirm
    # before production use.
    draw_pointcloud([source.transform(T), target], T)
    return T
def get_map(K, D, camera_size):
    """Build undistortion remap tables for a camera.

    Args:
        K: intrinsic matrix, flat (9,) or 3x3.
        D: distortion coefficients.
        camera_size: (width, height) of the image.

    Returns:
        (map1, map2, new_K_flat): lookup tables for cv2.remap plus the
        flattened optimal intrinsic matrix valid after undistortion.
    """
    width, height = camera_size
    intrinsic = np.asarray(K).reshape(3, 3)
    dist = np.asarray(D)
    optimal_K, _ = cv2.getOptimalNewCameraMatrix(
        intrinsic, dist, (width, height), 1, (width, height))
    map1, map2 = cv2.initUndistortRectifyMap(
        intrinsic, dist, None, optimal_K, (width, height), cv2.CV_32FC1)
    return map1, map2, optimal_K.flatten()
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point set.

    Args:
        data: array-like of shape (N, D), one sample per row.
        sort: when True, order components by descending eigenvalue.

    Returns:
        (eigenvalues, eigenvectors) of the sample covariance matrix, with
        eigenvectors in columns; (None, None) if the decomposition fails.
    """
    center = np.mean(data, axis=0)
    centered_points = data - center  # remove the centroid
    try:
        cov_matrix = np.cov(centered_points.T)  # np.cov wants variables in rows
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        # Use a distinct name for the permutation: the original rebound the
        # boolean `sort` parameter itself to an ndarray, shadowing the flag.
        order = eigenvalues.argsort()[::-1]  # descending
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]
    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, model_pcd):
    """Estimate the 6-DoF pose of a masked object via model-to-scene ICP.

    Args:
        mask: binary instance mask aligned with depth_img.
        depth_img: 16-bit depth image in millimetres.
        intrinsics: o3d.camera.PinholeCameraIntrinsic matching depth_img.
        model_pcd: reference model point cloud registered onto the scene.

    Returns:
        (x, y, z, rw, rx, ry, rz): translation in metres plus a w-first
        quaternion; all zeros when no valid points survive filtering.
    """
    # Keep only the depth pixels covered by the instance mask.
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,  # millimetres -> metres
        depth_trunc=2.0,  # discard points beyond 2 m
    )
    point_cloud = point_cloud.remove_non_finite_points()
    # Two-stage outlier rejection: radius filter, then statistical filter.
    clean_pcd, _ = point_cloud.remove_radius_outlier(nb_points=10, radius=0.1)
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    # Registration yields the model->scene transform; its translation and
    # rotation are the object's pose in the camera frame.
    rmat = object_icp(model_pcd, clean_pcd)
    x, y, z = rmat[0:3, 3:4].flatten()
    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
    return x, y, z, rw, rx, ry, rz
def draw_box(set_confidence, rgb_img, result):
    """Draw a labelled bounding box for every detection above the cut.

    Args:
        set_confidence: minimum confidence for a box to be drawn.
        rgb_img: BGR image annotated in place.
        result: one ultralytics result holding boxes and names.
    """
    boxes = result.boxes.xywh.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names
    color = (255, 255, 0)
    for box, conf, cls in zip(boxes, confidences, class_ids):
        if conf < set_confidence:
            continue
        cx, cy, bw, bh = box[:4]
        top_left = [int(cx - bw / 2), int(cy - bh / 2)]
        bottom_right = [int(cx + bw / 2), int(cy + bh / 2)]
        cv2.rectangle(rgb_img, top_left, bottom_right, color, 2)
        caption = f'{labels[cls]}: {conf*100:.2f}'
        cv2.putText(rgb_img, caption, (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
def draw_mask(set_confidence, rgb_img, result):
    """Alpha-blend red over the pixels of each confident instance mask.

    Args:
        set_confidence: minimum confidence for a mask to be drawn.
        rgb_img: BGR image modified in place.
        result: one ultralytics result holding masks and boxes.
    """
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    confidences = result.boxes.conf.cpu().numpy()
    # The original ended with a redundant `else: continue`; the loop
    # continues regardless, so it was dead code and is dropped here.
    for i, mask in enumerate(masks):
        if confidences[i] >= set_confidence:
            # Upscale the low-resolution mask to full image size.
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            # 50/50 blend with pure red (BGR order) over masked pixels.
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a color/depth image pair with precomputed remap tables.

    Color is resampled bilinearly; depth uses nearest-neighbour so no
    interpolated (physically meaningless) depth values are invented.

    Returns:
        (undistorted_color, undistorted_depth) with the input dtypes.
    """
    color_out = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR).astype(color_image.dtype)
    depth_out = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST).astype(depth_image.dtype)
    return color_out, depth_out
def crop_mask_bbox(depth_img, mask, box):
    """Crop depth and mask to a detection's bounding box, clamped to bounds.

    Args:
        depth_img: depth image, H x W.
        mask: instance mask, H x W (0/1 or bool).
        box: (x_center, y_center, w, h) in pixels; extra entries ignored.

    Returns:
        (depth_crop, mask_crop, (x_off, y_off)) — the offsets are the crop's
        top-left corner in full-image coordinates. (The original docstring
        omitted the offset tuple.)
    """
    height, width = depth_img.shape  # `high` in the original meant image height
    x_center, y_center, w, h = box[:4]
    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
    # Clamp to the image; the +1 keeps the far edge inclusive, and numpy
    # slicing silently clips an end index one past the last row/column.
    y0, y1 = max(0, y_min), min(y_max, height) + 1
    x0, x1 = max(0, x_min), min(x_max, width) + 1
    depth_crop = depth_img[y0:y1, x0:x1]
    mask_crop = mask[y0:y1, x0:x1]
    return depth_crop, mask_crop, (x0, y0)
def calculate_grav_width(mask, K, depth):
    """Estimate the graspable width (metres) of the object in *mask*.

    Fits a minimum-area rectangle to the mask's outer contour and
    back-projects its shorter side through the pinhole model at *depth*.

    Args:
        mask: binary instance mask (uint8, H x W).
        K: flattened 3x3 intrinsics (fx = K[0], fy = K[4]).
        depth: object depth in metres.

    Returns:
        float: estimated width, 0.0 when no contour is found.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # NOTE(review): only contours[0] is used — assumes one outer contour
        # per mask; confirm behavior for fragmented masks.
        box = cv2.boxPoints(cv2.minAreaRect(contours[0]))
        # boxPoints returns 4 corners in order; pick the shorter of the two
        # adjacent sides as the grasp width.
        if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
            point_diff = box[1] - box[0]
        else:
            point_diff = box[1] - box[2]
        # Pixel extent -> metric width via similar triangles, per-axis focal.
        grab_width = depth * np.sqrt(point_diff[0]**2 / K[0]**2 + point_diff[1]**2 / K[4]**2)
        return grab_width
    else:
        return 0.0
class DetectNode(Node):
    """ROS 2 node that runs YOLO instance segmentation on camera frames and
    answers /vision_object_recognition service calls with 6-DoF object poses
    computed by ICP against a reference model point cloud."""

    def __init__(self, name, mode):
        super().__init__(name)
        self.mode = mode
        # Populated by _init_param / _init_model below.
        self.device = None
        self.checkpoint_path = None
        self.checkpoint_name = None
        self.output_boxes = None
        self.output_masks = None
        self.function = None  # per-frame inference entry point
        self.calculate_function = None  # pose backend ('cpu' only for now)
        self.K = None  # flattened 3x3 intrinsics of the camera being served
        self.fx = self.fy = 0.5  # down-scale factors applied to large crops
        self.camera_data = {}  # latest frame bundle per camera position
        self.cv_bridge = CvBridge()
        self.lock = threading.Lock()  # guards camera_data (subscriber vs. service)
        '''init'''
        self._init_param()
        self.source_model = o3d.io.read_point_cloud(self.source_model_path)
        if mode == 'detect':
            self._init_model()
        else:
            # Unknown mode: self.function stays None and a later service
            # call would fail when it is invoked.
            self.function = None
            self.get_logger().error('Error: Mode Unknown')
        if self.device == 'cpu':
            self.calculate_function = calculate_pose_cpu
        elif self.device == 'gpu':
            raise NotImplementedError('Function: calculate_pose_gpu not implemented')
        else:
            raise ValueError(f"device must be cpu or gpu, now {self.device}")
        self._init_publisher()
        self._init_subscriber()
        self._init_service()

    def _init_param(self):
        """Declare ROS parameters and cache their values."""
        self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
        self.checkpoint_name = self.get_parameter('checkpoint_name').value
        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
        self.source_model_path = os.path.join(share_dir, 'pointclouds/bottle_model.pcd')
        self.declare_parameter('output_boxes', True)
        self.output_boxes = self.get_parameter('output_boxes').value
        self.declare_parameter('output_masks', False)
        self.output_masks = self.get_parameter('output_masks').value
        self.declare_parameter('set_confidence', 0.25)
        self.set_confidence = self.get_parameter('set_confidence').value
        self.declare_parameter('device', 'cpu')
        self.device = self.get_parameter('device').value
        self.declare_parameter('classes', 'None')
        # 'classes' arrives as a string (e.g. "None" or "[0, 39]") and is
        # parsed as a Python literal.
        self.classes = ast.literal_eval(self.get_parameter('classes').value)

    def _init_model(self):
        """Load the YOLO checkpoint and select the inference entry point."""
        device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.model = YOLO(self.checkpoint_path).to(device_model)
        except Exception as e:
            self.get_logger().error(f'Failed to load YOLO model: {e}')
            raise
        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
        if self.checkpoint_name.endswith('-seg.pt'):
            self.function = self._seg_image
        else:
            # Non-segmentation checkpoint: no handler is installed.
            self.function = None
            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')

    def _init_publisher(self):
        """Create the annotated-image publisher when any overlay is enabled."""
        if self.output_boxes or self.output_masks:
            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)

    def _init_service(self):
        """Create the object-recognition service server."""
        self.server = self.create_service(
            VisionObjectRecognition,
            "/vision_object_recognition",
            self._service_callback
        )

    def _init_subscriber(self):
        """Subscribe to the camera frame stream."""
        self.sub_img = self.create_subscription(
            ImgMsg,
            "/img_msg",
            self._sub_callback,
            10
        )

    def _sub_callback(self, msg):
        """Cache the latest frame bundle keyed by the camera position."""
        with self.lock:
            self.camera_data[msg.position] = [
                msg.image_color,
                msg.image_depth,
                msg.karr,  # intrinsics
                msg.darr   # distortion coefficients
            ]

    def _service_callback(self, request, response):
        """Run detection on the requested camera's latest frame and fill the response."""
        response.header.stamp = self.get_clock().now().to_msg()
        response.header.frame_id = "camera_detect"
        with self.lock:
            if request.camera_position in self.camera_data:
                color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
            else:
                response.success = False
                response.info = f"{request.camera_position} Camera data is empty or name is wrong"
                response.objects = []
                return response
        self.camera_size = [color_img_ros.width, color_img_ros.height]
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
        # Undistort both images and switch to the post-undistortion intrinsics.
        map1, map2, self.K = get_map(self.K, D, self.camera_size)
        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
        img, pose_dict = self.function(color_img_cv, depth_img_cv)
        """masks为空结束这一帧"""
        # When nothing was segmented, publish the raw frame instead of an
        # annotated one.
        if self.output_boxes or self.output_masks:
            if img is None:
                img = color_img_ros
            self.pub_detect_image.publish(img)
        if pose_dict:
            response.info = "Success get pose"
            response.success = True
            # self.get_logger().info('get_pose')
            for (class_id, class_name), pose_list in pose_dict.items():
                response.objects.append(
                    PoseClassAndID(
                        class_name = class_name,
                        class_id = class_id,
                        pose_list = pose_list
                    )
                )
        else:
            response.info = "pose dict is empty"
            response.success = False
            response.objects = []
        return response

    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
        """Segment objects, estimate one pose per instance, draw overlays.

        Returns:
            (annotated ROS Image or None, pose_dict) where pose_dict maps
            (class_id, class_name) -> list[Pose]; (None, None) when nothing
            was segmented.
        """
        pose_dict = defaultdict(list)
        '''Get Predict Results'''
        # time1 = time.time()
        # NOTE(review): classes is hard-coded to [39] (presumably the COCO
        # 'bottle' id); the declared 'classes' parameter is ignored here.
        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[39])
        # time2 = time.time()
        result = results[0]
        '''Get masks'''
        if result.masks is None or len(result.masks) == 0:
            return None, None
        masks = result.masks.data.cpu().numpy()
        orig_shape = result.masks.orig_shape
        '''Get boxes'''
        boxes = result.boxes.data.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy()
        labels = result.names
        # time3 = time.time()
        for i, (mask, box) in enumerate(zip(masks, boxes)):
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
            if depth_crop is None:
                continue
            if mask.shape[0] >= (orig_shape[0] * 0.5) and mask.shape[1] >= (orig_shape[1] * 0.5):
                # Large instance: down-scale the crop by (fx, fy) to bound the
                # ICP cost, and scale the intrinsics to match.
                mask_crop = cv2.resize(mask_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
                depth_crop = cv2.resize(depth_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
                intrinsics = o3d.camera.PinholeCameraIntrinsic(
                    int(self.camera_size[0] * self.fx),
                    int(self.camera_size[1] * self.fy),
                    self.K[0] * self.fx,
                    self.K[4] * self.fy,
                    (self.K[2] - x_min) * self.fx,
                    (self.K[5] - y_min) * self.fy
                )
            else:
                # Shift the principal point into the crop's coordinate frame.
                intrinsics = o3d.camera.PinholeCameraIntrinsic(
                    int(self.camera_size[0]),
                    int(self.camera_size[1]),
                    self.K[0],
                    self.K[4],
                    self.K[2] - x_min,
                    self.K[5] - y_min
                )
            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, self.source_model)
            grab_width = calculate_grav_width(mask, self.K, z)
            # Push the grasp depth in by 45% of the estimated object width —
            # presumably toward the object's centreline; TODO confirm factor.
            z = z + grab_width * 0.45
            if (x, y, z) == (None, None, None):
                self.get_logger().error("have wrong pose")
                continue
            # An all-zero position is the "no valid points" sentinel from
            # calculate_pose_cpu and is silently skipped.
            if (x, y, z) != (0.0, 0.0, 0.0):
                pose = Pose()
                pose.position = Point(x=x, y=y, z=z)
                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
        # time4 = time.time()
        # self.get_logger().info(f'start')
        # self.get_logger().info(f'{(time2 - time1)*1000} ms, model predict')
        # self.get_logger().info(f'{(time3 - time2)*1000} ms, get mask and some param')
        # self.get_logger().info(f'{(time4 - time3)*1000} ms, calculate all mask PCA')
        # self.get_logger().info(f'{(time4 - time1)*1000} ms, completing a picture entire process')
        # self.get_logger().info(f'end')
        '''mask_img and box_img is or not output'''
        if self.output_boxes and not self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        elif self.output_boxes and self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
            draw_mask(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        elif not self.output_boxes and self.output_masks:
            draw_mask(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        else:
            return None, pose_dict
def main(args=None):
    """Entry point: initialise rclpy and spin the node until interrupted."""
    rclpy.init(args=args)
    detect_node = DetectNode('detect', 'detect')
    try:
        rclpy.spin(detect_node)
    except KeyboardInterrupt:
        pass
    finally:
        # Always release node resources and shut the context down cleanly.
        detect_node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -1,446 +0,0 @@
import os
import time
import threading
import ast
from collections import defaultdict
from ament_index_python.packages import get_package_share_directory
import cv2
import open3d as o3d
import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
import torch
from ultralytics import YOLO
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
share_dir = get_package_share_directory('vision_detect')
def get_map(K, D, camera_size):
    """Build cv2 undistortion remap tables.

    Returns (map1, map2, new_K.flatten()): remap LUTs plus the flattened
    optimal intrinsics valid after undistortion.
    """
    h, w = camera_size[::-1]  # camera_size is (width, height)
    K = np.array(K).reshape(3, 3)
    D = np.array(D)
    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
    return map1, map2, new_K.flatten()
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point set.

    Returns (eigenvalues, eigenvectors) of the covariance matrix —
    descending when sort=True — or (None, None) on failure.
    """
    center = np.mean(data, axis=0)
    centered_points = data - center  # remove the centroid
    try:
        cov_matrix = np.cov(centered_points.T)  # np.cov wants variables in rows
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        # NOTE: rebinds the boolean parameter `sort` to the index array.
        sort = eigenvalues.argsort()[::-1]  # descending order
        eigenvalues = eigenvalues[sort]
        eigenvectors = eigenvectors[:, sort]
    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
    """Estimate a PCA-based 6-DoF pose for the masked depth region.

    Builds a point cloud from the masked depth pixels, filters outliers,
    takes the cloud centroid as the position and the PCA axes as the
    orientation, then maps the pose through the hand-eye calibration.

    Args:
        mask: binary instance mask aligned with depth_img.
        depth_img: 16-bit depth image in millimetres.
        intrinsics: o3d.camera.PinholeCameraIntrinsic matching depth_img.
        hand_eye_mat: 4x4 camera-to-robot transform.

    Returns:
        (x, y, z, rw, rx, ry, rz); all zeros when no points survive
        filtering, and (0, 0, 0, None, None, None, None) when PCA fails.
    """
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,  # millimetres -> metres
        depth_trunc=8.0,
    )
    point_cloud = point_cloud.remove_non_finite_points()
    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    center = clean_pcd.get_center()
    x, y, z = center
    w, v = pca(np.asarray(clean_pcd.points))
    if w is None or v is None:
        # BUG FIX: the original fell through to the final return with
        # rw/rx/ry/rz (and rmat) unbound, raising NameError whenever PCA
        # failed. Return a zero position the caller already filters out.
        return 0.0, 0.0, 0.0, None, None, None, None
    vx, vy, vz = v[:,0], v[:,1], v[:,2]
    # Fix the sign of the first two axes for a reproducible frame, then
    # flip the third if needed so the basis stays right-handed.
    if vx[0] < 0:
        vx = -vx
    if vy[1] < 0:
        vy = -vy
    if not np.allclose(np.cross(vx, vy), vz):
        vz = -vz
    R = np.column_stack((vx, vy, vz))
    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
    rmat = hand_eye_mat @ rmat  # camera frame -> robot frame
    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
    x, y, z = rmat[0:3, 3].flatten()
    return x, y, z, rw, rx, ry, rz
def draw_box(set_confidence, rgb_img, result):
    """Draw a labelled bounding box (in place) for each detection whose
    confidence reaches set_confidence."""
    boxes = result.boxes.xywh.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names
    for i, box in enumerate(boxes):
        if confidences[i] >= set_confidence:
            x_center, y_center, width, height = box[:4]
            # Convert centre/size to the two opposite corners.
            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
def draw_mask(set_confidence, rgb_img, result):
    """Alpha-blend red (in place) over each confident instance mask."""
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    confidences = result.boxes.conf.cpu().numpy()
    for i, mask in enumerate(masks):
        if confidences[i] >= set_confidence:
            # Upscale the low-resolution mask to the original image size.
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            # 50/50 blend with pure red (BGR order) over masked pixels.
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
        else:
            continue
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a color/depth pair with precomputed remap tables.

    Color is resampled bilinearly; depth uses nearest-neighbour so no
    interpolated depth values are invented.
    """
    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
    undistorted_color = undistorted_color.astype(color_image.dtype)
    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
    return undistorted_color, undistorted_depth
def crop_mask_bbox(depth_img, mask, box):
    """Crop depth and mask to a detection's bounding box, clamped to bounds.

    Args:
        depth_img: depth image, H x W.
        mask: instance mask, H x W (0/1 or bool).
        box: (x_center, y_center, w, h) in pixels.

    Returns:
        (depth_crop, mask_crop, (x_off, y_off)) — the offsets are the crop's
        top-left corner in full-image coordinates.
    """
    high, width = depth_img.shape  # `high` is the image height
    x_center, y_center, w, h = box[:4]
    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
    # Clamp to the image; numpy slicing clips an end index past the edge.
    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
class DetectNode(Node):
    """ROS 2 node: YOLO instance segmentation plus PCA-based pose
    estimation, served through /vision_object_recognition."""

    def __init__(self, name, mode):
        super().__init__(name)
        self.mode = mode
        self.device = None
        self.checkpoint_path = None
        self.checkpoint_name = None
        self.output_boxes = None
        self.output_masks = None
        self.function = None  # per-frame inference entry point
        self.calculate_function = None  # pose backend ('cpu' only for now)
        self.eye_in_hand_mat = None  # hand-eye calibration for wrist cameras
        self.eye_to_hand_mat = None  # hand-eye calibration for the fixed camera
        self.K = None  # flattened 3x3 intrinsics of the camera being served
        self.fx = self.fy = 0.5  # down-scale factors (unused in this revision)
        self.camera_data = {}  # latest frame bundle per camera position
        self.cv_bridge = CvBridge()
        self.lock = threading.Lock()  # guards camera_data (subscriber vs. service)
        '''init'''
        self._init_param()
        if mode == 'detect':
            self._init_model()
        else:
            self.function = None
            self.get_logger().error('Error: Mode Unknown')
        if self.device == 'cpu':
            self.calculate_function = calculate_pose_cpu
        elif self.device == 'gpu':
            raise NotImplementedError('Function: calculate_pose_gpu not implemented')
        else:
            raise ValueError(f"device must be cpu or gpu, now {self.device}")
        self._init_publisher()
        self._init_subscriber()
        self._init_service()

    def _init_param(self):
        """Declare ROS parameters, including the two hand-eye matrices."""
        self.declare_parameter('checkpoint_name', 'hivecorebox-seg.pt')
        self.checkpoint_name = self.get_parameter('checkpoint_name').value
        self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
        self.declare_parameter('output_boxes', True)
        self.output_boxes = self.get_parameter('output_boxes').value
        self.declare_parameter('output_masks', True)
        self.output_masks = self.get_parameter('output_masks').value
        self.declare_parameter('set_confidence', 0.60)
        self.set_confidence = self.get_parameter('set_confidence').value
        self.declare_parameter('device', 'cpu')
        self.device = self.get_parameter('device').value
        self.declare_parameter('classes', 'None')
        # 'classes' arrives as a string and is parsed as a Python literal.
        self.classes = ast.literal_eval(self.get_parameter('classes').value)
        self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
                                               0.0, 1.0, 0.0, 0.0,
                                               0.0, 0.0, 1.0, 0.0,
                                               0.0, 0.0, 0.0, 1.0])
        self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)
        self.declare_parameter('eye_to_hand', [1.0, 0.0, 0.0, 0.0,
                                               0.0, 1.0, 0.0, 0.0,
                                               0.0, 0.0, 1.0, 0.0,
                                               0.0, 0.0, 0.0, 1.0])
        self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)

    def _init_model(self):
        """Load the YOLO checkpoint and select the inference entry point."""
        device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.model = YOLO(self.checkpoint_path).to(device_model)
        except Exception as e:
            self.get_logger().error(f'Failed to load YOLO model: {e}')
            raise
        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
        if self.checkpoint_name.endswith('-seg.pt'):
            self.function = self._seg_image
        else:
            # Non-segmentation checkpoint: no handler is installed.
            self.function = None
            self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')

    def _init_publisher(self):
        """Create the annotated-image publisher when any overlay is enabled."""
        if self.output_boxes or self.output_masks:
            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)

    def _init_service(self):
        """Create the object-recognition service server."""
        self.server = self.create_service(
            VisionObjectRecognition,
            "/vision_object_recognition",
            self._service_callback
        )

    def _init_subscriber(self):
        """Subscribe to the camera frame stream."""
        self.sub_img = self.create_subscription(
            ImgMsg,
            "/img_msg",
            self._sub_callback,
            10
        )

    def _sub_callback(self, msg):
        """Cache the latest frame bundle keyed by the camera position."""
        with self.lock:
            self.camera_data[msg.position] = [
                msg.image_color,
                msg.image_depth,
                msg.karr,  # intrinsics
                msg.darr   # distortion coefficients
            ]

    def _service_callback(self, request, response):
        """Run detection on the requested camera's latest frame and fill the response."""
        response.header.stamp = self.get_clock().now().to_msg()
        response.header.frame_id = "camera_detect"
        with self.lock:
            if request.camera_position in self.camera_data:
                color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
            else:
                response.success = False
                response.info = f"{request.camera_position} Camera data is empty or name is wrong"
                response.objects = []
                return response
        # Wrist cameras use the eye-in-hand calibration; anything else is
        # treated as the fixed (eye-to-hand) camera.
        if request.camera_position == 'left' or request.camera_position == 'right':
            hand_eye_mat = self.eye_in_hand_mat
        else:
            hand_eye_mat = self.eye_to_hand_mat
        self.camera_size = [color_img_ros.width, color_img_ros.height]
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
        # Undistort both images and switch to the post-undistortion intrinsics.
        map1, map2, self.K = get_map(self.K, D, self.camera_size)
        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
        img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)
        """masks为空结束这一帧"""
        # When nothing was segmented, publish the raw frame instead.
        if self.output_boxes or self.output_masks:
            if img is None:
                img = color_img_ros
            self.pub_detect_image.publish(img)
        if pose_dict:
            response.info = "Success get pose"
            response.success = True
            # self.get_logger().info('get_pose')
            for (class_id, class_name), pose_list in pose_dict.items():
                response.objects.append(
                    PoseClassAndID(
                        class_name = class_name,
                        class_id = class_id,
                        pose_list = pose_list
                    )
                )
        else:
            response.info = "pose dict is empty"
            response.success = False
            response.objects = []
        return response

    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
        """Segment objects and estimate one hand-eye-mapped pose per instance.

        Returns:
            (annotated ROS Image or None, pose_dict) where pose_dict maps
            (class_id, class_name) -> list[Pose]; (None, None) when nothing
            was segmented.
        """
        pose_dict = defaultdict(list)
        # Black out pixels with no depth or farther than 2 m before
        # inference so the detector only sees reachable objects.
        depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
        depth_filter_mask[(depth_img > 0) & (depth_img < 2000)] = 1
        rgb_img[depth_filter_mask == 0] = 0
        '''Get Predict Results'''
        # NOTE(review): classes is hard-coded to [0]; the declared 'classes'
        # parameter is ignored here.
        results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[0])
        result = results[0]
        '''Get masks'''
        if result.masks is None or len(result.masks) == 0:
            return None, None
        masks = result.masks.data.cpu().numpy()
        orig_shape = result.masks.orig_shape
        '''Get boxes'''
        boxes = result.boxes.xywh.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy()
        labels = result.names
        for i, (mask, box) in enumerate(zip(masks, boxes)):
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
            if depth_crop is None:
                continue
            # Shift the principal point into the crop's coordinate frame.
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0]),
                int(self.camera_size[1]),
                self.K[0],
                self.K[4],
                self.K[2] - x_min,
                self.K[5] - y_min
            )
            x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)
            if (x, y, z) == (None, None, None):
                self.get_logger().error("have wrong pose")
                continue
            # An all-zero position is the "no valid points" sentinel and is
            # silently skipped.
            if (x, y, z) != (0.0, 0.0, 0.0):
                pose = Pose()
                pose.position = Point(x=x, y=y, z=z)
                pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
                pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
        '''mask_img and box_img is or not output'''
        if self.output_boxes and not self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        elif self.output_boxes and self.output_masks:
            draw_box(self.set_confidence, rgb_img, result)
            draw_mask(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        elif not self.output_boxes and self.output_masks:
            draw_mask(self.set_confidence, rgb_img, result)
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
        else:
            return None, pose_dict
def main(args=None):
    """Entry point: initialise rclpy, spin the node, clean up on exit."""
    rclpy.init(args=args)
    node = DetectNode('detect', 'detect')
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        # Always release node resources and shut the context down cleanly.
        node.destroy_node()
        rclpy.shutdown()
if __name__ == '__main__':
    main()

View File

@@ -1,415 +0,0 @@
import os
import time
import threading
import ast
from collections import defaultdict
from ament_index_python.packages import get_package_share_directory
import cv2
import open3d as o3d
import numpy as np
import transforms3d as tfs
from cv_bridge import CvBridge
import torch
from ultralytics import YOLO
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
share_dir = get_package_share_directory('vision_detect')
def get_map(K, D, camera_size):
    """Build cv2 undistortion remap tables.

    Returns (map1, map2, new_K.flatten()): remap LUTs plus the flattened
    optimal intrinsics valid after undistortion.
    """
    h, w = camera_size[::-1]  # camera_size is (width, height)
    K = np.array(K).reshape(3, 3)
    D = np.array(D)
    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
    return map1, map2, new_K.flatten()
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point set.

    Returns (eigenvalues, eigenvectors) of the covariance matrix —
    descending when sort=True — or (None, None) on failure.
    """
    center = np.mean(data, axis=0)
    centered_points = data - center  # remove the centroid
    try:
        cov_matrix = np.cov(centered_points.T)  # np.cov wants variables in rows
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        # NOTE: rebinds the boolean parameter `sort` to the index array.
        sort = eigenvalues.argsort()[::-1]  # descending order
        eigenvalues = eigenvalues[sort]
        eigenvectors = eigenvectors[:, sort]
    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
    """PCA-based pose of the masked depth region, mapped through hand-eye.

    Returns:
        (x, y, z, rw, rx, ry, rz); all zeros when no points survive
        filtering, (0, 0, 0, None, None, None, None) when PCA fails.
    """
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,  # millimetres -> metres
        depth_trunc=3.0,  # discard points beyond 3 m
    )
    point_cloud = point_cloud.remove_non_finite_points()
    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.020)
    # Two-stage outlier rejection: radius filter, then statistical filter.
    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    center = clean_pcd.get_center()
    x, y, z = center
    w, v = pca(np.asarray(clean_pcd.points))
    if w is not None and v is not None:
        vx, vy, vz = v[:,0], v[:,1], v[:,2]
        # Fix the sign of the first two axes for a reproducible frame, then
        # flip the third if needed so the basis stays right-handed.
        if vx[0] < 0:
            vx = -vx
        if vy[1] < 0:
            vy = -vy
        if not np.allclose(np.cross(vx, vy), vz):
            vz = -vz
        R = np.column_stack((vx, vy, vz))
        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
        rmat = hand_eye_mat @ rmat  # camera frame -> robot frame
        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
        x, y, z = rmat[0:3, 3].flatten()
        return x, y, z, rw, rx, ry, rz
    else:
        return 0.0, 0.0, 0.0, None, None, None, None
def draw_box(set_confidence, rgb_img, result):
    """Draw a labelled bounding box (in place) for each detection whose
    confidence reaches set_confidence."""
    boxes = result.boxes.xywh.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names
    for i, box in enumerate(boxes):
        if confidences[i] >= set_confidence:
            x_center, y_center, width, height = box[:4]
            # Convert centre/size to the two opposite corners.
            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
def draw_mask(set_confidence, rgb_img, result):
    """Alpha-blend red (in place) over each confident instance mask."""
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    confidences = result.boxes.conf.cpu().numpy()
    for i, mask in enumerate(masks):
        if confidences[i] >= set_confidence:
            # Upscale the low-resolution mask to the original image size.
            mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
            # 50/50 blend with pure red (BGR order) over masked pixels.
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
        else:
            continue
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a color/depth pair with precomputed remap tables.

    Color is resampled bilinearly; depth uses nearest-neighbour so no
    interpolated depth values are invented.
    """
    undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
    undistorted_color = undistorted_color.astype(color_image.dtype)
    undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
    undistorted_depth = undistorted_depth.astype(depth_image.dtype)
    return undistorted_color, undistorted_depth
def crop_mask_bbox(depth_img, mask, box):
    """Crop depth and mask to a detection's bounding box, clamped to bounds.

    Args:
        depth_img: depth image, H x W.
        mask: instance mask, H x W (0/1 or bool).
        box: (x_center, y_center, w, h) in pixels.

    Returns:
        (depth_crop, mask_crop, (x_off, y_off)) — the offsets are the crop's
        top-left corner in full-image coordinates.
    """
    high, width = depth_img.shape  # `high` is the image height
    x_center, y_center, w, h = box[:4]
    x_min, x_max = int(round(x_center - w/2)), int(round(x_center + w/2))
    y_min, y_max = int(round(y_center - h/2)), int(round(y_center + h/2))
    # Clamp to the image; numpy slicing clips an end index past the edge.
    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
class DetectNode(Node):
def __init__(self, name):
super().__init__(name)
self.device = None
self.checkpoint_path = None
self.checkpoint_name = None
self.output_boxes = None
self.output_masks = None
self.function = None
self.K = None
self.eye_in_hand_mat = None
self.eye_to_hand_mat = None
self.calculate_function = calculate_pose_cpu
self.fx = self.fy = 0.5
self.camera_data = {}
self.cv_bridge = CvBridge()
self.lock = threading.Lock()
'''init'''
self._init_param()
self._init_model()
self._init_publisher()
self._init_subscriber()
self._init_service()
def _init_param(self):
"""init parameter"""
self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
self.checkpoint_name = self.get_parameter('checkpoint_name').value
self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
self.declare_parameter('output_boxes', True)
self.output_boxes = self.get_parameter('output_boxes').value
self.declare_parameter('output_masks', False)
self.output_masks = self.get_parameter('output_masks').value
self.declare_parameter('set_confidence', 0.25)
self.set_confidence = self.get_parameter('set_confidence').value
self.declare_parameter('classes', 'None')
self.classes = ast.literal_eval(self.get_parameter('classes').value)
self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0])
self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)
self.declare_parameter('eye_to_hand', [1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1])
self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
def _init_model(self):
"""init model"""
device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
try:
self.model = YOLO(self.checkpoint_path).to(device_model)
except Exception as e:
self.get_logger().error(f'Failed to load YOLO model: {e}')
raise
self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
if self.checkpoint_name.endswith('-seg.pt'):
self.function = self._seg_image
else:
self.function = None
self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
def _init_publisher(self):
"""init publisher"""
if self.output_boxes or self.output_masks:
self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
def _init_service(self):
"""init service server"""
self.server = self.create_service(
VisionObjectRecognition,
"/vision_object_recognition",
self._service_callback
)
def _init_subscriber(self):
"""init subscriber"""
self.sub_img = self.create_subscription(
ImgMsg,
"/img_msg",
self._sub_callback,
10
)
def _sub_callback(self, msg):
"""同步回调函数"""
with self.lock:
self.camera_data[msg.position] = [
msg.image_color,
msg.image_depth,
msg.karr,
msg.darr
]
def _service_callback(self, request, response):
    """Service handler: fetch the cached frame for the requested camera,
    undistort it, run the detection handler, and fill the response with
    per-class object poses.

    Args:
        request: VisionObjectRecognition request; ``camera_position`` names
            the camera whose most recent cached frame is used.
        response: response message to populate (header, success, info, objects).

    Returns:
        The populated response message.
    """
    response.header.stamp = self.get_clock().now().to_msg()
    response.header.frame_id = "camera_detect"
    # Copy the cached frame references under the lock; heavy processing
    # (undistortion, inference) happens after it is released.
    # NOTE(review): self.K and self.camera_size are overwritten per request —
    # concurrent service calls would race on them; confirm the node runs on a
    # single-threaded executor.
    with self.lock:
        if request.camera_position in self.camera_data:
            color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
        else:
            response.success = False
            response.info = f"{request.camera_position} Camera data is empty or name is wrong"
            response.objects = []
            return response
    # Wrist cameras ('left'/'right') use the eye-in-hand calibration; any
    # other camera position uses the fixed eye-to-hand calibration.
    if request.camera_position == 'left' or request.camera_position == 'right':
        hand_eye_mat = self.eye_in_hand_mat
    else:
        hand_eye_mat = self.eye_to_hand_mat
    self.camera_size = [color_img_ros.width, color_img_ros.height]
    color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
    depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
    # Build undistortion maps; get_map also returns the rectified intrinsics.
    map1, map2, self.K = get_map(self.K, D, self.camera_size)
    color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
    # self.function is the model-specific handler selected in _init_model.
    img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)
    """masks为空结束这一帧"""
    # (original note above: "end this frame when masks are empty")
    if self.output_boxes or self.output_masks:
        # No overlay produced: publish the raw color image as a fallback.
        if img is None:
            img = color_img_ros
        self.pub_detect_image.publish(img)
    if pose_dict:
        response.info = "Success get pose"
        response.success = True
        for (class_id, class_name), pose_list in pose_dict.items():
            response.objects.append(
                PoseClassAndID(
                    class_name = class_name,
                    class_id = class_id,
                    pose_list = pose_list
                )
            )
        return response
    else:
        response.info = "pose dict is empty"
        response.success = False
        response.objects = []
        return response
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
    """Run the segmentation model and estimate one pose per detected instance.

    Args:
        rgb_img: undistorted BGR image; overlays are drawn on it in place
            when box/mask output is enabled.
        depth_img: undistorted depth image aligned with ``rgb_img``.
        hand_eye_mat: 4x4 hand-eye calibration matrix passed through to the
            pose computation.

    Returns:
        Tuple ``(annotated_image_or_None, pose_dict)`` where ``pose_dict``
        maps ``(class_id, class_name)`` to a list of Pose messages; both
        elements are ``None`` when the model returns no masks.
    """
    pose_dict = defaultdict(list)
    # --- model prediction (retina_masks yields full-resolution masks) ---
    time1 = time.time()
    results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=self.classes)
    time2 = time.time()
    result = results[0]
    # No masks: nothing detected in this frame.
    if result.masks is None or len(result.masks) == 0:
        return None, None
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    # --- boxes, class ids and label map ---
    boxes = result.boxes.xywh.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names
    time3 = time.time()
    for i, (mask, box) in enumerate(zip(masks, boxes)):
        # orig_shape is (h, w) but cv2.resize expects (w, h).
        mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
        depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)
        if depth_crop is None:
            continue
        # Shift the principal point so the intrinsics match the cropped ROI.
        intrinsics = o3d.camera.PinholeCameraIntrinsic(
            int(self.camera_size[0]),
            int(self.camera_size[1]),
            self.K[0],
            self.K[4],
            self.K[2] - x_min,
            self.K[5] - y_min
        )
        x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)
        # (0, 0, 0) is the sentinel for "pose could not be computed".
        if (x, y, z) != (0.0, 0.0, 0.0):
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)
    time4 = time.time()
    # --- timing breakdown for profiling ---
    self.get_logger().info(f'start')
    self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
    self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
    self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
    self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
    self.get_logger().info(f'end')
    # Draw the requested overlays (boxes first, matching the original order),
    # then return a single annotated image; replaces a four-way if/elif that
    # duplicated the draw/convert calls — behavior is identical.
    if self.output_boxes:
        draw_box(self.set_confidence, rgb_img, result)
    if self.output_masks:
        draw_mask(self.set_confidence, rgb_img, result)
    if self.output_boxes or self.output_masks:
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    return None, pose_dict
def main(args=None):
    """Entry point: initialize rclpy and spin the detect node until shutdown."""
    rclpy.init(args=args)
    detect_node = DetectNode('detect')
    try:
        rclpy.spin(detect_node)
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal way to stop the node
    finally:
        # Always release node resources and shut the context down.
        detect_node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()

Some files were not shown because too many files have changed in this diff Show More