Compare commits
53 Commits
feature-hu
...
feature_cp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc520c349e | ||
|
|
52c10dcb3c | ||
|
|
611b529075 | ||
|
|
039bb2d165 | ||
|
|
1c9af20786 | ||
|
|
a59776e6c2 | ||
|
|
f937329e4b | ||
|
|
6fbf13e232 | ||
|
|
cd4bbdc5f3 | ||
|
|
97c142f6e3 | ||
|
|
eeedd4bfc6 | ||
|
|
08319d9eba | ||
|
|
7d3424b7e5 | ||
|
|
3025b5a23a | ||
|
|
563737932b | ||
|
|
93f3a8d45e | ||
|
|
12fed65d8a | ||
|
|
87a5697e75 | ||
|
|
8279687f4c | ||
|
|
aff813b107 | ||
|
|
a89e525e7a | ||
|
|
a5f340bc35 | ||
|
|
d730325a0d | ||
|
|
2e4cb18fa6 | ||
|
|
facd43f54f | ||
|
|
59b90201e4 | ||
|
|
db330ec33c | ||
|
|
610f488d1e | ||
|
|
d62faaa9cc | ||
|
|
eec02de5a7 | ||
|
|
389bd2b50e | ||
|
|
b8444c82b9 | ||
|
|
44b2cc43e0 | ||
|
|
4f8ae43239 | ||
|
|
78c129da83 | ||
|
|
0f27c5cd3c | ||
|
|
0c41be3154 | ||
|
|
6dafd77bcc | ||
|
|
9fd8951fae | ||
|
|
c5a4b5afed | ||
|
|
6ddf95f737 | ||
|
|
360372f681 | ||
|
|
b264bbd6bb | ||
|
|
ae8f7d370e | ||
|
|
e13deca6b7 | ||
|
|
f8d4167542 | ||
|
|
8c7c38eaf4 | ||
|
|
250e2600f8 | ||
|
|
98483772c2 | ||
|
|
ed4136b36d | ||
|
|
0af9c10189 | ||
|
|
36077ceb19 | ||
|
|
8e13d777ff |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@
|
||||
**/build/
|
||||
**/install/
|
||||
**/log/
|
||||
**/VisionDetect/**/**.so
|
||||
28
calibration_mat/eye_in_left_hand.json
Normal file
28
calibration_mat/eye_in_left_hand.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"T": [
|
||||
[
|
||||
-0.029312906184646676,
|
||||
0.9941492398859674,
|
||||
0.10396173510077301,
|
||||
-0.0809046635564155
|
||||
],
|
||||
[
|
||||
-0.9993360095995784,
|
||||
-0.02689542239474161,
|
||||
-0.024579995358005674,
|
||||
0.036195409684615196
|
||||
],
|
||||
[
|
||||
-0.021640088923136416,
|
||||
-0.10461321660460321,
|
||||
0.9942775173275502,
|
||||
0.04913468358915477
|
||||
],
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
1.0
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -1,22 +1,22 @@
|
||||
{
|
||||
"T": [
|
||||
[
|
||||
0.013635791720580838,
|
||||
0.9992765703636767,
|
||||
-0.03550212819481636,
|
||||
-0.08645650535173793
|
||||
0.02515190926712163,
|
||||
0.9984434279780899,
|
||||
-0.049780544267610596,
|
||||
-0.08312977955463981
|
||||
],
|
||||
[
|
||||
-0.9998865276218899,
|
||||
0.013399556063222661,
|
||||
-0.006883587549256321,
|
||||
0.036196137264974
|
||||
-0.9996337211449028,
|
||||
0.025617085950903107,
|
||||
0.008728599966714646,
|
||||
0.059044674332170574
|
||||
],
|
||||
[
|
||||
-0.006402895000908794,
|
||||
0.03559196285001416,
|
||||
0.999345893630474,
|
||||
0.014407883180676354
|
||||
0.00999024575340213,
|
||||
0.04954276975245833,
|
||||
0.9987220378839358,
|
||||
0.017378234075134728
|
||||
],
|
||||
[
|
||||
0.0,
|
||||
|
||||
28
calibration_mat/eye_to_hand.json
Normal file
28
calibration_mat/eye_to_hand.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"T": [
|
||||
[
|
||||
0.01868075138315295,
|
||||
0.8881359544145228,
|
||||
0.45920099738999437,
|
||||
-0.19284748723617093
|
||||
],
|
||||
[
|
||||
0.002854261818715398,
|
||||
-0.4593266423892484,
|
||||
0.8882628489252997,
|
||||
0.057154511710361816
|
||||
],
|
||||
[
|
||||
0.9998214254141742,
|
||||
-0.01528273756969839,
|
||||
-0.011115539354645598,
|
||||
-0.04187423675125192
|
||||
],
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
1.0
|
||||
]
|
||||
]
|
||||
}
|
||||
@@ -192,13 +192,13 @@ class _Calibration:
|
||||
np.linalg.inv(
|
||||
self._function(
|
||||
self._hand[i], self._hand[i + 1], self._hand[i + 2],
|
||||
self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
|
||||
self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
|
||||
)
|
||||
)
|
||||
if self._mode == 'eye_to_hand' else
|
||||
self._function(
|
||||
self._hand[i], self._hand[i + 1], self._hand[i + 2],
|
||||
self._hand[i + 3], self._hand[i + 4], self._hand[i + 5], self._hand[i + 6]
|
||||
self._hand[i + 6], self._hand[i + 3], self._hand[i + 4], self._hand[i + 5]
|
||||
)
|
||||
)
|
||||
self._Hcs.append(
|
||||
|
||||
@@ -52,7 +52,7 @@ def calculate(mode, hand_path, camera_path, hand_eye_path):
|
||||
for i in range(sum_):
|
||||
hand = [hand_all[i*7], hand_all[i*7+1], hand_all[i*7+2], hand_all[i*7+3], hand_all[i*7+4], hand_all[i*7+5], hand_all[i*7+6]]
|
||||
camera = [camera_all[i*7], camera_all[i*7+1], camera_all[i*7+2], camera_all[i*7+3], camera_all[i*7+4], camera_all[i*7+5], camera_all[i*7+6]]
|
||||
T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[3], hand[4], hand[5], hand[6])
|
||||
T_hand_in_base = get_matrix_quat(hand[0], hand[1], hand[2], hand[6], hand[3], hand[4], hand[5])
|
||||
T_cal_in_camera = get_matrix_quat(camera[0], camera[1], camera[2], camera[3], camera[4], camera[5], camera[6])
|
||||
|
||||
if mode == "eye_to_hand":
|
||||
|
||||
24
tools/pt2vino.py
Normal file
24
tools/pt2vino.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import argparse
|
||||
|
||||
from ultralytics import YOLO
|
||||
|
||||
|
||||
def main(checkpoint_path):
|
||||
model = YOLO(checkpoint_path)
|
||||
model.export(
|
||||
format="openvino",
|
||||
imgsz=(1280, 720),
|
||||
dynamic=True,
|
||||
simplify=True,
|
||||
half=True,
|
||||
workspace=0.8,
|
||||
batch=1,
|
||||
device="cpu"
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("checkpoint_path", type=str)
|
||||
args = parser.parse_args()
|
||||
main(args.checkpoint_path)
|
||||
# main(checkpoint_path="checkpoint/medical_sense-seg.pt")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"info": {},
|
||||
|
||||
"warring": {
|
||||
"0000": "Success",
|
||||
|
||||
"1000": "Detected object count is 0",
|
||||
"1001": "Depth crop is None",
|
||||
"1003": "Failed to detect a valid pose",
|
||||
|
||||
"1100": "Object point cloud contains excessive noise",
|
||||
"1101": "The point cloud is empty",
|
||||
|
||||
"1200": "The number of points is insufficient to compute an OBB",
|
||||
"1201": "PCA output vector is None",
|
||||
"1202": "This pose cannot be grab, and position refine fail",
|
||||
|
||||
"1300": "E2E model input data 'coors' are fewer than 128",
|
||||
"1301": "E2E model input data 'point_clouds' are fewer than 128",
|
||||
"1302": "The 'true num' of points is 0; No graspable points are available",
|
||||
"1303": "The model returned no predictions",
|
||||
"1304": "All rotation vector processing failed; no valid rotation matrix was generated"
|
||||
},
|
||||
|
||||
"error": {},
|
||||
|
||||
"fatal": {}
|
||||
}
|
||||
|
||||
@@ -48,5 +48,12 @@
|
||||
"ransac_voxel_size": 0.005,
|
||||
"icp_voxel_radius": [0.004, 0.002, 0.001],
|
||||
"icp_max_iter": [50, 30, 14]
|
||||
},
|
||||
"E2E_configs": {
|
||||
"checkpoint_path": "checkpoints/posenet.pt",
|
||||
"depth_scale": 1000.0,
|
||||
"depth_trunc": 3.0,
|
||||
"voxel_size": 0.010,
|
||||
"collision_thresh": 0.01
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"node_name": "bottle_detect_service",
|
||||
"node_name": "medical_sense_service",
|
||||
"output_boxes": "True",
|
||||
"output_masks": "False",
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"detect_mode": "Detect",
|
||||
"Detect_configs": {
|
||||
"checkpoint_path": "checkpoints/medical_sense-seg.pt",
|
||||
"confidence": 0.50,
|
||||
"confidence": 0.75,
|
||||
"classes": []
|
||||
},
|
||||
|
||||
@@ -25,6 +25,13 @@
|
||||
"PCA_configs": {
|
||||
"depth_scale": 1000.0,
|
||||
"depth_trunc": 3.0,
|
||||
"voxel_size": 0.010
|
||||
"voxel_size": 0.005
|
||||
},
|
||||
"E2E_configs": {
|
||||
"checkpoint_path": "checkpoints/posenet.pt",
|
||||
"depth_scale": 1000.0,
|
||||
"depth_trunc": 3.0,
|
||||
"voxel_size": 0.0005,
|
||||
"collision_thresh": 0.01
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
from launch import LaunchDescription
|
||||
from launch_ros.actions import Node
|
||||
from launch.actions import DeclareLaunchArgument, OpaqueFunction
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
|
||||
import os
|
||||
import ast
|
||||
import json
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
|
||||
def generate_launch_description():
|
||||
args_detect = [
|
||||
DeclareLaunchArgument('output_boxes', default_value='True'),
|
||||
DeclareLaunchArgument('output_masks', default_value='False'),
|
||||
DeclareLaunchArgument('color_image_topic', default_value='/camera/color/image_raw'),
|
||||
DeclareLaunchArgument('depth_image_topic', default_value='/camera/depth/image_raw'),
|
||||
DeclareLaunchArgument('camera_info_topic', default_value='/camera/color/camera_info'),
|
||||
]
|
||||
|
||||
def create_detect_node(context):
|
||||
output_boxes = LaunchConfiguration('output_boxes').perform(context)
|
||||
output_masks = LaunchConfiguration('output_masks').perform(context)
|
||||
color_image_topic = LaunchConfiguration('color_image_topic').perform(context)
|
||||
depth_image_topic = LaunchConfiguration('depth_image_topic').perform(context)
|
||||
camera_info_topic = LaunchConfiguration('camera_info_topic').perform(context)
|
||||
|
||||
return [
|
||||
Node(
|
||||
package='vision_detect',
|
||||
executable='crossboard_detect_node',
|
||||
parameters=[{
|
||||
'output_boxes': output_boxes.lower() == 'true',
|
||||
'output_masks': output_masks.lower() == 'true',
|
||||
'color_image_topic': color_image_topic,
|
||||
'depth_image_topic': depth_image_topic,
|
||||
'camera_info_topic': camera_info_topic,
|
||||
}]
|
||||
)
|
||||
]
|
||||
|
||||
return LaunchDescription(args_detect + [
|
||||
OpaqueFunction(function=create_detect_node),
|
||||
])
|
||||
@@ -1,49 +0,0 @@
|
||||
from launch import LaunchDescription
|
||||
from launch_ros.actions import Node
|
||||
from launch.actions import DeclareLaunchArgument, OpaqueFunction
|
||||
from launch.substitutions import LaunchConfiguration
|
||||
|
||||
import os
|
||||
import ast
|
||||
import json
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
config_dir = os.path.join(share_dir, 'configs/launch_configs/detect_service.json')
|
||||
with open(config_dir, "r") as f:
|
||||
configs = json.load(f)
|
||||
|
||||
def generate_launch_description():
|
||||
args_detect = [
|
||||
DeclareLaunchArgument('checkpoint_name', default_value=configs['checkpoint_name']),
|
||||
DeclareLaunchArgument('output_boxes', default_value=configs['output_boxes']),
|
||||
DeclareLaunchArgument('output_masks', default_value=configs['output_masks']),
|
||||
DeclareLaunchArgument('set_confidence', default_value=configs['set_confidence']),
|
||||
DeclareLaunchArgument('classes', default_value=configs['classes']),
|
||||
]
|
||||
|
||||
def create_detect_node(context):
|
||||
checkpoint = LaunchConfiguration('checkpoint_name').perform(context)
|
||||
output_boxes = LaunchConfiguration('output_boxes').perform(context)
|
||||
output_masks = LaunchConfiguration('output_masks').perform(context)
|
||||
conf = LaunchConfiguration('set_confidence').perform(context)
|
||||
classes = LaunchConfiguration('classes').perform(context)
|
||||
|
||||
return [
|
||||
Node(
|
||||
package='vision_detect',
|
||||
executable='detect_service_node',
|
||||
parameters=[{
|
||||
'checkpoint_name': checkpoint,
|
||||
'output_boxes': output_boxes.lower() == 'true',
|
||||
'output_masks': output_masks.lower() == 'true',
|
||||
'set_confidence': float(conf),
|
||||
'classes': classes,
|
||||
}]
|
||||
)
|
||||
]
|
||||
|
||||
return LaunchDescription(args_detect + [
|
||||
OpaqueFunction(function=create_detect_node),
|
||||
])
|
||||
@@ -26,6 +26,7 @@ def generate_launch_description():
|
||||
package='vision_detect',
|
||||
executable='detect_node',
|
||||
name=configs['node_name'],
|
||||
output="screen",
|
||||
parameters=[{
|
||||
'configs_path': configs_path,
|
||||
}]
|
||||
|
||||
@@ -1,26 +1,47 @@
|
||||
import os
|
||||
from glob import glob
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
package_name = 'vision_detect'
|
||||
|
||||
openvino_files = []
|
||||
for model_dir in glob("checkpoints/*_openvino_model"):
|
||||
model_name = os.path.basename(model_dir)
|
||||
model_files = glob(os.path.join(model_dir, '*'))
|
||||
openvino_files.append(
|
||||
('share/' + package_name + '/checkpoints/' + model_name, model_files)
|
||||
)
|
||||
|
||||
data_files = [
|
||||
('share/ament_index/resource_index/packages', ['resource/' + package_name]),
|
||||
('share/' + package_name, ['package.xml']),
|
||||
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
|
||||
('share/' + package_name + '/configs', glob('configs/*.json')),
|
||||
('share/' + package_name + '/configs/flexiv_configs',
|
||||
glob('configs/flexiv_configs/*.json')),
|
||||
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
|
||||
('share/' + package_name + '/configs/launch_configs',
|
||||
glob('configs/launch_configs/*.json')),
|
||||
('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
|
||||
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
|
||||
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
|
||||
|
||||
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
|
||||
glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
|
||||
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
|
||||
glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
|
||||
]
|
||||
|
||||
data_files.extend(openvino_files)
|
||||
|
||||
setup(
|
||||
name=package_name,
|
||||
version='0.0.0',
|
||||
packages=find_packages(exclude=['test']),
|
||||
data_files=[
|
||||
('share/ament_index/resource_index/packages', ['resource/' + package_name]),
|
||||
('share/' + package_name, ['package.xml']),
|
||||
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
|
||||
('share/' + package_name + '/configs', glob('configs/*.json')),
|
||||
('share/' + package_name + '/configs/flexiv_configs', glob('configs/flexiv_configs/*.json')),
|
||||
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
|
||||
('share/' + package_name + '/configs/launch_configs', glob('configs/launch_configs/*.json')),
|
||||
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
|
||||
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
|
||||
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
|
||||
],
|
||||
data_files=data_files,
|
||||
install_requires=['setuptools'],
|
||||
zip_safe=True,
|
||||
include_package_data=True,
|
||||
@@ -31,10 +52,6 @@ setup(
|
||||
tests_require=['pytest'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
# 'detect_service_node = vision_detect.detect_service:main',
|
||||
# 'detect_topic_node = vision_detect.detect_topic:main',
|
||||
|
||||
# 'box_detect_service_node = vision_detect.detect_box_service:main',
|
||||
# 'red_detect_topic_node = vision_detect.detect_red_topic:main',
|
||||
# 'red_detect_service_node = vision_detect.detect_red_service:main',
|
||||
|
||||
@@ -42,11 +59,8 @@ setup(
|
||||
'flexiv_detect_service_node = vision_detect.flexivaidk_detect_service:main',
|
||||
|
||||
'sub_pose_node = vision_detect.sub_pose:main',
|
||||
# 'calibration_node = vision_detect.hand_eye_calibration:main',
|
||||
# 'crossboard_detect_node = vision_detect.crossboard_detect:main',
|
||||
'service_client_node = vision_detect.service_client:main',
|
||||
# 'get_camera_pose_node = vision_detect.get_camera_pose:main',
|
||||
# 'calculate_node = vision_detect.calculate:main',
|
||||
'get_camera_pose_node = vision_detect.get_camera_pose:main',
|
||||
|
||||
'detect_node = vision_detect.detect_node:main',
|
||||
],
|
||||
|
||||
1
vision_detect/vision_detect/VisionDetect/net/__init__.py
Normal file
1
vision_detect/vision_detect/VisionDetect/net/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .gsnet import *
|
||||
@@ -0,0 +1,7 @@
|
||||
from .models.graspnet import *
|
||||
from .utils.collision_detector import *
|
||||
|
||||
|
||||
__all__ = [
|
||||
"GraspNet", "pred_decode", "ModelFreeCollisionDetector"
|
||||
]
|
||||
@@ -0,0 +1,132 @@
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import argparse
|
||||
from PIL import Image
|
||||
import torch
|
||||
import open3d as o3d
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(ROOT_DIR)
|
||||
|
||||
from models.graspnet import GraspNet, pred_decode
|
||||
import collections.abc as container_abcs
|
||||
import MinkowskiEngine as ME
|
||||
from utils.collision_detector import ModelFreeCollisionDetector
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--checkpoint_path', default='checkpoints/minkuresunet_realsense.tar')
|
||||
parser.add_argument('--dump_dir', help='Dump dir to save outputs', default='results/')
|
||||
parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
|
||||
parser.add_argument('--collision_thresh', type=float, default=0.01,
|
||||
help='Collision Threshold in collision detection [default: 0.01]')
|
||||
parser.add_argument('--voxel_size_cd', type=float, default=0.01,
|
||||
help='Voxel Size for collision detection')
|
||||
parser.add_argument('--infer', action='store_true', default=True)
|
||||
parser.add_argument('--vis', action='store_true', default=True)
|
||||
cfgs = parser.parse_args()
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------- GLOBAL CONFIG BEG
|
||||
# if not os.path.exists(cfgs.dump_dir):
|
||||
# os.mkdir(cfgs.dump_dir)
|
||||
|
||||
def minkowski_collate_fn(list_data):
|
||||
coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
|
||||
[d["feats"] for d in list_data])
|
||||
coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
|
||||
coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
|
||||
coordinates_batch, features_batch, return_index=True, return_inverse=True)
|
||||
res = {
|
||||
"coors": coordinates_batch,
|
||||
"feats": features_batch,
|
||||
"quantize2original": quantize2original
|
||||
}
|
||||
|
||||
def collate_fn_(batch):
|
||||
if type(batch[0]).__module__ == 'numpy':
|
||||
return torch.stack([torch.from_numpy(b) for b in batch], 0)
|
||||
elif isinstance(batch[0], container_abcs.Sequence):
|
||||
return [[torch.from_numpy(sample) for sample in b] for b in batch]
|
||||
elif isinstance(batch[0], container_abcs.Mapping):
|
||||
for key in batch[0]:
|
||||
if key == 'coors' or key == 'feats':
|
||||
continue
|
||||
res[key] = collate_fn_([d[key] for d in batch])
|
||||
return res
|
||||
res = collate_fn_(list_data)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
def data_process(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
|
||||
index = 0
|
||||
camera_poses = np.array([np.eye(4)]) # 相机位姿
|
||||
align_mat = np.eye(4) # Camera_0 相对桌面的位姿
|
||||
trans = np.dot(align_mat, camera_poses[int(index)])
|
||||
pcd.transform(trans)
|
||||
points = np.asarray(pcd.points)
|
||||
|
||||
ret_dict = {
|
||||
'point_clouds': points.astype(np.float32),
|
||||
'coors': points.astype(np.float32) / voxel_size,
|
||||
'feats': np.ones_like(points).astype(np.float32),
|
||||
}
|
||||
return ret_dict
|
||||
|
||||
|
||||
def get_grasp_dict(preds):
|
||||
grasp_dict = {
|
||||
"score": preds[:, 0],
|
||||
"width": preds[:, 1],
|
||||
"height": preds[:, 2],
|
||||
"depth": preds[:, 3],
|
||||
"rotation": preds[:, 4:13].reshape(-1, 3, 3),
|
||||
"translation": preds[:, 13:16].reshape(-1, 3),
|
||||
"object_id": preds[:, 16]
|
||||
}
|
||||
return grasp_dict
|
||||
|
||||
|
||||
def inference(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
|
||||
data_input = data_process(pcd, voxel_size)
|
||||
batch_data = minkowski_collate_fn([data_input])
|
||||
net = GraspNet(seed_feat_dim=512, is_training=False)
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
# device = torch.device("cpu")
|
||||
net.to(device)
|
||||
# Load checkpoint
|
||||
checkpoint_path = os.path.join(ROOT_DIR, cfgs.checkpoint_path)
|
||||
net.load_state_dict(checkpoint_path)
|
||||
|
||||
net.eval()
|
||||
|
||||
for key in batch_data:
|
||||
batch_data[key] = batch_data[key].to(device)
|
||||
# Forward pass
|
||||
with torch.no_grad():
|
||||
end_points = net(batch_data)
|
||||
grasp_preds = pred_decode(end_points)
|
||||
|
||||
preds = grasp_preds[0].detach().cpu().numpy()
|
||||
|
||||
sorted_index = np.argsort(-preds[:, 0])
|
||||
preds = preds[sorted_index]
|
||||
preds = preds[:10]
|
||||
|
||||
# collision detection
|
||||
if cfgs.collision_thresh > 0:
|
||||
cloud = data_input['point_clouds']
|
||||
mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)
|
||||
collision_mask = mfcdetector.detect(get_grasp_dict(preds), approach_dist=0.05,
|
||||
collision_thresh=cfgs.collision_thresh)
|
||||
preds = preds[~collision_mask]
|
||||
|
||||
return preds
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
data_dict = data_process()
|
||||
inference(data_dict)
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,224 @@
|
||||
import MinkowskiEngine as ME
|
||||
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
|
||||
from .resnet import ResNetBase
|
||||
|
||||
|
||||
class MinkUNetBase(ResNetBase):
|
||||
BLOCK = None
|
||||
PLANES = None
|
||||
DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1)
|
||||
LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)
|
||||
PLANES = (32, 64, 128, 256, 256, 128, 96, 96)
|
||||
INIT_DIM = 32
|
||||
OUT_TENSOR_STRIDE = 1
|
||||
|
||||
# To use the model, must call initialize_coords before forward pass.
|
||||
# Once data is processed, call clear to reset the model before calling
|
||||
# initialize_coords
|
||||
def __init__(self, in_channels, out_channels, D=3):
|
||||
ResNetBase.__init__(self, in_channels, out_channels, D)
|
||||
|
||||
def network_initialization(self, in_channels, out_channels, D):
|
||||
# Output of the first conv concated to conv6
|
||||
self.inplanes = self.INIT_DIM
|
||||
self.conv0p1s1 = ME.MinkowskiConvolution(
|
||||
in_channels, self.inplanes, kernel_size=5, dimension=D)
|
||||
|
||||
self.bn0 = ME.MinkowskiBatchNorm(self.inplanes)
|
||||
|
||||
self.conv1p1s2 = ME.MinkowskiConvolution(
|
||||
self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
|
||||
self.bn1 = ME.MinkowskiBatchNorm(self.inplanes)
|
||||
|
||||
self.block1 = self._make_layer(self.BLOCK, self.PLANES[0],
|
||||
self.LAYERS[0])
|
||||
|
||||
self.conv2p2s2 = ME.MinkowskiConvolution(
|
||||
self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
|
||||
self.bn2 = ME.MinkowskiBatchNorm(self.inplanes)
|
||||
|
||||
self.block2 = self._make_layer(self.BLOCK, self.PLANES[1],
|
||||
self.LAYERS[1])
|
||||
|
||||
self.conv3p4s2 = ME.MinkowskiConvolution(
|
||||
self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
|
||||
|
||||
self.bn3 = ME.MinkowskiBatchNorm(self.inplanes)
|
||||
self.block3 = self._make_layer(self.BLOCK, self.PLANES[2],
|
||||
self.LAYERS[2])
|
||||
|
||||
self.conv4p8s2 = ME.MinkowskiConvolution(
|
||||
self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D)
|
||||
self.bn4 = ME.MinkowskiBatchNorm(self.inplanes)
|
||||
self.block4 = self._make_layer(self.BLOCK, self.PLANES[3],
|
||||
self.LAYERS[3])
|
||||
|
||||
self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose(
|
||||
self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=D)
|
||||
self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4])
|
||||
|
||||
self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion
|
||||
self.block5 = self._make_layer(self.BLOCK, self.PLANES[4],
|
||||
self.LAYERS[4])
|
||||
self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose(
|
||||
self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=D)
|
||||
self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5])
|
||||
|
||||
self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion
|
||||
self.block6 = self._make_layer(self.BLOCK, self.PLANES[5],
|
||||
self.LAYERS[5])
|
||||
self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose(
|
||||
self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=D)
|
||||
self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6])
|
||||
|
||||
self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion
|
||||
self.block7 = self._make_layer(self.BLOCK, self.PLANES[6],
|
||||
self.LAYERS[6])
|
||||
self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose(
|
||||
self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=D)
|
||||
self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7])
|
||||
|
||||
self.inplanes = self.PLANES[7] + self.INIT_DIM
|
||||
self.block8 = self._make_layer(self.BLOCK, self.PLANES[7],
|
||||
self.LAYERS[7])
|
||||
|
||||
self.final = ME.MinkowskiConvolution(
|
||||
self.PLANES[7] * self.BLOCK.expansion,
|
||||
out_channels,
|
||||
kernel_size=1,
|
||||
bias=True,
|
||||
dimension=D)
|
||||
self.relu = ME.MinkowskiReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.conv0p1s1(x)
|
||||
out = self.bn0(out)
|
||||
out_p1 = self.relu(out)
|
||||
|
||||
out = self.conv1p1s2(out_p1)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
out_b1p2 = self.block1(out)
|
||||
|
||||
out = self.conv2p2s2(out_b1p2)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
out_b2p4 = self.block2(out)
|
||||
|
||||
out = self.conv3p4s2(out_b2p4)
|
||||
out = self.bn3(out)
|
||||
out = self.relu(out)
|
||||
out_b3p8 = self.block3(out)
|
||||
|
||||
# tensor_stride=16
|
||||
out = self.conv4p8s2(out_b3p8)
|
||||
out = self.bn4(out)
|
||||
out = self.relu(out)
|
||||
out = self.block4(out)
|
||||
|
||||
# tensor_stride=8
|
||||
out = self.convtr4p16s2(out)
|
||||
out = self.bntr4(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = ME.cat(out, out_b3p8)
|
||||
out = self.block5(out)
|
||||
|
||||
# tensor_stride=4
|
||||
out = self.convtr5p8s2(out)
|
||||
out = self.bntr5(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = ME.cat(out, out_b2p4)
|
||||
out = self.block6(out)
|
||||
|
||||
# tensor_stride=2
|
||||
out = self.convtr6p4s2(out)
|
||||
out = self.bntr6(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = ME.cat(out, out_b1p2)
|
||||
out = self.block7(out)
|
||||
|
||||
# tensor_stride=1
|
||||
out = self.convtr7p2s2(out)
|
||||
out = self.bntr7(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = ME.cat(out, out_p1)
|
||||
out = self.block8(out)
|
||||
|
||||
return self.final(out)
|
||||
|
||||
|
||||
class MinkUNet14(MinkUNetBase):
|
||||
BLOCK = BasicBlock
|
||||
LAYERS = (1, 1, 1, 1, 1, 1, 1, 1)
|
||||
|
||||
|
||||
class MinkUNet18(MinkUNetBase):
|
||||
BLOCK = BasicBlock
|
||||
LAYERS = (2, 2, 2, 2, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet34(MinkUNetBase):
|
||||
BLOCK = BasicBlock
|
||||
LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet50(MinkUNetBase):
|
||||
BLOCK = Bottleneck
|
||||
LAYERS = (2, 3, 4, 6, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet101(MinkUNetBase):
|
||||
BLOCK = Bottleneck
|
||||
LAYERS = (2, 3, 4, 23, 2, 2, 2, 2)
|
||||
|
||||
|
||||
class MinkUNet14A(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 128, 128, 96, 96)
|
||||
|
||||
|
||||
class MinkUNet14B(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 128, 128, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet14C(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 192, 192, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet14Dori(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet14E(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet14D(MinkUNet14):
|
||||
PLANES = (32, 64, 128, 256, 192, 192, 192, 192)
|
||||
|
||||
|
||||
class MinkUNet18A(MinkUNet18):
|
||||
PLANES = (32, 64, 128, 256, 128, 128, 96, 96)
|
||||
|
||||
|
||||
class MinkUNet18B(MinkUNet18):
|
||||
PLANES = (32, 64, 128, 256, 128, 128, 128, 128)
|
||||
|
||||
|
||||
class MinkUNet18D(MinkUNet18):
|
||||
PLANES = (32, 64, 128, 256, 384, 384, 384, 384)
|
||||
|
||||
|
||||
class MinkUNet34A(MinkUNet34):
|
||||
PLANES = (32, 64, 128, 256, 256, 128, 64, 64)
|
||||
|
||||
|
||||
class MinkUNet34B(MinkUNet34):
|
||||
PLANES = (32, 64, 128, 256, 256, 128, 64, 32)
|
||||
|
||||
|
||||
class MinkUNet34C(MinkUNet34):
|
||||
PLANES = (32, 64, 128, 256, 256, 128, 96, 96)
|
||||
@@ -0,0 +1,128 @@
|
||||
""" GraspNet baseline model definition.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
import logging
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import MinkowskiEngine as ME
|
||||
|
||||
from .backbone_resunet14 import MinkUNet14D
|
||||
from .modules import ApproachNet, GraspableNet, CloudCrop, SWADNet
|
||||
from ..utils.loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT
|
||||
from ..utils.label_generation import (process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix)
|
||||
from ...pointnet2.pointnet2_utils import furthest_point_sample, gather_operation
|
||||
|
||||
|
||||
class GraspNet(nn.Module):
    """GSNet-style end-to-end grasp pose prediction network.

    Pipeline:
      1. MinkowskiEngine sparse-conv backbone extracts per-point features.
      2. GraspableNet predicts per-point objectness and graspness.
      3. Graspable points are farthest-point-sampled down to M_points seeds.
      4. ApproachNet selects an approach view per seed.
      5. CloudCrop groups cylinder neighborhoods; SWADNet regresses
         score/width over (angle, depth) bins.
    """

    def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True):
        super().__init__()
        self.is_training = is_training
        self.seed_feature_dim = seed_feat_dim
        # bin counts / sample counts come from shared loss_utils constants
        self.num_depth = NUM_DEPTH
        self.num_angle = NUM_ANGLE
        self.M_points = M_POINT
        self.num_view = NUM_VIEW

        self.backbone = MinkUNet14D(in_channels=3, out_channels=self.seed_feature_dim, D=3)
        self.graspable = GraspableNet(seed_feature_dim=self.seed_feature_dim)
        self.rotation = ApproachNet(self.num_view, seed_feature_dim=self.seed_feature_dim, is_training=self.is_training)
        self.crop = CloudCrop(nsample=16, cylinder_radius=cylinder_radius, seed_feature_dim=self.seed_feature_dim)
        self.swad = SWADNet(num_angle=self.num_angle, num_depth=self.num_depth)

    def forward(self, end_points):
        """Run the full grasp prediction pipeline.

        Input:
            end_points: [dict] must contain 'point_clouds' (B, Ns, 3) plus the
                'coors'/'feats'/'quantize2original' tensors produced by the
                MinkowskiEngine quantization step in the dataloader.

        Output:
            end_points with prediction tensors added, or None when any sample
            in the batch has no graspable point (FPS would otherwise fail).
        """
        seed_xyz = end_points['point_clouds']  # use all sampled point cloud, B*Ns*3
        B, point_num, _ = seed_xyz.shape  # batch size
        # point-wise features from the sparse-conv backbone
        coordinates_batch = end_points['coors']
        features_batch = end_points['feats']
        mink_input = ME.SparseTensor(features_batch, coordinates=coordinates_batch)
        seed_features = self.backbone(mink_input).F
        # map voxel features back to the original point ordering: (B, feat_dim, Ns)
        seed_features = seed_features[end_points['quantize2original']].view(B, point_num, -1).transpose(1, 2)

        end_points = self.graspable(seed_features, end_points)
        seed_features_flipped = seed_features.transpose(1, 2)  # B*Ns*feat_dim
        objectness_score = end_points['objectness_score']
        graspness_score = end_points['graspness_score'].squeeze(1)
        # a point is graspable if classified as object AND scoring above threshold
        objectness_pred = torch.argmax(objectness_score, 1)
        objectness_mask = (objectness_pred == 1)
        graspness_mask = graspness_score > GRASPNESS_THRESHOLD
        graspable_mask = objectness_mask & graspness_mask

        seed_features_graspable = []
        seed_xyz_graspable = []
        graspable_num_batch = 0.
        for i in range(B):
            cur_mask = graspable_mask[i]
            graspable_num_batch += cur_mask.sum()
            cur_feat = seed_features_flipped[i][cur_mask]  # Ns*feat_dim
            cur_seed_xyz = seed_xyz[i][cur_mask]  # Ns*3 (empty when no point passes the mask)

            # NOTE(review): debug output is logged at ERROR level and will spam
            # production logs -- consider demoting to DEBUG or removing.
            logging.error(f"cur_mask: {cur_mask.shape}")
            logging.error(f"true num: {torch.sum(cur_mask).item()}")
            logging.error(f"cur_seed_xyz: {cur_seed_xyz.shape}")

            # bail out for the whole batch when one sample has no graspable point
            if torch.sum(cur_mask).item() == 0:
                return None

            cur_seed_xyz = cur_seed_xyz.unsqueeze(0)  # 1*Ns*3
            fps_idxs = furthest_point_sample(cur_seed_xyz, self.M_points)
            cur_seed_xyz_flipped = cur_seed_xyz.transpose(1, 2).contiguous()  # 1*3*Ns
            cur_seed_xyz = gather_operation(cur_seed_xyz_flipped, fps_idxs).transpose(1, 2).squeeze(0).contiguous()  # Ns*3
            cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous()  # 1*feat_dim*Ns
            cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous()  # feat_dim*Ns

            seed_features_graspable.append(cur_feat)
            seed_xyz_graspable.append(cur_seed_xyz)

        seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0)  # B*Ns*3
        seed_features_graspable = torch.stack(seed_features_graspable)  # B*feat_dim*Ns
        end_points['xyz_graspable'] = seed_xyz_graspable
        end_points['graspable_count_stage1'] = graspable_num_batch / B

        end_points, res_feat = self.rotation(seed_features_graspable, end_points)
        # residual connection around the approach-view head
        seed_features_graspable = seed_features_graspable + res_feat

        if self.is_training:
            # grasp labels are generated on the fly from the object poses
            end_points = process_grasp_labels(end_points)
            grasp_top_views_rot, end_points = match_grasp_view_and_label(end_points)
        else:
            grasp_top_views_rot = end_points['grasp_top_view_rot']

        group_features = self.crop(seed_xyz_graspable.contiguous(), seed_features_graspable.contiguous(), grasp_top_views_rot)
        end_points = self.swad(group_features, end_points)

        return end_points
|
||||
|
||||
|
||||
def pred_decode(end_points):
    """Decode raw network outputs into per-scene grasp arrays.

    Input:
        end_points: [dict] forward() outputs, containing 'point_clouds',
            'xyz_graspable', 'grasp_score_pred', 'grasp_width_pred' and
            'grasp_top_view_xyz'.

    Output:
        grasp_preds: [list of torch.Tensor, (M_POINT, 17)] one tensor per
            scene, columns: [score, width, height, depth, rotation(9),
            center(3), obj_id].
    """
    batch_size = len(end_points['point_clouds'])
    grasp_preds = []
    for i in range(batch_size):
        grasp_center = end_points['xyz_graspable'][i].float()

        # pick the best (angle, depth) bin per sampled point
        grasp_score = end_points['grasp_score_pred'][i].float()
        grasp_score = grasp_score.view(M_POINT, NUM_ANGLE * NUM_DEPTH)
        grasp_score, grasp_score_inds = torch.max(grasp_score, -1)  # [M_POINT]
        grasp_score = grasp_score.view(-1, 1)
        # angle bins evenly divide pi; use NUM_ANGLE instead of a hard-coded
        # 12 so the decoder stays consistent with the configured bin count
        grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / NUM_ANGLE
        # depth bins are 1 cm apart, starting at 1 cm
        grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01
        grasp_depth = grasp_depth.view(-1, 1)
        # widths were regressed at 10x scale; 1.2 is an empirical safety factor
        grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10.
        grasp_width = grasp_width.view(M_POINT, NUM_ANGLE * NUM_DEPTH)
        grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1))
        grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH)

        # approach vector points from the selected view toward the object
        approaching = -end_points['grasp_top_view_xyz'][i].float()
        grasp_rot = batch_viewpoint_params_to_matrix(approaching, grasp_angle)
        grasp_rot = grasp_rot.view(M_POINT, 9)

        # merge preds into the GraspGroup column layout
        grasp_height = 0.02 * torch.ones_like(grasp_score)
        obj_ids = -1 * torch.ones_like(grasp_score)  # object identity unknown here
        grasp_preds.append(
            torch.cat([grasp_score, grasp_width, grasp_height, grasp_depth, grasp_rot, grasp_center, obj_ids], dim=-1))
    return grasp_preds
|
||||
@@ -0,0 +1,80 @@
|
||||
import torch.nn as nn
|
||||
import torch
|
||||
|
||||
|
||||
def get_loss(end_points):
    """Aggregate all stage losses into the overall training loss.

    Input:
        end_points: [dict] prediction and label tensors.

    Output:
        (loss, end_points): weighted total loss; end_points gains the
        individual loss entries plus 'loss/overall_loss'.
    """
    obj_loss, end_points = compute_objectness_loss(end_points)
    grasp_loss, end_points = compute_graspness_loss(end_points)
    v_loss, end_points = compute_view_graspness_loss(end_points)
    s_loss, end_points = compute_score_loss(end_points)
    w_loss, end_points = compute_width_loss(end_points)
    # weights follow the original training recipe
    total = obj_loss + 10 * grasp_loss + 100 * v_loss + 15 * s_loss + 10 * w_loss
    end_points['loss/overall_loss'] = total
    return total, end_points
|
||||
|
||||
|
||||
def compute_objectness_loss(end_points):
    """Cross-entropy objectness loss plus accuracy/precision/recall metrics.

    Reads 'objectness_score' (B, 2, Ns) and 'objectness_label' (B, Ns);
    writes the loss and three monitoring metrics into end_points.
    """
    score = end_points['objectness_score']
    label = end_points['objectness_label']
    loss = nn.functional.cross_entropy(score, label, reduction='mean')
    end_points['loss/stage1_objectness_loss'] = loss

    pred = torch.argmax(score, 1)
    correct = (pred == label.long()).float()
    end_points['stage1_objectness_acc'] = correct.mean()
    # precision: fraction of predicted positives that are correct
    end_points['stage1_objectness_prec'] = correct[pred == 1].mean()
    # recall: fraction of labelled positives that were predicted
    end_points['stage1_objectness_recall'] = correct[label == 1].mean()
    return loss, end_points
|
||||
|
||||
|
||||
def compute_graspness_loss(end_points):
    """Masked Smooth-L1 graspness regression loss plus a binned rank-error metric.

    Loss is averaged only over points labelled as objects
    ('objectness_label'). The rank-error metric quantizes score and label
    into 20 bins on [0, 1) and reports the mean bin distance.
    """
    score = end_points['graspness_score'].squeeze(1)
    label = end_points['graspness_label'].squeeze(-1)
    valid = end_points['objectness_label'].bool()

    elementwise = nn.functional.smooth_l1_loss(score, label, reduction='none')
    loss = elementwise[valid].mean()

    # metric only -- computed on detached copies so no gradients flow
    score_q = torch.clamp(score.detach().clone()[valid], 0., 0.99)
    label_q = torch.clamp(label.detach().clone()[valid], 0., 0.99)
    rank_error = (torch.abs(torch.trunc(score_q * 20) - torch.trunc(label_q * 20)) / 20.).mean()
    end_points['stage1_graspness_acc_rank_error'] = rank_error

    end_points['loss/stage1_graspness_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_view_graspness_loss(end_points):
    """Smooth-L1 loss between predicted and labelled per-view graspness.

    Reads 'view_score' and 'batch_grasp_view_graspness'; stores the loss
    under 'loss/stage2_view_loss'.
    """
    predicted = end_points['view_score']
    target = end_points['batch_grasp_view_graspness']
    loss = nn.functional.smooth_l1_loss(predicted, target, reduction='mean')
    end_points['loss/stage2_view_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_score_loss(end_points):
    """Smooth-L1 loss between predicted and labelled grasp scores.

    Reads 'grasp_score_pred' and 'batch_grasp_score'; stores the loss
    under 'loss/stage3_score_loss'.
    """
    predicted = end_points['grasp_score_pred']
    target = end_points['batch_grasp_score']
    loss = nn.functional.smooth_l1_loss(predicted, target, reduction='mean')

    end_points['loss/stage3_score_loss'] = loss
    return loss, end_points
|
||||
|
||||
|
||||
def compute_width_loss(end_points):
    """Smooth-L1 width regression loss, restricted to positive-score bins.

    Widths are supervised at 10x scale; only (point, angle, depth) entries
    whose grasp-score label is positive contribute to the mean.
    """
    predicted = end_points['grasp_width_pred']
    # width labels are scaled by 10 to match the regression range
    target = end_points['batch_grasp_width'] * 10
    elementwise = nn.functional.smooth_l1_loss(predicted, target, reduction='none')
    # supervise only entries with a positive score label
    positive = end_points['batch_grasp_score'] > 0
    loss = elementwise[positive].mean()
    end_points['loss/stage3_width_loss'] = loss
    return loss, end_points
|
||||
@@ -0,0 +1,116 @@
|
||||
# import os
|
||||
# import sys
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
# ROOT_DIR = os.path.dirname(BASE_DIR)
|
||||
# sys.path.append(ROOT_DIR)
|
||||
|
||||
from ...pointnet2 import pytorch_utils as pt_utils
|
||||
from ...pointnet2.pointnet2_utils import CylinderQueryAndGroup
|
||||
from ..utils.loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix
|
||||
|
||||
|
||||
class GraspableNet(nn.Module):
    """Per-point graspability head.

    A single 1x1 convolution maps each seed feature to three channels:
    two objectness logits and one graspness score.
    """

    def __init__(self, seed_feature_dim):
        super().__init__()
        self.in_dim = seed_feature_dim  # default: 512
        self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1)

    def forward(self, seed_features, end_points):
        """seed_features: (B, in_dim, num_seed) -> writes scores into end_points."""
        logits = self.conv_graspable(seed_features)  # (B, 3, num_seed)
        end_points['objectness_score'] = logits[:, :2]  # (B, 2, num_seed)
        end_points['graspness_score'] = logits[:, 2]    # (B, num_seed)
        return end_points
|
||||
|
||||
|
||||
class ApproachNet(nn.Module):
    """Approach-view head.

    Scores num_view candidate approach directions per seed point, selects
    one (sampled proportionally to normalized score during training, argmax
    at inference), and converts it to a per-seed rotation matrix.
    """

    def __init__(self, num_view, seed_feature_dim, is_training=True):
        super().__init__()
        self.num_view = num_view
        self.in_dim = seed_feature_dim
        self.is_training = is_training
        self.conv1 = nn.Conv1d(self.in_dim, self.in_dim, 1)
        self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1)

    def forward(self, seed_features, end_points):
        """
        Input:
            seed_features: (B, in_dim, num_seed) features of graspable seeds.
            end_points: [dict] result accumulator.

        Output:
            end_points with view predictions added, and res_features
            (B, in_dim, num_seed) used as a residual by the caller.
        """
        B, _, num_seed = seed_features.size()
        res_features = F.relu(self.conv1(seed_features), inplace=True)
        features = self.conv2(res_features)
        view_score = features.transpose(1, 2).contiguous()  # (B, num_seed, num_view)
        end_points['view_score'] = view_score

        if self.is_training:
            # normalize view graspness score to 0~1 (on a detached copy) and
            # sample a view index proportionally to the normalized score
            view_score_ = view_score.clone().detach()
            view_score_max, _ = torch.max(view_score_, dim=2)
            view_score_min, _ = torch.min(view_score_, dim=2)
            view_score_max = view_score_max.unsqueeze(-1).expand(-1, -1, self.num_view)
            view_score_min = view_score_min.unsqueeze(-1).expand(-1, -1, self.num_view)
            view_score_ = (view_score_ - view_score_min) / (view_score_max - view_score_min + 1e-8)

            top_view_inds = []
            for i in range(B):
                top_view_inds_batch = torch.multinomial(view_score_[i], 1, replacement=False)
                top_view_inds.append(top_view_inds_batch)
            top_view_inds = torch.stack(top_view_inds, dim=0).squeeze(-1)  # B, num_seed
        else:
            # inference: deterministic best-scoring view
            _, top_view_inds = torch.max(view_score, dim=2)  # (B, num_seed)

        # look up the xyz direction of each selected view from the template set
        top_view_inds_ = top_view_inds.view(B, num_seed, 1, 1).expand(-1, -1, -1, 3).contiguous()
        template_views = generate_grasp_views(self.num_view).to(features.device)  # (num_view, 3)
        template_views = template_views.view(1, 1, self.num_view, 3).expand(B, num_seed, -1, -1).contiguous()
        vp_xyz = torch.gather(template_views, 2, top_view_inds_).squeeze(2)  # (B, num_seed, 3)
        vp_xyz_ = vp_xyz.view(-1, 3)
        # in-plane angle is fixed to zero here; SWADNet refines angle later
        batch_angle = torch.zeros(vp_xyz_.size(0), dtype=vp_xyz.dtype, device=vp_xyz.device)
        vp_rot = batch_viewpoint_params_to_matrix(-vp_xyz_, batch_angle).view(B, num_seed, 3, 3)
        end_points['grasp_top_view_xyz'] = vp_xyz
        end_points['grasp_top_view_rot'] = vp_rot

        end_points['grasp_top_view_inds'] = top_view_inds
        return end_points, res_features
|
||||
|
||||
|
||||
class CloudCrop(nn.Module):
    """Cylinder crop-and-pool module.

    Groups a cylinder-shaped neighborhood around each seed point (oriented
    by its approach view) and pools it into a fixed-size feature vector.
    """

    def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04):
        super().__init__()
        self.nsample = nsample  # neighbors grouped per cylinder
        self.in_dim = seed_feature_dim
        self.cylinder_radius = cylinder_radius
        mlps = [3 + self.in_dim, 256, 256]  # use xyz, so plus 3

        self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample,
                                             use_xyz=True, normalize_xyz=True)
        self.mlps = pt_utils.SharedMLP(mlps, bn=True)

    def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot):
        """
        Input:
            seed_xyz_graspable: (B, M, 3) sampled seed coordinates.
            seed_features_graspable: (B, feat_dim, M) seed features.
            vp_rot: (B, M, 3, 3) per-seed approach-view rotations.

        Output:
            new_features: (B, 256, M) pooled cylinder features.
        """
        grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot,
                                       seed_features_graspable)  # B*(3+feat_dim)*M*K
        new_features = self.mlps(grouped_feature)  # (batch_size, mlps[-1], M, K)
        # max-pool over the K grouped neighbors of each seed
        new_features = F.max_pool2d(new_features, kernel_size=[1, new_features.size(3)])  # (batch_size, mlps[-1], M, 1)
        new_features = new_features.squeeze(-1)  # (batch_size, mlps[-1], M)
        return new_features
|
||||
|
||||
|
||||
class SWADNet(nn.Module):
    """Score-Width head over (Angle, Depth) bins.

    Predicts a grasp score and a gripper width for every (angle, depth)
    bin of each seed point.
    """

    def __init__(self, num_angle, num_depth):
        super().__init__()
        self.num_angle = num_angle
        self.num_depth = num_depth

        # input feature dim must match the output of the CloudCrop module
        self.conv1 = nn.Conv1d(256, 256, 1)
        self.conv_swad = nn.Conv1d(256, 2 * num_angle * num_depth, 1)

    def forward(self, vp_features, end_points):
        """vp_features: (B, 256, num_seed) pooled view features."""
        batch, _, n_seed = vp_features.size()
        hidden = F.relu(self.conv1(vp_features), inplace=True)
        raw = self.conv_swad(hidden)  # (B, 2*A*D, num_seed)
        raw = raw.view(batch, 2, self.num_angle, self.num_depth, n_seed)
        raw = raw.permute(0, 1, 4, 2, 3)  # (B, 2, num_seed, A, D)

        # split the two prediction channels
        end_points['grasp_score_pred'] = raw[:, 0]  # B * num_seed * num_angle * num_depth
        end_points['grasp_width_pred'] = raw[:, 1]
        return end_points
|
||||
@@ -0,0 +1,196 @@
|
||||
import torch.nn as nn
|
||||
|
||||
try:
|
||||
import open3d as o3d
|
||||
except ImportError:
|
||||
raise ImportError("Please install open3d with `pip install open3d`.")
|
||||
|
||||
import MinkowskiEngine as ME
|
||||
from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck
|
||||
|
||||
|
||||
class ResNetBase(nn.Module):
    """Generic sparse ResNet classifier built on MinkowskiEngine.

    Subclasses set BLOCK (BasicBlock/Bottleneck) and LAYERS (blocks per
    stage); PLANES gives the channel width of each of the four stages.
    """

    BLOCK = None  # residual block class; must be set by subclasses
    LAYERS = ()  # number of blocks per residual stage
    INIT_DIM = 64  # channels after the stem convolution
    PLANES = (64, 128, 256, 512)  # channels of the four residual stages

    def __init__(self, in_channels, out_channels, D=3):
        nn.Module.__init__(self)
        self.D = D  # spatial dimension of the sparse tensors
        assert self.BLOCK is not None

        self.network_initialization(in_channels, out_channels, D)
        self.weight_initialization()

    def network_initialization(self, in_channels, out_channels, D):
        """Build stem, four residual stages, head conv, global pool, classifier."""

        self.inplanes = self.INIT_DIM
        # stem: strided conv + instance norm + ReLU + max pool
        self.conv1 = nn.Sequential(
            ME.MinkowskiConvolution(
                in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D
            ),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D),
        )

        # each stage downsamples by 2
        self.layer1 = self._make_layer(
            self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2
        )
        self.layer2 = self._make_layer(
            self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2
        )
        self.layer3 = self._make_layer(
            self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2
        )
        self.layer4 = self._make_layer(
            self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2
        )

        # head conv with dropout before global pooling
        self.conv5 = nn.Sequential(
            ME.MinkowskiDropout(),
            ME.MinkowskiConvolution(
                self.inplanes, self.inplanes, kernel_size=3, stride=3, dimension=D
            ),
            ME.MinkowskiInstanceNorm(self.inplanes),
            ME.MinkowskiGELU(),
        )

        self.glob_pool = ME.MinkowskiGlobalMaxPooling()

        self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True)

    def weight_initialization(self):
        """Kaiming-init conv kernels; unit-init batch-norm scale/shift."""
        for m in self.modules():
            if isinstance(m, ME.MinkowskiConvolution):
                ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu")

            if isinstance(m, ME.MinkowskiBatchNorm):
                nn.init.constant_(m.bn.weight, 1)
                nn.init.constant_(m.bn.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1):
        """Assemble one residual stage of `blocks` blocks.

        A 1x1 conv + BN downsample branch is added when the stage changes
        resolution or channel count.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                ME.MinkowskiConvolution(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    dimension=self.D,
                ),
                ME.MinkowskiBatchNorm(planes * block.expansion),
            )
        layers = []
        layers.append(
            block(
                self.inplanes,
                planes,
                stride=stride,
                dilation=dilation,
                downsample=downsample,
                dimension=self.D,
            )
        )
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(
                block(
                    self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D
                )
            )

        return nn.Sequential(*layers)

    def forward(self, x: ME.SparseTensor):
        """Sparse tensor in, class logits out (stem -> stages -> head -> pool)."""
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.conv5(x)
        x = self.glob_pool(x)
        return self.final(x)
|
||||
|
||||
|
||||
class ResNet14(ResNetBase):
    # Shallowest variant: one BasicBlock per stage.
    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
|
||||
|
||||
|
||||
class ResNet18(ResNetBase):
    # Standard ResNet-18 layout: two BasicBlocks per stage.
    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
|
||||
|
||||
|
||||
class ResNet34(ResNetBase):
    # Standard ResNet-34 layout with BasicBlocks.
    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResNet50(ResNetBase):
    # ResNet-50 layout: Bottleneck blocks (4x channel expansion).
    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResNet101(ResNetBase):
    # ResNet-101 layout: deep third stage of Bottleneck blocks.
    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)
|
||||
|
||||
|
||||
class ResFieldNetBase(ResNetBase):
    """ResNet variant whose input is a continuous ME.TensorField.

    Two sinusoidal embedding MLPs lift the field features before the
    standard sparse ResNet trunk is applied.
    """

    def network_initialization(self, in_channels, out_channels, D):
        field_ch = 32  # width of the first field embedding
        field_ch2 = 64  # width of the second field embedding
        self.field_network = nn.Sequential(
            ME.MinkowskiSinusoidal(in_channels, field_ch),
            ME.MinkowskiBatchNorm(field_ch),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(field_ch, field_ch),
            ME.MinkowskiBatchNorm(field_ch),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )
        # second stage consumes the first embedding concatenated with the raw field
        self.field_network2 = nn.Sequential(
            ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2),
            ME.MinkowskiBatchNorm(field_ch2),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiLinear(field_ch2, field_ch2),
            ME.MinkowskiBatchNorm(field_ch2),
            ME.MinkowskiReLU(inplace=True),
            ME.MinkowskiToSparseTensor(),
        )

        # trunk takes the embedded features as input channels
        ResNetBase.network_initialization(self, field_ch2, out_channels, D)

    def forward(self, x: ME.TensorField):
        """Embed the tensor field, then run the standard ResNet trunk."""
        otensor = self.field_network(x)
        otensor2 = self.field_network2(otensor.cat_slice(x))
        return ResNetBase.forward(self, otensor2)
|
||||
|
||||
|
||||
class ResFieldNet14(ResFieldNetBase):
    # Field-input counterpart of ResNet14.
    BLOCK = BasicBlock
    LAYERS = (1, 1, 1, 1)
|
||||
|
||||
|
||||
class ResFieldNet18(ResFieldNetBase):
    # Field-input counterpart of ResNet18.
    BLOCK = BasicBlock
    LAYERS = (2, 2, 2, 2)
|
||||
|
||||
|
||||
class ResFieldNet34(ResFieldNetBase):
    # Field-input counterpart of ResNet34.
    BLOCK = BasicBlock
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResFieldNet50(ResFieldNetBase):
    # Field-input counterpart of ResNet50.
    BLOCK = Bottleneck
    LAYERS = (3, 4, 6, 3)
|
||||
|
||||
|
||||
class ResFieldNet101(ResFieldNetBase):
    # Field-input counterpart of ResNet101.
    BLOCK = Bottleneck
    LAYERS = (3, 4, 23, 3)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,132 @@
|
||||
""" Collision detection to remove collided grasp pose predictions.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
import numpy as np
|
||||
import open3d as o3d
|
||||
|
||||
|
||||
class ModelFreeCollisionDetector():
    """ Collision detection in scenes without object labels. Current finger width and length are fixed.

    Input:
        scene_points: [numpy.ndarray, (N,3), numpy.float32]
            the scene points to detect
        voxel_size: [float]
            used for downsample

    Example usage:
        mfcdetector = ModelFreeCollisionDetector(scene_points, voxel_size=0.005)
        collision_mask = mfcdetector.detect(grasp_group, approach_dist=0.03)
        collision_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, return_ious=True)
        collision_mask, empty_mask = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,
                                        return_empty_grasp=True, empty_thresh=0.01)
        collision_mask, empty_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05,
                                        return_empty_grasp=True, empty_thresh=0.01, return_ious=True)
    """
    def __init__(self, scene_points, voxel_size=0.005):
        # fixed gripper geometry (meters)
        self.finger_width = 0.005
        self.finger_length = 0.09
        self.voxel_size = voxel_size
        # voxel-downsample the scene once; all grasps are checked against it
        scene_cloud = o3d.geometry.PointCloud()
        scene_cloud.points = o3d.utility.Vector3dVector(scene_points)
        scene_cloud = scene_cloud.voxel_down_sample(voxel_size)
        self.scene_points = np.array(scene_cloud.points)  # (K, 3)

    def detect(self, preds, approach_dist=0.03, collision_thresh=0.05, return_empty_grasp=False, empty_thresh=0.01, return_ious=False):
        """ Detect collision of grasps.

        Input:
            preds: [numpy.ndarray, (M,17)]
                decoded grasp predictions; columns follow the GraspGroup
                layout [score, width, height, depth, rotation(9), center(3), obj_id]
            approach_dist: [float]
                the distance for a gripper to move along approaching direction before grasping
                this shifting space requires no point either
            collision_thresh: [float]
                if global collision iou is greater than this threshold,
                a collision is detected
            return_empty_grasp: [bool]
                if True, return a mask to imply whether there are objects in a grasp
            empty_thresh: [float]
                if inner space iou is smaller than this threshold,
                a collision is detected
                only set when [return_empty_grasp] is True
            return_ious: [bool]
                if True, return global collision iou and part collision ious

        Output:
            collision_mask: [numpy.ndarray, (M,), numpy.bool]
                True implies collision
            [optional] empty_mask: [numpy.ndarray, (M,), numpy.bool]
                True implies empty grasp
                only returned when [return_empty_grasp] is True
            [optional] iou_list: list of [numpy.ndarray, (M,), numpy.float32]
                global and part collision ious, containing
                [global_iou, left_iou, right_iou, bottom_iou, shifting_iou]
                only returned when [return_ious] is True
        """
        approach_dist = max(approach_dist, self.finger_width)
        T = preds[:, 13:16].reshape(-1, 3)  # (N, 3) grasp centers
        R = preds[:, 4:13].reshape(-1, 3, 3)  # (N, 3, 3) grasp rotations
        heights = preds[:, 2][:, np.newaxis]  # (N,) -> (N, 1)
        depths = preds[:, 3][:, np.newaxis]  # (N,) -> (N, 1)
        widths = preds[:, 1][:, np.newaxis]  # (N,) -> (N, 1)
        # (translated comment) offset the scene by each grasp point
        targets = self.scene_points[np.newaxis, :, :] - T[:, np.newaxis, :]  # (1, K, 3) - (N, 1, 3) = (N, K, 3)

        # rotate the offsets into each gripper's local frame
        targets = np.matmul(targets, R)  # (N, K, 3) * (N, 3, 3) = (N, K, 3)

        ## collision detection
        # height mask: point lies between the gripper's top/bottom faces
        mask1 = ((targets[:, :, 2] > -heights / 2) & (targets[:, :, 2] < heights / 2))
        # depth extent of the fingers
        mask2 = ((targets[:, :, 0] > depths - self.finger_length) & (targets[:, :, 0] < depths))
        # left finger band (y in [-(w/2+fw), -w/2])
        mask3 = (targets[:, :, 1] > -(widths / 2 + self.finger_width))
        mask4 = (targets[:, :, 1] < -widths / 2)
        # right finger band (y in [w/2, w/2+fw])
        mask5 = (targets[:, :, 1] < (widths / 2 + self.finger_width))
        mask6 = (targets[:, :, 1] > widths / 2)
        # bottom plate band behind the fingers
        mask7 = ((targets[:, :, 0] <= depths - self.finger_length)
                 & (targets[:, :, 0] > depths - self.finger_length - self.finger_width))
        # swept volume while the gripper approaches
        mask8 = ((targets[:, :, 0] <= depths - self.finger_length - self.finger_width)
                 & (targets[:, :, 0] > depths - self.finger_length - self.finger_width - approach_dist))

        # get collision mask of each point
        left_mask = (mask1 & mask2 & mask3 & mask4)
        right_mask = (mask1 & mask2 & mask5 & mask6)
        bottom_mask = (mask1 & mask3 & mask5 & mask7)
        shifting_mask = (mask1 & mask3 & mask5 & mask8)
        global_mask = (left_mask | right_mask | bottom_mask | shifting_mask)

        # calculate equivalent volume (in voxels) of each gripper part
        left_right_volume = (heights * self.finger_length * self.finger_width / (self.voxel_size**3)).reshape(-1)
        bottom_volume = (heights * (widths + 2 * self.finger_width) * self.finger_width / (self.voxel_size**3)).reshape(-1)
        shifting_volume = (heights * (widths + 2 * self.finger_width) * approach_dist / (self.voxel_size**3)).reshape(-1)
        volume = left_right_volume * 2 + bottom_volume + shifting_volume

        # collision "iou": colliding points relative to the part volume
        global_iou = global_mask.sum(axis=1) / (volume + 1e-6)

        # get collision mask
        collision_mask = (global_iou > collision_thresh)

        if not (return_empty_grasp or return_ious):
            return collision_mask

        ret_value = [collision_mask,]
        if return_empty_grasp:
            # inner region between the fingers; too few points => empty grasp
            inner_mask = (mask1 & mask2 & (~mask4) & (~mask6))
            inner_volume = (heights * self.finger_length * widths / (self.voxel_size**3)).reshape(-1)
            empty_mask = (inner_mask.sum(axis=-1) / inner_volume < empty_thresh)
            ret_value.append(empty_mask)
        if return_ious:
            left_iou = left_mask.sum(axis=1) / (left_right_volume + 1e-6)
            right_iou = right_mask.sum(axis=1) / (left_right_volume + 1e-6)
            bottom_iou = bottom_mask.sum(axis=1) / (bottom_volume + 1e-6)
            shifting_iou = shifting_mask.sum(axis=1) / (shifting_volume + 1e-6)
            ret_value.append([global_iou, left_iou, right_iou, bottom_iou, shifting_iou])
        return ret_value
|
||||
@@ -0,0 +1,156 @@
|
||||
""" Tools for data processing.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class CameraInfo():
    """ Camera intrinsics for point cloud creation. """

    def __init__(self, width, height, fx, fy, cx, cy, scale):
        self.width = width  # image width in pixels
        self.height = height  # image height in pixels
        self.fx = fx  # focal length x (pixels)
        self.fy = fy  # focal length y (pixels)
        self.cx = cx  # principal point x (pixels)
        self.cy = cy  # principal point y (pixels)
        self.scale = scale  # depth-unit divisor (raw depth / scale = meters)
|
||||
|
||||
|
||||
def create_point_cloud_from_depth_image(depth, camera, organized=True):
    """ Back-project a depth image into a 3-D point cloud (pinhole model).

    Input:
        depth: [numpy.ndarray, (H,W), numpy.float32]
            depth image
        camera: [CameraInfo]
            camera intrinsics
        organized: bool
            whether to keep the cloud in image shape (H,W,3)

    Output:
        cloud: [numpy.ndarray, (H,W,3)/(H*W,3), numpy.float32]
            generated cloud, (H,W,3) for organized=True, (H*W,3) for organized=False
    """
    assert (depth.shape[0] == camera.height and depth.shape[1] == camera.width)
    # per-pixel row/column indices, both shaped (H, W)
    rows, cols = np.indices((camera.height, camera.width))
    z = depth / camera.scale
    # standard pinhole back-projection
    x = (cols - camera.cx) * z / camera.fx
    y = (rows - camera.cy) * z / camera.fy
    cloud = np.stack([x, y, z], axis=-1)
    return cloud if organized else cloud.reshape([-1, 3])
|
||||
|
||||
|
||||
def transform_point_cloud(cloud, transform, format='4x4'):
    """ Transform points to new coordinates with transformation matrix.

    Input:
        cloud: [np.ndarray, (N,3), np.float32]
            points in original coordinates
        transform: [np.ndarray, (3,3)/(3,4)/(4,4), np.float32]
            transformation matrix, could be rotation only or rotation+translation
        format: [string, '3x3'/'3x4'/'4x4']
            the shape of transformation matrix
            '3x3' --> rotation matrix
            '3x4'/'4x4' --> rotation matrix + translation matrix

    Output:
        cloud_transformed: [np.ndarray, (N,3), np.float32]
            points in new coordinates
    """
    if format not in ('3x3', '3x4', '4x4'):
        raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.')
    if format == '3x3':
        # pure rotation
        return np.dot(transform, cloud.T).T
    # rotation + translation via homogeneous coordinates
    ones = np.ones(cloud.shape[0])[:, np.newaxis]
    homogeneous = np.concatenate([cloud, ones], axis=1)
    transformed = np.dot(transform, homogeneous.T).T
    return transformed[:, :3]
|
||||
|
||||
|
||||
def compute_point_dists(A, B):
    """ Compute pair-wise point distances in two matrices.

    Input:
        A: [np.ndarray, (N,3), np.float32]
            point cloud A
        B: [np.ndarray, (M,3), np.float32]
            point cloud B

    Output:
        dists: [np.ndarray, (N,M), np.float32]
            distance matrix
    """
    # broadcast to (N, M, 3) and reduce over the coordinate axis
    diff = A[:, np.newaxis, :] - B[np.newaxis, :, :]
    return np.linalg.norm(diff, axis=-1)
|
||||
|
||||
|
||||
def remove_invisible_grasp_points(cloud, grasp_points, pose, th=0.01):
    """ Remove invisible part of object model according to scene point cloud.

    Input:
        cloud: [np.ndarray, (N,3), np.float32]
            scene point cloud
        grasp_points: [np.ndarray, (M,3), np.float32]
            grasp point label in object coordinates
        pose: [np.ndarray, (4,4), np.float32]
            transformation matrix from object coordinates to world coordinates
        th: [float]
            a grasp point is kept only if its distance to the nearest scene
            point is below this threshold

    Output:
        visible_mask: [np.ndarray, (M,), np.bool]
            mask to show the visible part of grasp points
    """
    # bring grasp points into world coordinates, then compare against the scene
    transformed = transform_point_cloud(grasp_points, pose)
    nearest = compute_point_dists(transformed, cloud).min(axis=1)
    return nearest < th
|
||||
|
||||
|
||||
def get_workspace_mask(cloud, seg, trans=None, organized=True, outlier=0):
    """ Keep points in workspace as input.

    Input:
        cloud: [np.ndarray, (H,W,3), np.float32]
            scene point cloud
        seg: [np.ndarray, (H,W,), np.uint8]
            segmentation label of scene points
        trans: [np.ndarray, (4,4), np.float32]
            transformation matrix for scene points, default: None.
        organized: [bool]
            whether to keep the cloud in image shape (H,W,3)
        outlier: [float]
            margin added around the foreground bounding box; points farther
            away than this are removed

    Output:
        workspace_mask: [np.ndarray, (H,W)/(H*W,), np.bool]
            mask to indicate whether scene points are in workspace
    """
    if organized:
        h, w, _ = cloud.shape
        cloud = cloud.reshape([h * w, 3])
        seg = seg.reshape(h * w)
    if trans is not None:
        cloud = transform_point_cloud(cloud, trans)
    # axis-aligned bounding box of labelled foreground points, padded by outlier
    foreground = cloud[seg > 0]
    lower = foreground.min(axis=0) - outlier
    upper = foreground.max(axis=0) + outlier
    workspace_mask = np.all((cloud > lower) & (cloud < upper), axis=1)
    if organized:
        workspace_mask = workspace_mask.reshape([h, w])

    return workspace_mask
|
||||
@@ -0,0 +1,143 @@
|
||||
""" Dynamically generate grasp labels during training.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import torch
|
||||
|
||||
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
# ROOT_DIR = os.path.dirname(BASE_DIR)
|
||||
# sys.path.append(ROOT_DIR)
|
||||
# sys.path.append(os.path.join(ROOT_DIR, 'knn'))
|
||||
|
||||
from ...knn.knn_modules import knn
|
||||
from ..utils.loss_utils import GRASP_MAX_WIDTH, batch_viewpoint_params_to_matrix, \
|
||||
transform_point_cloud, generate_grasp_views
|
||||
|
||||
|
||||
def process_grasp_labels(end_points):
    """ Process labels according to scene points and object poses.

    For every sampled seed point: transform each object's grasp-label points
    and template views into the scene frame, match each seed to its nearest
    labeled grasp point, and compute a min-max-normalized per-view graspness.

    Input:
        end_points: [dict]
            requires 'xyz_graspable' (B, Ns, 3) and per-scene lists
            'object_poses_list', 'grasp_points_list', 'grasp_scores_list',
            'grasp_widths_list'

    Output:
        end_points: [dict]
            with 'batch_grasp_point' (B, Ns, 3),
            'batch_grasp_view_rot' (B, Ns, V, 3, 3),
            'batch_grasp_score' / 'batch_grasp_width' (B, Ns, V, A, D) and
            'batch_grasp_view_graspness' (B, Ns, V) added
    """
    seed_xyzs = end_points['xyz_graspable']  # (B, M_point, 3)
    batch_size, num_samples, _ = seed_xyzs.size()

    batch_grasp_points = []
    batch_grasp_views_rot = []
    batch_grasp_scores = []
    batch_grasp_widths = []
    for i in range(batch_size):
        seed_xyz = seed_xyzs[i]  # (Ns, 3)
        poses = end_points['object_poses_list'][i]  # [(3, 4),]

        # get merged grasp points for label computation
        grasp_points_merged = []
        grasp_views_rot_merged = []
        grasp_scores_merged = []
        grasp_widths_merged = []
        for obj_idx, pose in enumerate(poses):
            grasp_points = end_points['grasp_points_list'][i][obj_idx]  # (Np, 3)
            grasp_scores = end_points['grasp_scores_list'][i][obj_idx]  # (Np, V, A, D)
            grasp_widths = end_points['grasp_widths_list'][i][obj_idx]  # (Np, V, A, D)
            _, V, A, D = grasp_scores.size()
            num_grasp_points = grasp_points.size(0)
            # generate and transform template grasp views into the scene frame
            grasp_views = generate_grasp_views(V).to(pose.device)  # (V, 3)
            grasp_points_trans = transform_point_cloud(grasp_points, pose, '3x4')
            grasp_views_trans = transform_point_cloud(grasp_views, pose[:3, :3], '3x3')
            # generate and transform template grasp view rotation
            angles = torch.zeros(grasp_views.size(0), dtype=grasp_views.dtype, device=grasp_views.device)
            grasp_views_rot = batch_viewpoint_params_to_matrix(-grasp_views, angles)  # (V, 3, 3)
            grasp_views_rot_trans = torch.matmul(pose[:3, :3], grasp_views_rot)  # (V, 3, 3)

            # assign views: for each template view, the nearest transformed view
            # (knn returns 1-based indices, hence the -1)
            grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0)
            grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0)
            view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1
            grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds)  # (V, 3, 3)
            grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1,
                                                                              -1)  # (Np, V, 3, 3)
            grasp_scores = torch.index_select(grasp_scores, 1, view_inds)  # (Np, V, A, D)
            grasp_widths = torch.index_select(grasp_widths, 1, view_inds)  # (Np, V, A, D)
            # add to list
            grasp_points_merged.append(grasp_points_trans)
            grasp_views_rot_merged.append(grasp_views_rot_trans)
            grasp_scores_merged.append(grasp_scores)
            grasp_widths_merged.append(grasp_widths)

        grasp_points_merged = torch.cat(grasp_points_merged, dim=0)  # (Np', 3)
        grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0)  # (Np', V, 3, 3)
        grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0)  # (Np', V, A, D)
        grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0)  # (Np', V, A, D)

        # compute nearest labeled grasp point for every seed point
        seed_xyz_ = seed_xyz.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Ns)
        grasp_points_merged_ = grasp_points_merged.transpose(0, 1).contiguous().unsqueeze(0)  # (1, 3, Np')
        nn_inds = knn(grasp_points_merged_, seed_xyz_, k=1).squeeze() - 1  # (Ns)

        # assign anchor points to real points
        grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds)  # (Ns, 3)
        grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds)  # (Ns, V, 3, 3)
        grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds)  # (Ns, V, A, D)
        grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds)  # (Ns, V, A, D)

        # add to batch
        batch_grasp_points.append(grasp_points_merged)
        batch_grasp_views_rot.append(grasp_views_rot_merged)
        batch_grasp_scores.append(grasp_scores_merged)
        batch_grasp_widths.append(grasp_widths_merged)

    batch_grasp_points = torch.stack(batch_grasp_points, 0)  # (B, Ns, 3)
    batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0)  # (B, Ns, V, 3, 3)
    batch_grasp_scores = torch.stack(batch_grasp_scores, 0)  # (B, Ns, V, A, D)
    batch_grasp_widths = torch.stack(batch_grasp_widths, 0)  # (B, Ns, V, A, D)

    # compute view graspness: fraction of (angle, depth) bins per view whose
    # score is positive and below the friction threshold
    view_u_threshold = 0.6
    view_grasp_num = 48  # presumably A * D (12 angles x 4 depths) -- TODO confirm
    batch_grasp_view_valid_mask = (batch_grasp_scores <= view_u_threshold) & (batch_grasp_scores > 0)  # (B, Ns, V, A, D)
    batch_grasp_view_valid = batch_grasp_view_valid_mask.float()
    batch_grasp_view_graspness = torch.sum(torch.sum(batch_grasp_view_valid, dim=-1), dim=-1) / view_grasp_num  # (B, Ns, V)
    view_graspness_min, _ = torch.min(batch_grasp_view_graspness, dim=-1)  # (B, Ns)
    view_graspness_max, _ = torch.max(batch_grasp_view_graspness, dim=-1)
    # GENERALIZED: expand to the actual number of views instead of the
    # hard-coded literal 300, so the code also works when V != NUM_VIEW.
    num_views = batch_grasp_view_graspness.size(-1)
    view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, num_views)  # (B, Ns, V)
    view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, num_views)  # same shape as batch_grasp_view_graspness
    batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5)

    # process scores: zero out labels that are non-positive or too wide
    label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH)  # (B, Ns, V, A, D)
    batch_grasp_scores[~label_mask] = 0

    end_points['batch_grasp_point'] = batch_grasp_points
    end_points['batch_grasp_view_rot'] = batch_grasp_views_rot
    end_points['batch_grasp_score'] = batch_grasp_scores
    end_points['batch_grasp_width'] = batch_grasp_widths
    end_points['batch_grasp_view_graspness'] = batch_grasp_view_graspness

    return end_points
|
||||
|
||||
|
||||
def match_grasp_view_and_label(end_points):
    """ Slice grasp labels according to predicted views.

    Picks, per seed point, the rotation/score/width label entries of the view
    the network predicted, then log-normalizes the positive friction scores
    so that the best (smallest) score maps to 1 and the worst positive one
    maps to 0.
    """
    top_view_inds = end_points['grasp_top_view_inds']  # (B, Ns)
    template_views_rot = end_points['batch_grasp_view_rot']  # (B, Ns, V, 3, 3)
    grasp_scores = end_points['batch_grasp_score']  # (B, Ns, V, A, D)
    grasp_widths = end_points['batch_grasp_width']  # (B, Ns, V, A, D)

    B, Ns, V, A, D = grasp_scores.size()
    # gather the predicted view along the view dimension (dim 2)
    rot_inds = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, 3, 3)
    top_template_views_rot = torch.gather(template_views_rot, 2, rot_inds).squeeze(2)
    label_inds = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, A, D)
    top_view_grasp_scores = torch.gather(grasp_scores, 2, label_inds).squeeze(2)
    top_view_grasp_widths = torch.gather(grasp_widths, 2, label_inds).squeeze(2)

    # remap positive scores u -> log(u_max/u) / log(u_max/u_min) in [0, 1]
    u_max = top_view_grasp_scores.max()
    po_mask = top_view_grasp_scores > 0
    if torch.sum(po_mask) > 0:
        u_min = top_view_grasp_scores[po_mask].min()
        top_view_grasp_scores[po_mask] = torch.log(u_max / top_view_grasp_scores[po_mask]) / (torch.log(u_max / u_min) + 1e-6)

    end_points['batch_grasp_score'] = top_view_grasp_scores  # (B, Ns, A, D)
    end_points['batch_grasp_width'] = top_view_grasp_widths  # (B, Ns, A, D)

    return top_template_views_rot, end_points
|
||||
@@ -0,0 +1,121 @@
|
||||
""" Tools for loss computation.
|
||||
Author: chenxi-wang
|
||||
"""
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
# Grasp-label hyperparameters shared across the loss computation.
GRASP_MAX_WIDTH = 0.1  # maximum gripper opening width (meters); wider labels are zeroed out
GRASPNESS_THRESHOLD = 0.1  # graspness cutoff -- not used in this file; TODO confirm against the sampling code
NUM_VIEW = 300  # number of approach views sampled on the unit sphere (default N of generate_grasp_views)
NUM_ANGLE = 12  # number of in-plane rotation bins -- presumably the A dim of the label tensors; verify
NUM_DEPTH = 4  # number of grasp depth bins -- presumably the D dim of the label tensors; verify
M_POINT = 1024  # number of seed points sampled per scene -- TODO confirm against the network config
|
||||
|
||||
|
||||
def transform_point_cloud(cloud, transform, format='4x4'):
    """Transform points into new coordinates with a transformation matrix.

    Input:
        cloud: [torch.FloatTensor, (N,3)]
            points in the original coordinates
        transform: [torch.FloatTensor, (3,3)/(3,4)/(4,4)]
            rotation-only ('3x3') or rotation+translation ('3x4'/'4x4') matrix
        format: [string, '3x3'/'3x4'/'4x4']
            shape of the transformation matrix

    Output:
        cloud_transformed: [torch.FloatTensor, (N,3)]
            points in the new coordinates
    """
    if format not in ('3x3', '3x4', '4x4'):
        raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.')
    if format == '3x3':
        # pure rotation: no homogeneous coordinate needed
        return torch.matmul(transform, cloud.T).T
    # append a homogeneous 1 so translation is applied by the matmul
    ones = cloud.new_ones(cloud.size(0), device=cloud.device).unsqueeze(-1)
    homogeneous = torch.cat([cloud, ones], dim=1)
    transformed = torch.matmul(transform, homogeneous.T).T
    return transformed[:, :3]
|
||||
|
||||
|
||||
def generate_grasp_views(N=300, phi=(np.sqrt(5) - 1) / 2, center=np.zeros(3), r=1):
    """ View sampling on a unit sphere using Fibonacci lattices.
        Ref: https://arxiv.org/abs/0912.4540

        Input:
            N: [int]
                number of sampled views
            phi: [float]
                constant for view coordinate calculation, different phi's bring
                different distributions, default: (sqrt(5)-1)/2
            center: [np.ndarray, (3,), np.float32]
                sphere center (the default array is never mutated, so the
                mutable-default is safe here)
            r: [float]
                sphere radius

        Output:
            views: [torch.FloatTensor, (N,3)]
                sampled view coordinates
    """
    # Vectorized Fibonacci lattice: same math as the per-view loop, computed
    # for all N views at once with numpy.
    idxs = np.arange(N)
    zs = (2 * idxs + 1) / N - 1                 # z uniformly spaced in (-1, 1)
    radii = np.sqrt(1 - zs ** 2)                # radius of the z-slice circle
    thetas = 2 * idxs * np.pi * phi             # golden-angle azimuth sequence
    views = np.stack([radii * np.cos(thetas), radii * np.sin(thetas), zs], axis=1)
    views = r * views + center
    return torch.from_numpy(views.astype(np.float32))
|
||||
|
||||
|
||||
def batch_viewpoint_params_to_matrix(batch_towards, batch_angle):
    """ Transform approach vectors and in-plane rotation angles to rotation matrices.

        Input:
            batch_towards: [torch.FloatTensor, (N,3)]
                approach vectors in batch
            batch_angle: [torch.FloatTensor, (N,)]
                in-plane rotation angles in batch

        Output:
            batch_matrix: [torch.FloatTensor, (N,3,3)]
                rotation matrices in batch
    """
    axis_x = batch_towards
    ones = torch.ones(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)
    zeros = torch.zeros(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device)
    # y axis: perpendicular to the approach vector, lying in the xy-plane
    axis_y = torch.stack([-axis_x[:, 1], axis_x[:, 0], zeros], dim=-1)
    # degenerate case (approach parallel to z): pick y = (0, 1, 0)
    mask_y = (torch.norm(axis_y, dim=-1) == 0)
    axis_y[mask_y, 1] = 1
    axis_x = axis_x / torch.norm(axis_x, dim=-1, keepdim=True)
    axis_y = axis_y / torch.norm(axis_y, dim=-1, keepdim=True)
    # BUGFIX: take the cross product along the last dim explicitly. Without
    # `dim`, torch.cross uses the *first* dimension of size 3 — for a batch of
    # exactly 3 vectors that is the batch dimension, silently producing wrong
    # matrices (and the implicit-dim form is deprecated).
    axis_z = torch.cross(axis_x, axis_y, dim=-1)
    sin = torch.sin(batch_angle)
    cos = torch.cos(batch_angle)
    # R1: in-plane rotation about the approach (x) axis by batch_angle
    R1 = torch.stack([ones, zeros, zeros, zeros, cos, -sin, zeros, sin, cos], dim=-1)
    R1 = R1.reshape([-1, 3, 3])
    # R2: columns are the orthonormal frame axes (x, y, z)
    R2 = torch.stack([axis_x, axis_y, axis_z], dim=-1)
    batch_matrix = torch.matmul(R2, R1)
    return batch_matrix
|
||||
|
||||
|
||||
def huber_loss(error, delta=1.0):
    """Element-wise Huber loss.

    Args:
        error: Torch tensor (d1,d2,...,dk), error = pred - gt or dist(pred, gt)
        delta: threshold between the quadratic and linear regimes

    Returns:
        loss: Torch tensor (d1,d2,...,dk)

        0.5 * |x|^2                   if |x| <= d
        0.5 * d^2 + d * (|x| - d)     if |x| >  d

    Author: Charles R. Qi
    Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py
    """
    abs_err = error.abs()
    # split |x| into a capped quadratic part and the linear remainder
    quad = abs_err.clamp(max=delta)
    lin = abs_err - quad
    return 0.5 * quad.pow(2) + delta * lin
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,20 @@
|
||||
# import unittest
|
||||
# import gc
|
||||
# import operator as op
|
||||
# import functools
|
||||
import torch
|
||||
# from torch.autograd import Variable, Function
|
||||
# from knn_pytorch import knn_pytorch
|
||||
# import knn_pytorch
|
||||
from . import knn_pytorch
|
||||
|
||||
|
||||
def knn(ref, query, k=1):
    """ Compute k nearest neighbors for each query point.

    Input:
        ref: [torch.Tensor, (B, dim, N_ref)]
            reference points -- layout per the C/CUDA kernel; TODO confirm
        query: [torch.Tensor, (B, dim, N_query)]
            query points; moved to ref's device before the kernel call
        k: [int]
            number of neighbors, default: 1

    Output:
        inds: [torch.LongTensor, (B, k, N_query)]
            indices of the nearest reference points. NOTE(review): the
            C/CUDA kernels write 1-based indices; callers subtract 1.
    """
    device = ref.device
    # cast to float32 for the kernel; the redundant `.to(device)` on ref
    # (already on its own device) has been dropped
    ref = ref.float()
    query = query.float().to(device)
    # allocate the output buffer directly with the right dtype and device
    inds = torch.empty(query.shape[0], k, query.shape[2], dtype=torch.long, device=device)
    knn_pytorch.knn(ref, query, inds)
    return inds
|
||||
@@ -0,0 +1 @@
|
||||
from .knn_pytorch import *
|
||||
66
vision_detect/vision_detect/VisionDetect/net/knn/setup.py
Normal file
66
vision_detect/vision_detect/VisionDetect/net/knn/setup.py
Normal file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import glob
|
||||
import os
|
||||
|
||||
import torch
|
||||
from setuptools import find_packages
|
||||
from setuptools import setup
|
||||
from torch.utils.cpp_extension import CUDA_HOME
|
||||
from torch.utils.cpp_extension import CppExtension
|
||||
from torch.utils.cpp_extension import CUDAExtension
|
||||
|
||||
requirements = ["torch", "torchvision"]
|
||||
|
||||
|
||||
def get_extensions():
    """Build the list of C++/CUDA extension modules for the KNN op.

    Always compiles the C++ sources; when a CUDA toolchain is available
    (torch sees a GPU and CUDA_HOME is set) it switches to CUDAExtension,
    adds the .cu sources, defines WITH_CUDA, and passes the usual nvcc
    half-precision guard flags.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    src_dir = os.path.join(base_dir, "src")

    cpp_main = glob.glob(os.path.join(src_dir, "*.cpp"))
    cpp_cpu = glob.glob(os.path.join(src_dir, "cpu", "*.cpp"))
    cu_sources = glob.glob(os.path.join(src_dir, "cuda", "*.cu"))

    sources = cpp_main + cpp_cpu
    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        sources = sources + cu_sources
        define_macros = define_macros + [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    # glob already returns absolute paths; os.path.join keeps them unchanged
    sources = [os.path.join(src_dir, s) for s in sources]

    return [
        extension(
            "knn_pytorch.knn_pytorch",
            sources,
            include_dirs=[src_dir],
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
|
||||
|
||||
|
||||
# Packaging entry point: `python setup.py build_ext`/`install` compiles the
# extension. BuildExtension (from torch.utils.cpp_extension) supplies the
# correct compiler/nvcc flags for mixed C++/CUDA builds.
setup(
    name="knn_pytorch",
    version="0.1",
    author="foolyc",
    url="https://github.com/foolyc/torchKNN",
    description="KNN implement in Pytorch 1.0 including both cpu version and gpu version",
    ext_modules=get_extensions(),  # CPU-only or CUDA build, decided at setup time
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
|
||||
@@ -0,0 +1,56 @@
|
||||
#include "cpu/vision.h"
|
||||
|
||||
|
||||
// Brute-force CPU k-nearest-neighbors (reference implementation).
//
//   ref_dev   : reference points, dimension-major layout (height x ref_width)
//   query_dev : query points, same layout (height x query_width)
//   dist_dev  : scratch buffer of squared distances (query_width x ref_width)
//   ind_dev   : output indices (k x query_width); written 1-BASED -- the
//               Python wrapper subtracts 1 after the call
//   ind_buf   : scratch permutation buffer of length ref_width
//
// NOTE(review): assumes k <= ref_width; not validated here.
void knn_cpu(float* ref_dev, int ref_width, float* query_dev, int query_width,
             int height, int k, float* dist_dev, long* ind_dev, long* ind_buf)
{
    // Compute all the squared distances (no sqrt: ranking is unaffected).
    for(int query_idx = 0;query_idx<query_width;query_idx++)
    {
        for(int ref_idx = 0;ref_idx < ref_width;ref_idx++)
        {
            dist_dev[query_idx * ref_width + ref_idx] = 0;
            for(int hi=0;hi<height;hi++)
                dist_dev[query_idx * ref_width + ref_idx] += (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]) * (ref_dev[hi * ref_width + ref_idx] - query_dev[hi * query_width + query_idx]);
        }
    }

    float temp_value;
    long temp_idx;
    // Sort each query's distance row with bubble sort, carrying the 1-based
    // index permutation along. Bubble sort is stable, so ties keep their
    // original reference order.
    for(int query_idx = 0;query_idx<query_width;query_idx++)
    {
        // start from the identity permutation, 1-based
        for(int i = 0;i < ref_width;i++)
        {
            ind_buf[i] = i+1;
        }
        for(int i = 0;i < ref_width;i++)
            for(int j = 0;j < ref_width - i - 1;j++)
            {
                if(dist_dev[query_idx * ref_width + j] > dist_dev[query_idx * ref_width + j + 1])
                {
                    temp_value = dist_dev[query_idx * ref_width + j];
                    dist_dev[query_idx * ref_width + j] = dist_dev[query_idx * ref_width + j + 1];
                    dist_dev[query_idx * ref_width + j + 1] = temp_value;
                    temp_idx = ind_buf[j];
                    ind_buf[j] = ind_buf[j + 1];
                    ind_buf[j + 1] = temp_idx;
                }

            }

        // emit the k smallest; note the transposed (k x query_width) layout
        for(int i = 0;i < k;i++)
            ind_dev[query_idx + i * query_width] = ind_buf[i];
#if DEBUG
        for(int i = 0;i < ref_width;i++)
            printf("%d, ", ind_buf[i]);
        printf("\n");
#endif

    }

}
|
||||
@@ -0,0 +1,6 @@
|
||||
#pragma once
#include <torch/extension.h>

// CPU fallback of the KNN kernel (defined in cpu/*.cpp). Fills ind_dev with
// the 1-based indices of the k nearest reference points for every query
// point; dist_dev and ind_buf are caller-provided scratch buffers.
void knn_cpu(float* ref_dev, int ref_width,
             float* query_dev, int query_width,
             int height, int k, float* dist_dev, long* ind_dev, long* ind_buf);
|
||||
269
vision_detect/vision_detect/VisionDetect/net/knn/src/cuda/knn.cu
Normal file
269
vision_detect/vision_detect/VisionDetect/net/knn/src/cuda/knn.cu
Normal file
@@ -0,0 +1,269 @@
|
||||
/** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA
|
||||
* The modifications are
|
||||
* removed texture memory usage
|
||||
* removed split query KNN computation
|
||||
* added feature extraction with bilinear interpolation
|
||||
*
|
||||
* Last modified by Christopher B. Choy <chrischoy@ai.stanford.edu> 12/23/2016
|
||||
*/
|
||||
|
||||
// Includes
|
||||
#include <cstdio>
|
||||
#include "cuda.h"
|
||||
|
||||
#define IDX2D(i, j, dj) (dj * i + j)
|
||||
#define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk))
|
||||
|
||||
#define BLOCK 512
|
||||
#define MAX_STREAMS 512
|
||||
|
||||
// Constants used by the program
|
||||
#define BLOCK_DIM 16
|
||||
#define DEBUG 0
|
||||
|
||||
|
||||
/**
  * Computes the distance between two matrix A (reference points) and
  * B (query points) containing respectively wA and wB points.
  *
  * The result is the full wA x wB matrix of SQUARED Euclidean distances
  * (the sqrt pass is a separate, currently disabled, kernel).
  *
  * @param A     pointer on the matrix A
  * @param wA    width of the matrix A = number of points in A
  * @param B     pointer on the matrix B
  * @param wB    width of the matrix B = number of points in B
  * @param dim   dimension of points = height of matrices A and B
  * @param AB    pointer on the matrix containing the wA*wB distances computed
  */
__global__ void cuComputeDistanceGlobal( float* A, int wA,
    float* B, int wB, int dim,  float* AB){

  // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B
  __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM];
  __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM];

  // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step)
  // NOTE(review): these are __shared__ but assigned by every thread without
  // synchronization; all threads compute the same values from blockIdx, so
  // the racy writes are benign by construction.
  __shared__ int begin_A;
  __shared__ int begin_B;
  __shared__ int step_A;
  __shared__ int step_B;
  __shared__ int end_A;

  // Thread index
  int tx = threadIdx.x;
  int ty = threadIdx.y;

  // Other variables
  float tmp;
  float ssd = 0;  // running sum of squared differences for this (ref, query) pair

  // Loop parameters
  begin_A = BLOCK_DIM * blockIdx.y;
  begin_B = BLOCK_DIM * blockIdx.x;
  step_A  = BLOCK_DIM * wA;
  step_B  = BLOCK_DIM * wB;
  end_A   = begin_A + (dim-1) * wA;

  // Conditions
  int cond0 = (begin_A + tx < wA); // used to write in shared memory
  int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix
  int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix

  // Loop over all the sub-matrices of A and B required to compute the block sub-matrix
  for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) {
    // Load the matrices from device memory to shared memory; each thread loads one element of each matrix
    if (a/wA + ty < dim){
      shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0;
      shared_B[ty][tx] = (cond1)? B[b + wB * ty + tx] : 0;
    }
    else{
      // out-of-range dimensions contribute 0 to the sum
      shared_A[ty][tx] = 0;
      shared_B[ty][tx] = 0;
    }

    // Synchronize to make sure the matrices are loaded
    __syncthreads();

    // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix
    if (cond2 && cond1){
      for (int k = 0; k < BLOCK_DIM; ++k){
        tmp = shared_A[k][ty] - shared_B[k][tx];
        ssd += tmp*tmp;
      }
    }

    // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration
    __syncthreads();
  }

  // Write the block sub-matrix to device memory; each thread writes one element
  if (cond2 && cond1)
    AB[(begin_A + ty) * wB + begin_B + tx] = ssd;
}
|
||||
|
||||
|
||||
/**
  * Gathers k-th smallest distances for each column of the distance matrix in the top.
  *
  * Partial insertion sort: after this kernel, the first k entries of every
  * column hold the k smallest distances in ascending order, and the index
  * matrix holds their 1-BASED row indices (the Python wrapper subtracts 1).
  *
  * @param dist        distance matrix
  * @param ind         index matrix
  * @param width       width of the distance matrix and of the index matrix
  * @param height      height of the distance matrix and of the index matrix
  * @param k           number of neighbors to consider
  */
__global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){

  // Variables
  int l, i, j;
  float *p_dist;
  long  *p_ind;
  float curr_dist, max_dist;
  long  curr_row,  max_row;
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;  // one thread per column

  if (xIndex<width){
    // Pointer shift, initialization, and max value
    p_dist   = dist + xIndex;
    p_ind    = ind  + xIndex;
    max_dist = p_dist[0];
    p_ind[0] = 1;  // indices are written 1-based

    // Part 1 : insertion-sort the first k elements of the column
    for (l=1; l<k; l++){
      curr_row  = l * width;
      curr_dist = p_dist[curr_row];
      if (curr_dist<max_dist){
        // find the insertion position among the already-sorted prefix
        i=l-1;
        for (int a=0; a<l-1; a++){
          if (p_dist[a*width]>curr_dist){
            i=a;
            break;
          }
        }
        // shift the tail down one row and insert
        for (j=l; j>i; j--){
          p_dist[j*width] = p_dist[(j-1)*width];
          p_ind[j*width]   = p_ind[(j-1)*width];
        }
        p_dist[i*width] = curr_dist;
        p_ind[i*width]   = l+1;
      } else {
        p_ind[l*width] = l+1;
      }
      max_dist = p_dist[curr_row];
    }

    // Part 2 : insert each remaining element into the k-th first lines if it
    // beats the current k-th smallest distance
    max_row = (k-1)*width;
    for (l=k; l<height; l++){
      curr_dist = p_dist[l*width];
      if (curr_dist<max_dist){
        i=k-1;
        for (int a=0; a<k-1; a++){
          if (p_dist[a*width]>curr_dist){
            i=a;
            break;
          }
        }
        for (j=k-1; j>i; j--){
          p_dist[j*width] = p_dist[(j-1)*width];
          p_ind[j*width]   = p_ind[(j-1)*width];
        }
        p_dist[i*width] = curr_dist;
        p_ind[i*width]   = l+1;
        max_dist = p_dist[max_row];
      }
    }
  }
}
|
||||
|
||||
|
||||
/**
  * Computes the square root of the first line (width-th first element)
  * of the distance matrix.
  *
  * NOTE(review): the only call site in knn_device is commented out, so the
  * pipeline currently ranks neighbors by SQUARED distance (order-equivalent).
  *
  * @param dist    distance matrix
  * @param width   width of the distance matrix
  * @param k       number of neighbors to consider
  */
__global__ void cuParallelSqrt(float *dist, int width, int k){
  unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
  if (xIndex<width && yIndex<k)
    dist[yIndex*width + xIndex] = sqrt(dist[yIndex*width + xIndex]);
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------------------------//
|
||||
// K-th NEAREST NEIGHBORS //
|
||||
//-----------------------------------------------------------------------------------------------//
|
||||
|
||||
/**
  * K nearest neighbor algorithm
  * - Initialize CUDA
  * - Allocate device memory
  * - Copy point sets (reference and query points) from host to device memory
  * - Compute the distances + indexes to the k nearest neighbors for each query point
  * - Copy distances from device to host memory
  *
  * @param ref_host      reference points ; pointer to linear matrix
  * @param ref_nb        number of reference points ; width of the matrix
  * @param query_host    query points ; pointer to linear matrix
  * @param query_nb      number of query points ; width of the matrix
  * @param dim           dimension of points ; height of the matrices
  * @param k             number of neighbor to consider
  * @param dist_host     distances to k nearest neighbors ; pointer to linear matrix
  * @param dist_host     indexes of the k nearest neighbors ; pointer to linear matrix
  *
  */
void knn_device(float* ref_dev, int ref_nb, float* query_dev, int query_nb,
    int dim, int k, float* dist_dev, long* ind_dev, cudaStream_t stream){

  // Grids and threads: ceil-divide the work into 16x16 (distance) and
  // 256x1 (sort) launch configurations
  dim3 g_16x16(query_nb/16, ref_nb/16, 1);
  dim3 t_16x16(16, 16, 1);
  if (query_nb%16 != 0) g_16x16.x += 1;
  if (ref_nb  %16 != 0) g_16x16.y += 1;
  //
  dim3 g_256x1(query_nb/256, 1, 1);
  dim3 t_256x1(256, 1, 1);
  if (query_nb%256 != 0) g_256x1.x += 1;

  // launch config for the (disabled) sqrt pass below
  dim3 g_k_16x16(query_nb/16, k/16, 1);
  dim3 t_k_16x16(16, 16, 1);
  if (query_nb%16 != 0) g_k_16x16.x += 1;
  if (k  %16 != 0) g_k_16x16.y += 1;

  // Kernel 1: Compute all the distances
  cuComputeDistanceGlobal<<<g_16x16, t_16x16, 0, stream>>>(ref_dev, ref_nb, query_dev, query_nb, dim, dist_dev);

  // Kernel 2: Sort each column
  // (same stream, so it is ordered after Kernel 1; dist_dev can therefore be
  // reused safely across successive knn_device calls on that stream)
  cuInsertionSort<<<g_256x1, t_256x1, 0, stream>>>(dist_dev, ind_dev, query_nb, ref_nb, k);

  // Kernel 3: Compute square root of k first elements
  // (disabled: squared distances give the same neighbor ranking)
  // cuParallelSqrt<<<g_k_16x16,t_k_16x16, 0, stream>>>(dist_dev, query_nb, k);

#if DEBUG
  unsigned int size_of_float = sizeof(float);
  unsigned long size_of_long = sizeof(long);

  float* dist_host = new float[query_nb * k];
  long* idx_host = new long[query_nb * k];

  // Memory copy of output from device to host
  cudaMemcpy(&dist_host[0], dist_dev,
      query_nb * k *size_of_float, cudaMemcpyDeviceToHost);

  cudaMemcpy(&idx_host[0], ind_dev,
      query_nb * k * size_of_long, cudaMemcpyDeviceToHost);

  int i = 0;
  for(i = 0; i < 100; i++){
    printf("IDX[%d]: %d\n", i, (int)idx_host[i]);
  }
#endif
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
#include <torch/extension.h>
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
|
||||
void knn_device(float* ref_dev, int ref_width,
|
||||
float* query_dev, int query_width,
|
||||
int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream);
|
||||
81
vision_detect/vision_detect/VisionDetect/net/knn/src/knn.h
Normal file
81
vision_detect/vision_detect/VisionDetect/net/knn/src/knn.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
#include "cpu/vision.h"
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
#include "cuda/vision.h"
|
||||
//#include <THC/THC.h>
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
//extern THCState *state;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Entry point exposed to Python: batched k-nearest-neighbors.
//
//   ref   : (batch, dim, ref_nb) reference points, float
//   query : (batch, dim, query_nb) query points, float
//   idx   : (batch, k, query_nb) output tensor, long; receives 1-BASED
//           indices (the Python wrapper subtracts 1)
//
// Dispatches to the CUDA kernel when ref lives on the GPU (requires the
// WITH_CUDA build), otherwise to the CPU reference implementation.
int knn(at::Tensor& ref, at::Tensor& query, at::Tensor& idx)
{

  // NOTE: check dimensions
  long batch, ref_nb, query_nb, dim, k;
  batch = ref.size(0);
  dim = ref.size(1);
  k = idx.size(1);
  ref_nb = ref.size(2);
  query_nb = query.size(2);

  float *ref_dev = ref.data<float>();
  float *query_dev = query.data<float>();
  long *idx_dev = idx.data<long>();

  if (ref.type().is_cuda()) {
#ifdef WITH_CUDA
    // Allocate the squared-distance scratch buffer once for the whole batch.
    float *dist_dev = nullptr;
    cudaError_t err = cudaMalloc((void**)&dist_dev, ref_nb * query_nb * sizeof(float));
    if (err != cudaSuccess) {
      fprintf(stderr, "cudaMalloc failed\n");
      exit(1);
    }
    // BUGFIX: dist_dev was previously cudaFree'd HERE, before the loop below
    // used it (use-after-free), and then freed again afterwards (double
    // free). The premature free is removed; the single free after the loop
    // is the only one.

    for (int b = 0; b < batch; b++)
    {
      knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
        dist_dev, idx_dev + b * k * query_nb, c10::cuda::getCurrentCUDAStream());
    }
    cudaFree(dist_dev);
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
      printf("error in knn: %s\n", cudaGetErrorString(err));
      // NOTE(review): exit() in a library aborts the whole host interpreter;
      // a TORCH_CHECK/throw would be friendlier. Kept to preserve behavior.
      fprintf(stderr, "aborting\n");
      exit(1);
    }
    return 1;
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  // CPU path: host scratch buffers for the distance matrix and the per-query
  // index permutation, shared across the batch loop.
  float *dist_dev = (float*)malloc(ref_nb * query_nb * sizeof(float));
  long *ind_buf = (long*)malloc(ref_nb * sizeof(long));
  for (int b = 0; b < batch; b++) {
    knn_cpu(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
      dist_dev, idx_dev + b * k * query_nb, ind_buf);
  }

  free(dist_dev);
  free(ind_buf);

  return 1;

}
|
||||
@@ -0,0 +1,5 @@
|
||||
#include "knn.h"

// Python binding: exposes knn(ref, query, idx) -- defined in knn.h -- as
// knn_pytorch.knn_pytorch.knn. idx is filled in place with 1-based indices.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("knn", &knn, "k-nearest neighbors");
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,11 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#pragma once
#include <torch/extension.h>

// Ball neighborhood query: for each point of new_xyz, gather indices of up to
// nsample points of xyz within `radius`. NOTE(review): exact tensor shapes
// are fixed by the .cpp/.cu implementation -- confirm there.
at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
                      const int nsample);
|
||||
@@ -0,0 +1,46 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.

#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cmath>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

// Hard upper bound on threads per block used by these helpers.
#define TOTAL_THREADS 512

// Largest power of two <= work_size, clamped to [1, TOTAL_THREADS].
inline int opt_n_threads(int work_size) {
  const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);

  return max(min(1 << pow_2, TOTAL_THREADS), 1);
}

// 2-D block shape: give x as many threads as it needs, fill the remaining
// budget (TOTAL_THREADS / x_threads) with y.
inline dim3 opt_block_config(int x, int y) {
  const int x_threads = opt_n_threads(x);
  const int y_threads =
      max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
  dim3 block_config(x_threads, y_threads, 1);

  return block_config;
}

// Abort the process with the message and location of the last CUDA error,
// if any. Intended to be placed right after kernel launches.
#define CUDA_CHECK_ERRORS()                                           \
  do {                                                                \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
      fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n",  \
              cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
              __FILE__);                                              \
      exit(-1);                                                       \
    }                                                                 \
  } while (0)

#endif
|
||||
@@ -0,0 +1,7 @@
|
||||
// Author: chenxi-wang
|
||||
|
||||
#pragma once
|
||||
#include <torch/extension.h>
|
||||
|
||||
at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,
|
||||
const int nsample);
|
||||
@@ -0,0 +1,10 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#pragma once
|
||||
#include <torch/extension.h>
|
||||
|
||||
at::Tensor group_points(at::Tensor points, at::Tensor idx);
|
||||
at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <torch/extension.h>
|
||||
#include <vector>
|
||||
|
||||
std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows);
|
||||
at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
|
||||
at::Tensor weight);
|
||||
at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
|
||||
at::Tensor weight, const int m);
|
||||
@@ -0,0 +1,11 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#pragma once
|
||||
#include <torch/extension.h>
|
||||
|
||||
at::Tensor gather_points(at::Tensor points, at::Tensor idx);
|
||||
at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n);
|
||||
at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples);
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#pragma once
|
||||
#ifdef WITH_CUDA
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#endif
|
||||
#include <torch/extension.h>
|
||||
|
||||
#define CHECK_CUDA(x) \
|
||||
do { \
|
||||
TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor"); \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_CONTIGUOUS(x) \
|
||||
do { \
|
||||
TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_IS_INT(x) \
|
||||
do { \
|
||||
TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \
|
||||
#x " must be an int tensor"); \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_IS_FLOAT(x) \
|
||||
do { \
|
||||
TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \
|
||||
#x " must be a float tensor"); \
|
||||
} while (0)
|
||||
@@ -0,0 +1,113 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
#include<iostream>
|
||||
#include "ball_query.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
void query_ball_point_kernel_wrapper(
|
||||
int b, int n, int m, float radius,
|
||||
int nsample,
|
||||
const float *new_xyz,
|
||||
const float *xyz,
|
||||
int *idx
|
||||
);
|
||||
#endif
|
||||
|
||||
void query_ball_point_cpu(
|
||||
int b, int n, int m, float radius,
|
||||
int nsample,
|
||||
const float *new_xyz,
|
||||
const float *xyz,
|
||||
int *idx
|
||||
);
|
||||
|
||||
#ifdef DEBUG
|
||||
void ball_query_debug(const torch::Tensor& new_xyz, const torch::Tensor& xyz,
|
||||
float radius, int nsample) {
|
||||
// 优化后的打印代码,格式更清晰,便于调试
|
||||
std::cout << "===== ball_query.cpp 调试信息 =====" << std::endl;
|
||||
// 打印new_xyz的尺寸(维度)
|
||||
std::cout << "new_xyz.size(): (";
|
||||
for (auto s : new_xyz.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
// 打印xyz的尺寸(维度)
|
||||
std::cout << "xyz.size(): (";
|
||||
for (auto s : xyz.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
// 打印半径和采样数
|
||||
std::cout << "radius: " << radius << std::endl;
|
||||
std::cout << "nsample: " << nsample << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius,
|
||||
const int nsample) {
|
||||
CHECK_CONTIGUOUS(new_xyz);
|
||||
CHECK_CONTIGUOUS(xyz);
|
||||
CHECK_IS_FLOAT(new_xyz);
|
||||
CHECK_IS_FLOAT(xyz);
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (new_xyz.is_cuda()) {
|
||||
CHECK_CUDA(xyz);
|
||||
// 新增1:CUDA张量设备端连续性强校验(解决内存布局碎片化问题)
|
||||
TORCH_CHECK(new_xyz.is_contiguous(), "new_xyz must be contiguous on CUDA device");
|
||||
TORCH_CHECK(xyz.is_contiguous(), "xyz must be contiguous on CUDA device");
|
||||
// 新增2:CUDA设备同步,确保张量内存分配完成(解决指针无效问题)
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
ball_query_debug(new_xyz, xyz, radius, nsample);
|
||||
#endif
|
||||
|
||||
at::Tensor idx =
|
||||
torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
|
||||
at::device(new_xyz.device()).dtype(at::ScalarType::Int));
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (new_xyz.is_cuda()) {
|
||||
query_ball_point_kernel_wrapper(
|
||||
xyz.size(0),
|
||||
xyz.size(1),
|
||||
new_xyz.size(1),
|
||||
radius, nsample,
|
||||
new_xyz.contiguous().data_ptr<float>(),
|
||||
xyz.contiguous().data_ptr<float>(),
|
||||
idx.contiguous().data_ptr<int>()
|
||||
);
|
||||
|
||||
} else {
|
||||
#endif
|
||||
// TORCH_CHECK(false, "CPU not supported");
|
||||
query_ball_point_cpu(
|
||||
xyz.size(0),
|
||||
xyz.size(1),
|
||||
new_xyz.size(1),
|
||||
radius,
|
||||
nsample,
|
||||
new_xyz.contiguous().data_ptr<float>(),
|
||||
xyz.contiguous().data_ptr<float>(),
|
||||
idx.contiguous().data_ptr<int>()
|
||||
);
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
}
|
||||
#endif
|
||||
|
||||
return idx;
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
|
||||
|
||||
void query_ball_point_cpu(
|
||||
int b, int n, int m, float radius,
|
||||
int nsample,
|
||||
const float* new_xyz,
|
||||
const float* xyz,
|
||||
int* idx
|
||||
) {
|
||||
float radius2 = radius * radius;
|
||||
|
||||
for (int batch_index = 0; batch_index < b; ++batch_index) {
|
||||
const float* xyz_b = xyz + batch_index * n * 3;
|
||||
const float* new_xyz_b = new_xyz + batch_index * m * 3;
|
||||
int* idx_b = idx + batch_index * m * nsample;
|
||||
|
||||
for (int j = 0; j < m; ++j) {
|
||||
float nx = new_xyz_b[j * 3 + 0];
|
||||
float ny = new_xyz_b[j * 3 + 1];
|
||||
float nz = new_xyz_b[j * 3 + 2];
|
||||
|
||||
for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
|
||||
float x = xyz_b[k * 3 + 0];
|
||||
float y = xyz_b[k * 3 + 1];
|
||||
float z = xyz_b[k * 3 + 2];
|
||||
|
||||
float d2 =
|
||||
(nx - x) * (nx - x) +
|
||||
(ny - y) * (ny - y) +
|
||||
(nz - z) * (nz - z);
|
||||
|
||||
if (d2 < radius2) {
|
||||
if (cnt == 0) {
|
||||
for (int l = 0; l < nsample; ++l) {
|
||||
idx_b[j * nsample + l] = k;
|
||||
}
|
||||
}
|
||||
idx_b[j * nsample + cnt] = k;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cuda_utils.h"
|
||||
|
||||
// input: new_xyz(b, m, 3) xyz(b, n, 3)
|
||||
// output: idx(b, m, nsample)
|
||||
__global__ void query_ball_point_kernel(int b, int n, int m, float radius,
|
||||
int nsample,
|
||||
const float *__restrict__ new_xyz,
|
||||
const float *__restrict__ xyz,
|
||||
int *__restrict__ idx) {
|
||||
int batch_index = blockIdx.x;
|
||||
xyz += batch_index * n * 3;
|
||||
new_xyz += batch_index * m * 3;
|
||||
idx += m * nsample * batch_index;
|
||||
|
||||
int index = threadIdx.x;
|
||||
int stride = blockDim.x;
|
||||
|
||||
float radius2 = radius * radius;
|
||||
for (int j = index; j < m; j += stride) {
|
||||
float new_x = new_xyz[j * 3 + 0];
|
||||
float new_y = new_xyz[j * 3 + 1];
|
||||
float new_z = new_xyz[j * 3 + 2];
|
||||
for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
|
||||
float x = xyz[k * 3 + 0];
|
||||
float y = xyz[k * 3 + 1];
|
||||
float z = xyz[k * 3 + 2];
|
||||
float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
|
||||
(new_z - z) * (new_z - z);
|
||||
if (d2 < radius2) {
|
||||
if (cnt == 0) {
|
||||
for (int l = 0; l < nsample; ++l) {
|
||||
idx[j * nsample + l] = k;
|
||||
}
|
||||
}
|
||||
idx[j * nsample + cnt] = k;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
|
||||
int nsample, const float *new_xyz,
|
||||
const float *xyz, int *idx) {
|
||||
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
|
||||
query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
|
||||
b, n, m, radius, nsample, new_xyz, xyz, idx);
|
||||
|
||||
CUDA_CHECK_ERRORS();
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include "ball_query.h"
|
||||
#include "group_points.h"
|
||||
#include "interpolate.h"
|
||||
#include "sampling.h"
|
||||
#include "cylinder_query.h"
|
||||
|
||||
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
||||
m.def("gather_points", &gather_points);
|
||||
m.def("gather_points_grad", &gather_points_grad);
|
||||
m.def("furthest_point_sampling", &furthest_point_sampling);
|
||||
|
||||
m.def("three_nn", &three_nn);
|
||||
m.def("three_interpolate", &three_interpolate);
|
||||
m.def("three_interpolate_grad", &three_interpolate_grad);
|
||||
|
||||
m.def("ball_query", &ball_query);
|
||||
|
||||
m.def("group_points", &group_points);
|
||||
m.def("group_points_grad", &group_points_grad);
|
||||
|
||||
m.def("cylinder_query", &cylinder_query);
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
// Author: chenxi-wang
|
||||
|
||||
#include "cylinder_query.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,
|
||||
int nsample, const float *new_xyz,
|
||||
const float *xyz, const float *rot, int *idx);
|
||||
#endif
|
||||
|
||||
|
||||
void query_cylinder_point_cpu(
|
||||
int b, int n, int m, float radius, float hmin, float hmax,
|
||||
int nsample, const float *new_xyz,
|
||||
const float *xyz, const float *rot, int *idx);
|
||||
|
||||
#ifdef DEBUG
|
||||
void cylinder_query_debug(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot,
|
||||
const float radius, const float hmin, const float hmax,
|
||||
const int nsample) {
|
||||
// 优化后的打印代码,格式更清晰,便于调试
|
||||
std::cout << "===== cylinder_query.cpp 调试信息 =====" << std::endl;
|
||||
// 打印new_xyz的尺寸(维度)
|
||||
std::cout << "new_xyz.size(): (";
|
||||
for (auto s : new_xyz.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
// 打印xyz的尺寸(维度)
|
||||
std::cout << "xyz.size(): (";
|
||||
for (auto s : xyz.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
// 打印rot的尺寸(维度)
|
||||
std::cout << "rot.size(): (";
|
||||
for (auto s : rot.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
// 打印半径和采样数
|
||||
std::cout << "radius: " << radius << std::endl;
|
||||
std::cout << "hmin: " << hmin << std::endl;
|
||||
std::cout << "hmax: " << hmax << std::endl;
|
||||
std::cout << "nsample: " << nsample << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax,
|
||||
const int nsample) {
|
||||
CHECK_CONTIGUOUS(new_xyz);
|
||||
CHECK_CONTIGUOUS(xyz);
|
||||
CHECK_CONTIGUOUS(rot);
|
||||
CHECK_IS_FLOAT(new_xyz);
|
||||
CHECK_IS_FLOAT(xyz);
|
||||
CHECK_IS_FLOAT(rot);
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (new_xyz.is_cuda()) {
|
||||
CHECK_CUDA(xyz);
|
||||
CHECK_CUDA(rot);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
cylinder_query_debug(new_xyz, xyz, rot, radius, hmin, hmax, nsample);
|
||||
#endif
|
||||
|
||||
at::Tensor idx =
|
||||
torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample},
|
||||
at::device(new_xyz.device()).dtype(at::ScalarType::Int));
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (new_xyz.is_cuda()) {
|
||||
query_cylinder_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1),
|
||||
radius, hmin, hmax, nsample, new_xyz.data_ptr<float>(),
|
||||
xyz.data_ptr<float>(), rot.data_ptr<float>(), idx.data_ptr<int>());
|
||||
} else {
|
||||
#endif
|
||||
// TORCH_CHECK(false, "CPU not supported");
|
||||
query_cylinder_point_cpu(xyz.size(0), xyz.size(1), new_xyz.size(1),
|
||||
radius, hmin, hmax, nsample, new_xyz.data_ptr<float>(),
|
||||
xyz.data_ptr<float>(), rot.data_ptr<float>(), idx.data_ptr<int>());
|
||||
#ifdef WITH_CUDA
|
||||
}
|
||||
#endif
|
||||
|
||||
return idx;
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
void query_cylinder_point_cpu(
|
||||
int b, int n, int m,
|
||||
float radius,
|
||||
float hmin, float hmax,
|
||||
int nsample,
|
||||
const float *new_xyz,
|
||||
const float *xyz,
|
||||
const float *rot,
|
||||
int *idx
|
||||
) {
|
||||
float radius2 = radius * radius;
|
||||
|
||||
for (int batch_index = 0; batch_index < b; ++batch_index) {
|
||||
const float *xyz_b = xyz + batch_index * n * 3;
|
||||
const float *new_xyz_b = new_xyz + batch_index * m * 3;
|
||||
const float *rot_b = rot + batch_index * m * 9;
|
||||
int *idx_b = idx + batch_index * m * nsample;
|
||||
|
||||
for (int j = 0; j < m; ++j) {
|
||||
float nx = new_xyz_b[j * 3 + 0];
|
||||
float ny = new_xyz_b[j * 3 + 1];
|
||||
float nz = new_xyz_b[j * 3 + 2];
|
||||
|
||||
float r0 = rot_b[j * 9 + 0];
|
||||
float r1 = rot_b[j * 9 + 1];
|
||||
float r2 = rot_b[j * 9 + 2];
|
||||
float r3 = rot_b[j * 9 + 3];
|
||||
float r4 = rot_b[j * 9 + 4];
|
||||
float r5 = rot_b[j * 9 + 5];
|
||||
float r6 = rot_b[j * 9 + 6];
|
||||
float r7 = rot_b[j * 9 + 7];
|
||||
float r8 = rot_b[j * 9 + 8];
|
||||
|
||||
for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
|
||||
float x = xyz_b[k * 3 + 0] - nx;
|
||||
float y = xyz_b[k * 3 + 1] - ny;
|
||||
float z = xyz_b[k * 3 + 2] - nz;
|
||||
|
||||
float x_rot = r0 * x + r3 * y + r6 * z;
|
||||
float y_rot = r1 * x + r4 * y + r7 * z;
|
||||
float z_rot = r2 * x + r5 * y + r8 * z;
|
||||
|
||||
float d2 = y_rot * y_rot + z_rot * z_rot;
|
||||
if (d2 < radius2 && x_rot > hmin && x_rot < hmax) {
|
||||
if (cnt == 0) {
|
||||
for (int l = 0; l < nsample; ++l) {
|
||||
idx_b[j * nsample + l] = k;
|
||||
}
|
||||
}
|
||||
idx_b[j * nsample + cnt] = k;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
// Author: chenxi-wang
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cuda_utils.h"
|
||||
|
||||
__global__ void query_cylinder_point_kernel(int b, int n, int m, float radius, float hmin, float hmax,
|
||||
int nsample,
|
||||
const float *__restrict__ new_xyz,
|
||||
const float *__restrict__ xyz,
|
||||
const float *__restrict__ rot,
|
||||
int *__restrict__ idx) {
|
||||
int batch_index = blockIdx.x;
|
||||
xyz += batch_index * n * 3;
|
||||
new_xyz += batch_index * m * 3;
|
||||
rot += batch_index * m * 9;
|
||||
idx += m * nsample * batch_index;
|
||||
|
||||
int index = threadIdx.x;
|
||||
int stride = blockDim.x;
|
||||
|
||||
float radius2 = radius * radius;
|
||||
for (int j = index; j < m; j += stride) {
|
||||
float new_x = new_xyz[j * 3 + 0];
|
||||
float new_y = new_xyz[j * 3 + 1];
|
||||
float new_z = new_xyz[j * 3 + 2];
|
||||
float r0 = rot[j * 9 + 0];
|
||||
float r1 = rot[j * 9 + 1];
|
||||
float r2 = rot[j * 9 + 2];
|
||||
float r3 = rot[j * 9 + 3];
|
||||
float r4 = rot[j * 9 + 4];
|
||||
float r5 = rot[j * 9 + 5];
|
||||
float r6 = rot[j * 9 + 6];
|
||||
float r7 = rot[j * 9 + 7];
|
||||
float r8 = rot[j * 9 + 8];
|
||||
for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
|
||||
float x = xyz[k * 3 + 0] - new_x;
|
||||
float y = xyz[k * 3 + 1] - new_y;
|
||||
float z = xyz[k * 3 + 2] - new_z;
|
||||
float x_rot = r0 * x + r3 * y + r6 * z;
|
||||
float y_rot = r1 * x + r4 * y + r7 * z;
|
||||
float z_rot = r2 * x + r5 * y + r8 * z;
|
||||
float d2 = y_rot * y_rot + z_rot * z_rot;
|
||||
if (d2 < radius2 && x_rot > hmin && x_rot < hmax) {
|
||||
if (cnt == 0) {
|
||||
for (int l = 0; l < nsample; ++l) {
|
||||
idx[j * nsample + l] = k;
|
||||
}
|
||||
}
|
||||
idx[j * nsample + cnt] = k;
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax,
|
||||
int nsample, const float *new_xyz,
|
||||
const float *xyz, const float *rot, int *idx) {
|
||||
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
|
||||
query_cylinder_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
|
||||
b, n, m, radius, hmin, hmax, nsample, new_xyz, xyz, rot, idx);
|
||||
|
||||
CUDA_CHECK_ERRORS();
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include "group_points.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
|
||||
const float *points, const int *idx,
|
||||
float *out);
|
||||
|
||||
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
|
||||
int nsample, const float *grad_out,
|
||||
const int *idx, float *grad_points);
|
||||
#endif
|
||||
|
||||
|
||||
void group_points_cpu(int b, int c, int n, int npoints, int nsample,
|
||||
const float *points, const int *idx,
|
||||
float *out);
|
||||
|
||||
void group_points_grad_cpu(int b, int c, int n, int npoints,
|
||||
int nsample, const float *grad_out,
|
||||
const int *idx, float *grad_points);
|
||||
|
||||
#ifdef DEBUG
|
||||
void group_points_debug(at::Tensor points, at::Tensor idx) {
|
||||
// 优化后的打印代码,格式更清晰,便于调试
|
||||
std::cout << "===== group_points.cpp 调试信息 =====" << std::endl;
|
||||
std::cout << "===== group points 函数调试信息 =====" << std::endl;
|
||||
// 打印new_xyz的尺寸(维度)
|
||||
std::cout << "points.size(): (";
|
||||
for (auto s : points.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
// 打印xyz的尺寸(维度)
|
||||
std::cout << "idx.size(): (";
|
||||
for (auto s : idx.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
|
||||
void group_points_grad_debug(at::Tensor grad_out, at::Tensor idx, const int n) {
|
||||
// 优化后的打印代码,格式更清晰,便于调试
|
||||
std::cout << "===== group_points.cpp 调试信息 =====" << std::endl;
|
||||
std::cout << "===== group_points_grad 函数调试信息 =====" << std::endl;
|
||||
// 打印new_xyz的尺寸(维度)
|
||||
std::cout << "grad_out.size(): (";
|
||||
for (auto s : grad_out.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
// 打印xyz的尺寸(维度)
|
||||
std::cout << "idx.size(): (";
|
||||
for (auto s : idx.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "n: " << n << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
at::Tensor group_points(at::Tensor points, at::Tensor idx) {
|
||||
CHECK_CONTIGUOUS(points);
|
||||
CHECK_CONTIGUOUS(idx);
|
||||
CHECK_IS_FLOAT(points);
|
||||
CHECK_IS_INT(idx);
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (points.is_cuda()) {
|
||||
CHECK_CUDA(idx);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
group_points_debug(points, idx);
|
||||
#endif
|
||||
|
||||
at::Tensor output =
|
||||
torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)},
|
||||
at::device(points.device()).dtype(at::ScalarType::Float));
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (points.is_cuda()) {
|
||||
group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
|
||||
idx.size(1), idx.size(2), points.data_ptr<float>(),
|
||||
idx.data_ptr<int>(), output.data_ptr<float>());
|
||||
} else {
|
||||
#endif
|
||||
// TORCH_CHECK(false, "CPU not supported");
|
||||
group_points_cpu(points.size(0), points.size(1), points.size(2),
|
||||
idx.size(1), idx.size(2), points.data_ptr<float>(),
|
||||
idx.data_ptr<int>(), output.data_ptr<float>());
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
}
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) {
|
||||
CHECK_CONTIGUOUS(grad_out);
|
||||
CHECK_CONTIGUOUS(idx);
|
||||
CHECK_IS_FLOAT(grad_out);
|
||||
CHECK_IS_INT(idx);
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (grad_out.is_cuda()) {
|
||||
CHECK_CUDA(idx);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
group_points_grad_debug(grad_out, idx, n);
|
||||
#endif
|
||||
|
||||
at::Tensor output =
|
||||
torch::zeros({grad_out.size(0), grad_out.size(1), n},
|
||||
at::device(grad_out.device()).dtype(at::ScalarType::Float));
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
if (grad_out.is_cuda()) {
|
||||
group_points_grad_kernel_wrapper(
|
||||
grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2),
|
||||
grad_out.data_ptr<float>(), idx.data_ptr<int>(), output.data_ptr<float>());
|
||||
} else {
|
||||
#endif
|
||||
|
||||
// TORCH_CHECK(false, "CPU not supported");
|
||||
group_points_grad_cpu(
|
||||
grad_out.size(0), grad_out.size(1), n,idx.size(1), idx.size(2),
|
||||
grad_out.data_ptr<float>(), idx.data_ptr<int>(), output.data_ptr<float>());
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
}
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
void group_points_cpu(
|
||||
int b, int c, int n,
|
||||
int npoints,
|
||||
int nsample,
|
||||
const float *points,
|
||||
const int *idx,
|
||||
float *out
|
||||
) {
|
||||
for (int batch = 0; batch < b; ++batch) {
|
||||
const float *points_b = points + batch * n * c;
|
||||
const int *idx_b = idx + batch * npoints * nsample;
|
||||
float *out_b = out + batch * npoints * nsample * c;
|
||||
|
||||
for (int i = 0; i < c * npoints; ++i) {
|
||||
const int l = i / npoints;
|
||||
const int j = i % npoints;
|
||||
|
||||
for (int k = 0; k < nsample; ++k) {
|
||||
int ii = idx_b[j * nsample + k];
|
||||
out_b[(l * npoints + j) * nsample + k] = points_b[l * n + ii];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void group_points_grad_cpu(
|
||||
int b, int c, int n,
|
||||
int npoints,
|
||||
int nsample,
|
||||
const float *grad_out,
|
||||
const int *idx,
|
||||
float *grad_points
|
||||
) {
|
||||
for (int batch_index = 0; batch_index < b; ++batch_index) {
|
||||
const float *grad_out_b = grad_out + batch_index * npoints * nsample * c;
|
||||
const int *idx_b = idx + batch_index * npoints * nsample;
|
||||
float *grad_points_b = grad_points + batch_index * n * c;
|
||||
|
||||
for (int i = 0; i < n * c; ++i) {
|
||||
grad_points_b[i] = 0.0f;
|
||||
}
|
||||
|
||||
for (int i = 0; i < c * npoints; ++i) {
|
||||
const int l = i / npoints;
|
||||
const int j = i % npoints;
|
||||
|
||||
for (int k = 0; k < nsample; ++k) {
|
||||
int ii = idx_b[j * nsample + k];
|
||||
grad_points_b[l * n + ii] += grad_out_b[(l * npoints + j) * nsample + k];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cuda_utils.h"
|
||||
|
||||
// input: points(b, c, n) idx(b, npoints, nsample)
|
||||
// output: out(b, c, npoints, nsample)
|
||||
__global__ void group_points_kernel(int b, int c, int n, int npoints,
|
||||
int nsample,
|
||||
const float *__restrict__ points,
|
||||
const int *__restrict__ idx,
|
||||
float *__restrict__ out) {
|
||||
int batch_index = blockIdx.x;
|
||||
points += batch_index * n * c;
|
||||
idx += batch_index * npoints * nsample;
|
||||
out += batch_index * npoints * nsample * c;
|
||||
|
||||
const int index = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int stride = blockDim.y * blockDim.x;
|
||||
for (int i = index; i < c * npoints; i += stride) {
|
||||
const int l = i / npoints;
|
||||
const int j = i % npoints;
|
||||
for (int k = 0; k < nsample; ++k) {
|
||||
int ii = idx[j * nsample + k];
|
||||
if (ii < 0 || ii >= n) {
|
||||
out[(l * npoints + j) * nsample + k] = 0;
|
||||
} else {
|
||||
out[(l * npoints + j) * nsample + k] = points[l * n + ii];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample,
|
||||
const float *points, const int *idx,
|
||||
float *out) {
|
||||
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
|
||||
|
||||
group_points_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
|
||||
b, c, n, npoints, nsample, points, idx, out);
|
||||
|
||||
CUDA_CHECK_ERRORS();
|
||||
}
|
||||
|
||||
// input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample)
|
||||
// output: grad_points(b, c, n)
|
||||
__global__ void group_points_grad_kernel(int b, int c, int n, int npoints,
|
||||
int nsample,
|
||||
const float *__restrict__ grad_out,
|
||||
const int *__restrict__ idx,
|
||||
float *__restrict__ grad_points) {
|
||||
int batch_index = blockIdx.x;
|
||||
grad_out += batch_index * npoints * nsample * c;
|
||||
idx += batch_index * npoints * nsample;
|
||||
grad_points += batch_index * n * c;
|
||||
|
||||
const int index = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int stride = blockDim.y * blockDim.x;
|
||||
for (int i = index; i < c * npoints; i += stride) {
|
||||
const int l = i / npoints;
|
||||
const int j = i % npoints;
|
||||
for (int k = 0; k < nsample; ++k) {
|
||||
int ii = idx[j * nsample + k];
|
||||
atomicAdd(grad_points + l * n + ii,
|
||||
grad_out[(l * npoints + j) * nsample + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
|
||||
int nsample, const float *grad_out,
|
||||
const int *idx, float *grad_points) {
|
||||
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
|
||||
|
||||
group_points_grad_kernel<<<b, opt_block_config(npoints, c), 0, stream>>>(
|
||||
b, c, n, npoints, nsample, grad_out, idx, grad_points);
|
||||
|
||||
CUDA_CHECK_ERRORS();
|
||||
}
|
||||
@@ -0,0 +1,241 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include "interpolate.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
|
||||
const float *known, float *dist2, int *idx);
|
||||
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
|
||||
const float *points, const int *idx,
|
||||
const float *weight, float *out);
|
||||
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
|
||||
const float *grad_out,
|
||||
const int *idx, const float *weight,
|
||||
float *grad_points);
|
||||
#endif
|
||||
|
||||
|
||||
void three_nn_cpu(int b, int n, int m, const float *unknown,
|
||||
const float *known, float *dist2, int *idx);
|
||||
void three_interpolate_cpu(int b, int c, int m, int n,
|
||||
const float *points, const int *idx,
|
||||
const float *weight, float *out);
|
||||
void three_interpolate_grad_cpu(int b, int c, int n, int m,
|
||||
const float *grad_out,
|
||||
const int *idx, const float *weight,
|
||||
float *grad_points);
|
||||
|
||||
#ifdef DEBUG
|
||||
void three_nn_debug(at::Tensor unknowns, at::Tensor knows) {
|
||||
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
|
||||
std::cout << "=========== three_nn ===========" << std::endl;
|
||||
|
||||
std::cout << "unknowns.size(): (";
|
||||
for (auto s : unknowns.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
std::cout << "knows.size(): (";
|
||||
for (auto s : knows.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
|
||||
void three_interpolate_debug(at::Tensor points, at::Tensor idx,
|
||||
at::Tensor weight) {
|
||||
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
|
||||
std::cout << "=========== three_interpolate ===========" << std::endl;
|
||||
|
||||
std::cout << "points.size(): (";
|
||||
for (auto s : points.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
std::cout << "idx.size(): (";
|
||||
for (auto s : idx.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "weight.size(): (";
|
||||
for (auto s : weight.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
|
||||
void three_interpolate_grad_debug(at::Tensor grad_out, at::Tensor idx,
|
||||
at::Tensor weight, const int m) {
|
||||
std::cout << "===== interpolate.cpp 调试信息 =====" << std::endl;
|
||||
std::cout << "=========== three_interpolate_grad ===========" << std::endl;
|
||||
|
||||
std::cout << "grad_out.size(): (";
|
||||
for (auto s : grad_out.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" <<std::endl;
|
||||
|
||||
std::cout << "idx.size(): (";
|
||||
for (auto s : idx.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
std::cout << "weight.size(): (";
|
||||
for (auto s : weight.sizes()) {
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << ")" << std::endl;
|
||||
|
||||
// 打印半径和采样数
|
||||
std::cout << "m: " << m << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// For each point in `unknowns`, find its three nearest neighbours in `knows`
// by squared Euclidean distance.
// unknowns: (B, n, 3) float; knows: (B, m, 3) float (contiguous).
// Returns {dist2, idx}, both shaped (B, n, 3): dist2 holds the squared
// distances (float) and idx the neighbour indices into `knows` (int).
// Dispatches to the CUDA kernel for GPU tensors when built WITH_CUDA,
// otherwise runs the sequential CPU implementation.
std::vector<at::Tensor> three_nn(at::Tensor unknowns, at::Tensor knows) {
  CHECK_CONTIGUOUS(unknowns);
  CHECK_CONTIGUOUS(knows);
  CHECK_IS_FLOAT(unknowns);
  CHECK_IS_FLOAT(knows);

#ifdef WITH_CUDA
  // If the query tensor is on the GPU, the reference tensor must be too.
  if (unknowns.is_cuda()) {
    CHECK_CUDA(knows);
  }
#endif

#ifdef DEBUG
  three_nn_debug(unknowns, knows);
#endif

  // Pre-allocate both outputs on the inputs' device; the kernels fill them.
  at::Tensor idx =
      torch::zeros({unknowns.size(0), unknowns.size(1), 3},
                   at::device(unknowns.device()).dtype(at::ScalarType::Int));
  at::Tensor dist2 =
      torch::zeros({unknowns.size(0), unknowns.size(1), 3},
                   at::device(unknowns.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (unknowns.is_cuda()) {
    three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1),
                            unknowns.data_ptr<float>(), knows.data_ptr<float>(),
                            dist2.data_ptr<float>(), idx.data_ptr<int>());
  } else {
#endif

  // CPU fallback (see three_nn_cpu).
  three_nn_cpu(unknowns.size(0), unknowns.size(1), knows.size(1),
               unknowns.data_ptr<float>(), knows.data_ptr<float>(),
               dist2.data_ptr<float>(), idx.data_ptr<int>());
#ifdef WITH_CUDA
  }
#endif

  return {dist2, idx};
}
|
||||
|
||||
// Weighted interpolation of features from their 3 nearest neighbours:
// output[b, c, j] = sum_k weight[b, j, k] * points[b, c, idx[b, j, k]].
// points: (B, C, M) float; idx: (B, N, 3) int; weight: (B, N, 3) float.
// Returns output: (B, C, N) float on the same device as `points`.
at::Tensor three_interpolate(at::Tensor points, at::Tensor idx,
                             at::Tensor weight) {
  CHECK_CONTIGUOUS(points);
  CHECK_CONTIGUOUS(idx);
  CHECK_CONTIGUOUS(weight);
  CHECK_IS_FLOAT(points);
  CHECK_IS_INT(idx);
  CHECK_IS_FLOAT(weight);

#ifdef WITH_CUDA
  // All operands must share the GPU when the features live there.
  if (points.is_cuda()) {
    CHECK_CUDA(idx);
    CHECK_CUDA(weight);
  }
#endif

#ifdef DEBUG
  three_interpolate_debug(points, idx, weight);
#endif

  at::Tensor output =
      torch::zeros({points.size(0), points.size(1), idx.size(1)},
                   at::device(points.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (points.is_cuda()) {
    three_interpolate_kernel_wrapper(
        points.size(0), points.size(1), points.size(2), idx.size(1),
        points.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
        output.data_ptr<float>());
  } else {
#endif

  // CPU fallback (see three_interpolate_cpu).
  three_interpolate_cpu(
      points.size(0), points.size(1), points.size(2), idx.size(1),
      points.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
      output.data_ptr<float>());
#ifdef WITH_CUDA
  }
#endif

  return output;
}
|
||||
|
||||
// Backward pass of three_interpolate: scatter-add the incoming gradient
// back onto the m source points, weighted by the interpolation weights.
// grad_out: (B, C, N) float; idx: (B, N, 3) int; weight: (B, N, 3) float;
// m: number of source points.  Returns grad_points: (B, C, m) float.
at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx,
                                  at::Tensor weight, const int m) {
  CHECK_CONTIGUOUS(grad_out);
  CHECK_CONTIGUOUS(idx);
  CHECK_CONTIGUOUS(weight);
  CHECK_IS_FLOAT(grad_out);
  CHECK_IS_INT(idx);
  CHECK_IS_FLOAT(weight);

#ifdef WITH_CUDA
  if (grad_out.is_cuda()) {
    CHECK_CUDA(idx);
    CHECK_CUDA(weight);
  }
#endif

#ifdef DEBUG
  three_interpolate_grad_debug(grad_out, idx, weight, m);
#endif

  // Zero-initialised because both implementations accumulate (+=) into it.
  at::Tensor output =
      torch::zeros({grad_out.size(0), grad_out.size(1), m},
                   at::device(grad_out.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (grad_out.is_cuda()) {
    three_interpolate_grad_kernel_wrapper(
        grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
        grad_out.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
        output.data_ptr<float>());
  } else {
#endif

  // CPU fallback (see three_interpolate_grad_cpu).
  three_interpolate_grad_cpu(
      grad_out.size(0), grad_out.size(1), grad_out.size(2), m,
      grad_out.data_ptr<float>(), idx.data_ptr<int>(), weight.data_ptr<float>(),
      output.data_ptr<float>());

#ifdef WITH_CUDA
  }
#endif

  return output;
}
|
||||
@@ -0,0 +1,123 @@
|
||||
// CPU reference for 3-nearest-neighbour search.
// unknown: (b, n, 3) query points, known: (b, m, 3) reference points.
// Writes dist2: (b, n, 3) squared distances (ascending) and idx: (b, n, 3)
// matching indices into the known set.
void three_nn_cpu(
    int b, int n, int m,
    const float *unknown,
    const float *known,
    float *dist2,
    int *idx
) {
  for (int bi = 0; bi < b; ++bi) {
    const float *q = unknown + bi * n * 3;
    const float *ref = known + bi * m * 3;
    float *d_out = dist2 + bi * n * 3;
    int *i_out = idx + bi * n * 3;

    for (int j = 0; j < n; ++j) {
      const float qx = q[j * 3 + 0];
      const float qy = q[j * 3 + 1];
      const float qz = q[j * 3 + 2];

      // Three best candidates so far, kept sorted ascending by distance.
      // Sentinel 1e40 (double) exceeds any representable float distance.
      double best[3] = {1e40, 1e40, 1e40};
      int best_idx[3] = {0, 0, 0};

      for (int k = 0; k < m; ++k) {
        const float dx = qx - ref[k * 3 + 0];
        const float dy = qy - ref[k * 3 + 1];
        const float dz = qz - ref[k * 3 + 2];
        const float d = dx * dx + dy * dy + dz * dz;

        // Insert d into the sorted top-3, shifting worse entries down.
        for (int slot = 0; slot < 3; ++slot) {
          if (d < best[slot]) {
            for (int t = 2; t > slot; --t) {
              best[t] = best[t - 1];
              best_idx[t] = best_idx[t - 1];
            }
            best[slot] = d;
            best_idx[slot] = k;
            break;
          }
        }
      }

      for (int slot = 0; slot < 3; ++slot) {
        d_out[j * 3 + slot] = best[slot];
        i_out[j * 3 + slot] = best_idx[slot];
      }
    }
  }
}
|
||||
|
||||
|
||||
// CPU reference for weighted 3-NN feature interpolation.
// points: (b, c, m) features, idx/weight: (b, n, 3) neighbour indices and
// weights.  Writes out: (b, c, n) with
//   out[b, l, j] = sum_k weight[b, j, k] * points[b, l, idx[b, j, k]].
void three_interpolate_cpu(
    int b, int c, int m, int n,
    const float *points,
    const int *idx,
    const float *weight,
    float *out
) {
  for (int bi = 0; bi < b; ++bi) {
    const float *feat = points + bi * m * c;
    const int *nbr = idx + bi * n * 3;
    const float *w = weight + bi * n * 3;
    float *dst = out + bi * n * c;

    // One pass per channel row, then per interpolated point.
    for (int l = 0; l < c; ++l) {
      const float *row = feat + l * m;
      for (int j = 0; j < n; ++j) {
        const int *nj = nbr + j * 3;
        const float *wj = w + j * 3;
        dst[l * n + j] = wj[0] * row[nj[0]] +
                         wj[1] * row[nj[1]] +
                         wj[2] * row[nj[2]];
      }
    }
  }
}
|
||||
|
||||
|
||||
// CPU reference for the backward pass of three_interpolate.
// grad_out: (b, c, n) incoming gradient; idx/weight: (b, n, 3).
// Writes grad_points: (b, c, m); the buffer is zeroed here before the
// scatter-add, so callers need not pre-clear it.
void three_interpolate_grad_cpu(
    int b, int c, int n, int m,
    const float *grad_out,
    const int *idx, const float *weight,
    float *grad_points
) {
  for (int bi = 0; bi < b; ++bi) {
    const float *g = grad_out + bi * n * c;
    const int *nbr = idx + bi * n * 3;
    const float *w = weight + bi * n * 3;
    float *gp = grad_points + bi * m * c;

    // Reset the accumulator for this batch.
    for (int t = 0; t < m * c; ++t) gp[t] = 0.0f;

    for (int l = 0; l < c; ++l) {
      const float *g_row = g + l * n;
      float *gp_row = gp + l * m;
      for (int j = 0; j < n; ++j) {
        const int *nj = nbr + j * 3;
        const float *wj = w + j * 3;
        const float gv = g_row[j];
        // Distribute the gradient to each of the three source points.
        gp_row[nj[0]] += wj[0] * gv;
        gp_row[nj[1]] += wj[1] * gv;
        gp_row[nj[2]] += wj[2] * gv;
      }
    }
  }
}
|
||||
@@ -0,0 +1,159 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cuda_utils.h"
|
||||
|
||||
// input: unknown(b, n, 3) known(b, m, 3)
|
||||
// output: dist2(b, n, 3), idx(b, n, 3)
|
||||
// input: unknown(b, n, 3) known(b, m, 3)
// output: dist2(b, n, 3), idx(b, n, 3)
// One block per batch element; threads stride over the n query points.
// For each query point, scans all m known points and keeps the three
// smallest squared distances (best1 <= best2 <= best3) with their indices.
__global__ void three_nn_kernel(int b, int n, int m,
                                const float *__restrict__ unknown,
                                const float *__restrict__ known,
                                float *__restrict__ dist2,
                                int *__restrict__ idx) {
  int batch_index = blockIdx.x;
  // Offset all pointers to this batch's slice.
  unknown += batch_index * n * 3;
  known += batch_index * m * 3;
  dist2 += batch_index * n * 3;
  idx += batch_index * n * 3;

  int index = threadIdx.x;
  int stride = blockDim.x;
  for (int j = index; j < n; j += stride) {
    float ux = unknown[j * 3 + 0];
    float uy = unknown[j * 3 + 1];
    float uz = unknown[j * 3 + 2];

    // Sentinel 1e40 (double) exceeds any representable float distance,
    // so every real candidate displaces it.
    double best1 = 1e40, best2 = 1e40, best3 = 1e40;
    int besti1 = 0, besti2 = 0, besti3 = 0;
    for (int k = 0; k < m; ++k) {
      float x = known[k * 3 + 0];
      float y = known[k * 3 + 1];
      float z = known[k * 3 + 2];
      float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
      // Insert d into the running top-3, shifting worse entries down.
      if (d < best1) {
        best3 = best2;
        besti3 = besti2;
        best2 = best1;
        besti2 = besti1;
        best1 = d;
        besti1 = k;
      } else if (d < best2) {
        best3 = best2;
        besti3 = besti2;
        best2 = d;
        besti2 = k;
      } else if (d < best3) {
        best3 = d;
        besti3 = k;
      }
    }
    dist2[j * 3 + 0] = best1;
    dist2[j * 3 + 1] = best2;
    dist2[j * 3 + 2] = best3;

    idx[j * 3 + 0] = besti1;
    idx[j * 3 + 1] = besti2;
    idx[j * 3 + 2] = besti3;
  }
}
|
||||
|
||||
// Host-side launcher for three_nn_kernel: one block per batch element,
// thread count chosen by opt_n_threads(n), on the current PyTorch stream.
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
                             const float *known, float *dist2, int *idx) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
                                                      dist2, idx);

  CUDA_CHECK_ERRORS();
}
|
||||
|
||||
// input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
|
||||
// output: out(b, c, n)
|
||||
// input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
// output: out(b, c, n)
// One block per batch element; the 2-D thread block is flattened and
// strides over all (channel, point) pairs.  Each output element is the
// weighted sum of the three indexed source features.
__global__ void three_interpolate_kernel(int b, int c, int m, int n,
                                         const float *__restrict__ points,
                                         const int *__restrict__ idx,
                                         const float *__restrict__ weight,
                                         float *__restrict__ out) {
  int batch_index = blockIdx.x;
  points += batch_index * m * c;

  idx += batch_index * n * 3;
  weight += batch_index * n * 3;

  out += batch_index * n * c;

  // Flatten the 2-D thread block into a single stride loop over c*n work items.
  const int index = threadIdx.y * blockDim.x + threadIdx.x;
  const int stride = blockDim.y * blockDim.x;
  for (int i = index; i < c * n; i += stride) {
    const int l = i / n;  // channel
    const int j = i % n;  // output point
    float w1 = weight[j * 3 + 0];
    float w2 = weight[j * 3 + 1];
    float w3 = weight[j * 3 + 2];

    int i1 = idx[j * 3 + 0];
    int i2 = idx[j * 3 + 1];
    int i3 = idx[j * 3 + 2];

    out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
             points[l * m + i3] * w3;
  }
}
|
||||
|
||||
// Host-side launcher for three_interpolate_kernel: one block per batch,
// 2-D block shape chosen by opt_block_config(n, c), on the current stream.
void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
                                      const float *points, const int *idx,
                                      const float *weight, float *out) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
      b, c, m, n, points, idx, weight, out);

  CUDA_CHECK_ERRORS();
}
|
||||
|
||||
// input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
|
||||
// output: grad_points(b, c, m)
|
||||
|
||||
// Backward of three_interpolate.
// input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
// output: grad_points(b, c, m) -- must be zero-initialised by the caller,
// since gradients are accumulated with atomicAdd (several output points may
// share a source point).
__global__ void three_interpolate_grad_kernel(
    int b, int c, int n, int m, const float *__restrict__ grad_out,
    const int *__restrict__ idx, const float *__restrict__ weight,
    float *__restrict__ grad_points) {
  int batch_index = blockIdx.x;
  grad_out += batch_index * n * c;
  idx += batch_index * n * 3;
  weight += batch_index * n * 3;
  grad_points += batch_index * m * c;

  // Flatten the 2-D thread block into a stride loop over c*n work items.
  const int index = threadIdx.y * blockDim.x + threadIdx.x;
  const int stride = blockDim.y * blockDim.x;
  for (int i = index; i < c * n; i += stride) {
    const int l = i / n;  // channel
    const int j = i % n;  // interpolated point
    float w1 = weight[j * 3 + 0];
    float w2 = weight[j * 3 + 1];
    float w3 = weight[j * 3 + 2];

    int i1 = idx[j * 3 + 0];
    int i2 = idx[j * 3 + 1];
    int i3 = idx[j * 3 + 2];

    // Atomic scatter-add: different threads may target the same source point.
    atomicAdd(grad_points + l * m + i1, grad_out[i] * w1);
    atomicAdd(grad_points + l * m + i2, grad_out[i] * w2);
    atomicAdd(grad_points + l * m + i3, grad_out[i] * w3);
  }
}
|
||||
|
||||
// Host-side launcher for three_interpolate_grad_kernel: one block per batch,
// 2-D block shape from opt_block_config(n, c), on the current PyTorch stream.
void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m,
                                           const float *grad_out,
                                           const int *idx, const float *weight,
                                           float *grad_points) {
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
  three_interpolate_grad_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
      b, c, n, m, grad_out, idx, weight, grad_points);

  CUDA_CHECK_ERRORS();
}
|
||||
@@ -0,0 +1,207 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include "sampling.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
|
||||
const float *points, const int *idx,
|
||||
float *out);
|
||||
void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
|
||||
const float *grad_out, const int *idx,
|
||||
float *grad_points);
|
||||
|
||||
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
|
||||
const float *dataset, float *temp,
|
||||
int *idxs);
|
||||
#endif
|
||||
|
||||
|
||||
void gather_points_cpu(int b, int c, int n, int npoints,
|
||||
const float *points, const int *idx,
|
||||
float *out);
|
||||
void gather_points_grad_cpu(int b, int c, int n, int npoints,
|
||||
const float *grad_out, const int *idx,
|
||||
float *grad_points);
|
||||
|
||||
void furthest_point_sampling_cpu(int b, int n, int m,
|
||||
const float *dataset, float *temp,
|
||||
int *idxs);
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
// Debug helper: prints the shapes of the inputs to gather_points.
// Compiled only when DEBUG is defined.
// (Banner strings are runtime output and intentionally left unchanged.)
void gather_points_debug(at::Tensor points, at::Tensor idx) {
  std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
  std::cout << "=========== gather_points ===========" << std::endl;

  std::cout << "points.size(): (";
  for (auto s : points.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;

  std::cout << "idx.size(): (";
  for (auto s : idx.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;

  std::cout << "====================================" << std::endl;
}
|
||||
|
||||
// Debug helper: prints the shapes of the inputs to gather_points_grad plus
// the original point count n.  Compiled only when DEBUG is defined.
void gather_points_grad_debug(at::Tensor grad_out, at::Tensor idx,
                              const int n) {
  std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
  std::cout << "=========== gather_points_grad ===========" << std::endl;

  std::cout << "grad_out.size(): (";
  for (auto s : grad_out.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;

  std::cout << "idx.size(): (";
  for (auto s : idx.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" << std::endl;

  std::cout << "n: " << n << std::endl;
  std::cout << "====================================" << std::endl;
}
|
||||
|
||||
// Debug helper: prints the point-cloud shape and requested sample count for
// furthest_point_sampling.  Compiled only when DEBUG is defined.
void furthest_point_sampling_debug(at::Tensor points, const int nsamples) {
  std::cout << "===== sampling.cpp 调试信息 =====" << std::endl;
  std::cout << "=========== furthest_point_sampling ===========" << std::endl;

  std::cout << "points.size(): (";
  for (auto s : points.sizes()) {
    std::cout << s << " ";
  }
  std::cout << ")" <<std::endl;

  std::cout << "nsamples: " << nsamples << std::endl;
  std::cout << "====================================" << std::endl;
}
|
||||
#endif
|
||||
|
||||
|
||||
// Gather feature columns by index: output[b, c, j] = points[b, c, idx[b, j]].
// points: (B, C, N) float; idx: (B, npoints) int.
// Returns output: (B, C, npoints) float on the same device as `points`.
at::Tensor gather_points(at::Tensor points, at::Tensor idx) {
  CHECK_CONTIGUOUS(points);
  CHECK_CONTIGUOUS(idx);
  CHECK_IS_FLOAT(points);
  CHECK_IS_INT(idx);

#ifdef WITH_CUDA
  // Index tensor must share the GPU with the features.
  if (points.is_cuda()) {
    CHECK_CUDA(idx);
  }
#endif

#ifdef DEBUG
  gather_points_debug(points, idx);
#endif

  at::Tensor output =
      torch::zeros({points.size(0), points.size(1), idx.size(1)},
                   at::device(points.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (points.is_cuda()) {
    gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2),
                                 idx.size(1), points.data_ptr<float>(),
                                 idx.data_ptr<int>(), output.data_ptr<float>());
  } else {
#endif

  // CPU fallback (see gather_points_cpu).
  gather_points_cpu(points.size(0), points.size(1), points.size(2),
                    idx.size(1), points.data_ptr<float>(),
                    idx.data_ptr<int>(), output.data_ptr<float>());
#ifdef WITH_CUDA
  }
#endif

  return output;
}
|
||||
|
||||
// Backward pass of gather_points: scatter-add the gradient of the gathered
// columns back onto the n original columns.
// grad_out: (B, C, npoints) float; idx: (B, npoints) int; n: original point
// count.  Returns grad_points: (B, C, n) float (zero-initialised, then
// accumulated into by the kernels).
at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx,
                              const int n) {
  CHECK_CONTIGUOUS(grad_out);
  CHECK_CONTIGUOUS(idx);
  CHECK_IS_FLOAT(grad_out);
  CHECK_IS_INT(idx);

#ifdef WITH_CUDA
  if (grad_out.is_cuda()) {
    CHECK_CUDA(idx);
  }
#endif

#ifdef DEBUG
  gather_points_grad_debug(grad_out, idx, n);
#endif

  at::Tensor output =
      torch::zeros({grad_out.size(0), grad_out.size(1), n},
                   at::device(grad_out.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (grad_out.is_cuda()) {
    gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n,
                                      idx.size(1), grad_out.data_ptr<float>(),
                                      idx.data_ptr<int>(), output.data_ptr<float>());
  } else {
#endif

  // CPU fallback (see gather_points_grad_cpu).
  gather_points_grad_cpu(grad_out.size(0), grad_out.size(1), n,
                         idx.size(1), grad_out.data_ptr<float>(),
                         idx.data_ptr<int>(), output.data_ptr<float>());

#ifdef WITH_CUDA
  }
#endif

  return output;
}
|
||||
|
||||
// Farthest-point sampling: pick `nsamples` indices from each point cloud so
// that each new point maximises its minimum distance to those already chosen.
// points: (B, N, 3) float.  Returns (B, nsamples) int index tensor.
at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) {
  CHECK_CONTIGUOUS(points);
  CHECK_IS_FLOAT(points);

#ifdef DEBUG
  furthest_point_sampling_debug(points, nsamples);
#endif

  at::Tensor output =
      torch::zeros({points.size(0), nsamples},
                   at::device(points.device()).dtype(at::ScalarType::Int));

  // Scratch buffer holding each point's current min squared distance to the
  // selected set; initialised to a large sentinel so the first update wins.
  at::Tensor tmp =
      torch::full({points.size(0), points.size(1)}, 1e10,
                  at::device(points.device()).dtype(at::ScalarType::Float));

#ifdef WITH_CUDA
  if (points.is_cuda()) {
    furthest_point_sampling_kernel_wrapper(
        points.size(0), points.size(1), nsamples, points.data_ptr<float>(),
        tmp.data_ptr<float>(), output.data_ptr<int>());
  } else {
#endif

  // CPU fallback (see furthest_point_sampling_cpu).
  furthest_point_sampling_cpu(
      points.size(0), points.size(1), nsamples, points.data_ptr<float>(),
      tmp.data_ptr<float>(), output.data_ptr<int>());

#ifdef WITH_CUDA
  }
#endif

  return output;
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// CPU reference for gather_points.
// points: (b, c, n) features, idx: (b, npoints) column indices.
// Writes out: (b, c, npoints) with out[i, l, j] = points[i, l, idx[i, j]].
void gather_points_cpu(
    int b, int c, int n, int npoints,
    const float *points,
    const int *idx,
    float *out
) {
  // Treat (batch, channel) pairs as flat rows of length n / npoints.
  for (int row = 0; row < b * c; ++row) {
    const int batch = row / c;
    const int *sel = idx + batch * npoints;
    const float *src = points + row * n;
    float *dst = out + row * npoints;
    for (int j = 0; j < npoints; ++j) {
      dst[j] = src[sel[j]];
    }
  }
}
|
||||
// CPU reference for the backward pass of gather_points.
// grad_out: (b, c, npoints) incoming gradient, idx: (b, npoints).
// Accumulates into grad_points: (b, c, n).
// NOTE(review): accumulates with += and does NOT clear grad_points first --
// the caller is expected to pass a zero-initialised buffer (the wrapper
// allocates it with torch::zeros).
void gather_points_grad_cpu(
    int b, int c, int n, int npoints,
    const float *grad_out,
    const int *idx,
    float *grad_points
) {
  // Treat (batch, channel) pairs as flat rows, scatter-adding per column.
  for (int row = 0; row < b * c; ++row) {
    const int batch = row / c;
    const int *sel = idx + batch * npoints;
    const float *g = grad_out + row * npoints;
    float *gp = grad_points + row * n;
    for (int j = 0; j < npoints; ++j) {
      gp[sel[j]] += g[j];
    }
  }
}
|
||||
|
||||
// CPU reference implementation of farthest-point sampling (FPS), mirroring
// the CUDA kernel's semantics:
//   dataset: (b, n, 3) xyz coordinates; temp: (b, n) scratch buffer;
//   idxs:    (b, m) output sample indices.
// The first sample is always index 0; each subsequent sample is the point
// whose minimum squared distance to the already-selected set is largest.
// Points with squared norm <= 1e-3 (at/near the origin, typically padding)
// are skipped and can never be selected, matching the CUDA kernel.
void furthest_point_sampling_cpu(
    int b, int n, int m,
    const float *dataset,
    float *temp,
    int *idxs
) {
  // Match the CUDA kernel's guard: no samples requested, nothing to write.
  // (The previous version unconditionally wrote idxs[0], out of bounds
  // when m == 0.)
  if (m <= 0) return;

  for (int batch_index = 0; batch_index < b; ++batch_index) {
    const float *dataset_batch = dataset + batch_index * n * 3;
    float *temp_batch = temp + batch_index * n;
    int *idxs_batch = idxs + batch_index * m;

    // The first selected point is always index 0, as in the CUDA kernel.
    int old = 0;
    idxs_batch[0] = old;

    // temp[k] tracks point k's min squared distance to the selected set;
    // start at a large sentinel so the first min() always takes the new value.
    for (int k = 0; k < n; ++k) {
      temp_batch[k] = 1e10f;
    }

    // Select the remaining m-1 samples.
    for (int j = 1; j < m; ++j) {
      // Coordinates of the most recently selected point.
      float x1 = dataset_batch[old * 3 + 0];
      float y1 = dataset_batch[old * 3 + 1];
      float z1 = dataset_batch[old * 3 + 2];

      // Track the farthest candidate WHILE updating temp, exactly like the
      // CUDA kernel.  (BUG FIX: the previous version scanned temp in a
      // separate pass afterwards, which wrongly let skipped near-origin
      // points -- whose temp stays at the 1e10 sentinel -- win the argmax.)
      float best = -1.0f;
      int besti = 0;
      for (int k = 0; k < n; ++k) {
        float x2 = dataset_batch[k * 3 + 0];
        float y2 = dataset_batch[k * 3 + 1];
        float z2 = dataset_batch[k * 3 + 2];

        // Skip padding points near the origin (CUDA's mag <= 1e-3 test).
        float mag = x2 * x2 + y2 * y2 + z2 * z2;
        if (mag <= 1e-3f) continue;

        // Squared distance to the last selected point (no sqrt needed for
        // comparisons).
        float dx = x2 - x1;
        float dy = y2 - y1;
        float dz = z2 - z1;
        float d = dx * dx + dy * dy + dz * dz;

        // New min squared distance of point k to the selected set.
        float d2 = d < temp_batch[k] ? d : temp_batch[k];
        temp_batch[k] = d2;
        if (d2 > best) {
          best = d2;
          besti = k;
        }
      }

      // Record the winner and make it the reference for the next round.
      old = besti;
      idxs_batch[j] = old;
    }
  }
}
|
||||
@@ -0,0 +1,234 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates.
|
||||
//
|
||||
// This source code is licensed under the MIT license found in the
|
||||
// LICENSE file in the root directory of this source tree.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "cuda_utils.h"
|
||||
|
||||
// input: points(b, c, n) idx(b, m)
|
||||
// output: out(b, c, m)
|
||||
// input: points(b, c, n) idx(b, m)
// output: out(b, c, m)
// Grid is (batch, channel); threads stride over the m gathered columns:
// out[i, l, j] = points[i, l, idx[i, j]].
__global__ void gather_points_kernel(int b, int c, int n, int m,
                                     const float *__restrict__ points,
                                     const int *__restrict__ idx,
                                     float *__restrict__ out) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        out[(i * c + l) * m + j] = points[(i * c + l) * n + a];
      }
    }
  }
}
|
||||
|
||||
// Host-side launcher for gather_points_kernel: a (b, c) grid with thread
// count chosen by opt_n_threads(npoints), on the current PyTorch stream.
void gather_points_kernel_wrapper(int b, int c, int n, int npoints,
                                  const float *points, const int *idx,
                                  float *out) {
  gather_points_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                         at::cuda::getCurrentCUDAStream()>>>(b, c, n, npoints,
                                                             points, idx, out);

  CUDA_CHECK_ERRORS();
}
|
||||
|
||||
// input: grad_out(b, c, m) idx(b, m)
|
||||
// output: grad_points(b, c, n)
|
||||
// input: grad_out(b, c, m) idx(b, m)
// output: grad_points(b, c, n) -- must be zero-initialised by the caller;
// the same source column may be indexed more than once, hence atomicAdd.
__global__ void gather_points_grad_kernel(int b, int c, int n, int m,
                                          const float *__restrict__ grad_out,
                                          const int *__restrict__ idx,
                                          float *__restrict__ grad_points) {
  for (int i = blockIdx.x; i < b; i += gridDim.x) {
    for (int l = blockIdx.y; l < c; l += gridDim.y) {
      for (int j = threadIdx.x; j < m; j += blockDim.x) {
        int a = idx[i * m + j];
        atomicAdd(grad_points + (i * c + l) * n + a,
                  grad_out[(i * c + l) * m + j]);
      }
    }
  }
}
|
||||
|
||||
// Host-side launcher for gather_points_grad_kernel: a (b, c) grid with
// thread count from opt_n_threads(npoints), on the current PyTorch stream.
void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints,
                                       const float *grad_out, const int *idx,
                                       float *grad_points) {
  gather_points_grad_kernel<<<dim3(b, c, 1), opt_n_threads(npoints), 0,
                              at::cuda::getCurrentCUDAStream()>>>(
      b, c, n, npoints, grad_out, idx, grad_points);

  CUDA_CHECK_ERRORS();
}
|
||||
|
||||
// Reduction step for farthest-point sampling: merge lane idx2 into lane
// idx1, keeping the larger candidate distance together with its point index.
// Ties (v2 == v1) keep the existing index i1.
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
  const float v1 = dists[idx1], v2 = dists[idx2];
  const int i1 = dists_i[idx1], i2 = dists_i[idx2];
  dists[idx1] = max(v1, v2);
  dists_i[idx1] = v2 > v1 ? i2 : i1;
}
|
||||
|
||||
// Input dataset: (b, n, 3), tmp: (b, n)
|
||||
// Ouput idxs (b, m)
|
||||
// Input dataset: (b, n, 3), tmp: (b, n)
// Output idxs (b, m)
// Farthest-point sampling, one block per batch element.  temp[k] holds each
// point's current min squared distance to the selected set (caller
// initialises it to a large sentinel).  Each iteration: every thread updates
// temp for its strided slice of points and proposes its local farthest
// candidate; a shared-memory tree reduction then picks the global winner.
// Points with squared norm <= 1e-3 are treated as padding and skipped.
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(
    int b, int n, int m, const float *__restrict__ dataset,
    float *__restrict__ temp, int *__restrict__ idxs) {
  if (m <= 0) return;
  // Per-lane best (distance, index) candidates for the reduction.
  __shared__ float dists[block_size];
  __shared__ int dists_i[block_size];

  int batch_index = blockIdx.x;
  dataset += batch_index * n * 3;
  temp += batch_index * n;
  idxs += batch_index * m;

  int tid = threadIdx.x;
  const int stride = block_size;

  // The first selected point is always index 0.
  int old = 0;
  if (threadIdx.x == 0) idxs[0] = old;

  __syncthreads();
  for (int j = 1; j < m; j++) {
    int besti = 0;
    float best = -1;
    // Coordinates of the most recently selected point.
    float x1 = dataset[old * 3 + 0];
    float y1 = dataset[old * 3 + 1];
    float z1 = dataset[old * 3 + 2];
    // Each thread handles points tid, tid+stride, tid+2*stride, ...
    for (int k = tid; k < n; k += stride) {
      float x2, y2, z2;
      x2 = dataset[k * 3 + 0];
      y2 = dataset[k * 3 + 1];
      z2 = dataset[k * 3 + 2];
      // Skip padding points at/near the origin.
      float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
      if (mag <= 1e-3) continue;

      float d =
          (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);

      // Update point k's min distance to the selected set, and track the
      // thread-local farthest point.
      float d2 = min(d, temp[k]);
      temp[k] = d2;
      besti = d2 > best ? k : besti;
      best = d2 > best ? d2 : best;
    }
    dists[tid] = best;
    dists_i[tid] = besti;
    __syncthreads();

    // Shared-memory tree reduction: halve the active lane count each step,
    // merging the upper half into the lower half via __update.
    if (block_size >= 512) {
      if (tid < 256) {
        __update(dists, dists_i, tid, tid + 256);
      }
      __syncthreads();
    }
    if (block_size >= 256) {
      if (tid < 128) {
        __update(dists, dists_i, tid, tid + 128);
      }
      __syncthreads();
    }
    if (block_size >= 128) {
      if (tid < 64) {
        __update(dists, dists_i, tid, tid + 64);
      }
      __syncthreads();
    }
    if (block_size >= 64) {
      if (tid < 32) {
        __update(dists, dists_i, tid, tid + 32);
      }
      __syncthreads();
    }
    if (block_size >= 32) {
      if (tid < 16) {
        __update(dists, dists_i, tid, tid + 16);
      }
      __syncthreads();
    }
    if (block_size >= 16) {
      if (tid < 8) {
        __update(dists, dists_i, tid, tid + 8);
      }
      __syncthreads();
    }
    if (block_size >= 8) {
      if (tid < 4) {
        __update(dists, dists_i, tid, tid + 4);
      }
      __syncthreads();
    }
    if (block_size >= 4) {
      if (tid < 2) {
        __update(dists, dists_i, tid, tid + 2);
      }
      __syncthreads();
    }
    if (block_size >= 2) {
      if (tid < 1) {
        __update(dists, dists_i, tid, tid + 1);
      }
      __syncthreads();
    }

    // Lane 0 now holds the global winner; every thread reads it so all
    // lanes agree on `old` for the next iteration.
    old = dists_i[0];
    if (tid == 0) idxs[j] = old;
  }
}
|
||||
|
||||
// Host-side launcher for furthest_point_sampling_kernel.  The kernel's
// block size is a compile-time template parameter (the reduction depends on
// it), so dispatch over the power-of-two thread count from opt_n_threads(n).
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
                                            const float *dataset, float *temp,
                                            int *idxs) {
  unsigned int n_threads = opt_n_threads(n);

  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  switch (n_threads) {
    case 512:
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 256:
      furthest_point_sampling_kernel<256>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 128:
      furthest_point_sampling_kernel<128>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 64:
      furthest_point_sampling_kernel<64>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 32:
      furthest_point_sampling_kernel<32>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 16:
      furthest_point_sampling_kernel<16>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 8:
      furthest_point_sampling_kernel<8>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 4:
      furthest_point_sampling_kernel<4>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 2:
      furthest_point_sampling_kernel<2>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    case 1:
      furthest_point_sampling_kernel<1>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
      break;
    default:
      // Fallback for an unexpected thread count; template still uses 512.
      furthest_point_sampling_kernel<512>
          <<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
  }

  CUDA_CHECK_ERRORS();
}
|
||||
@@ -0,0 +1 @@
|
||||
from .pointnet2 import *
|
||||
Binary file not shown.
@@ -0,0 +1,518 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
#
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
''' Pointnet2 layers.
|
||||
Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch
|
||||
Extended with the following:
|
||||
1. Uniform sampling in each local region (sample_uniformly)
|
||||
2. Return sampled points indices to support votenet.
|
||||
'''
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# import os
|
||||
# import sys
|
||||
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
# sys.path.append(BASE_DIR)
|
||||
|
||||
from . import pointnet2_utils
|
||||
from . import pytorch_utils as pt_utils
|
||||
from typing import List
|
||||
|
||||
|
||||
class _PointnetSAModuleBase(nn.Module):
    """Shared base for PointNet++ set-abstraction modules.

    Subclasses populate ``self.npoint`` (FPS sample count or ``None`` for a
    single global group) and the parallel ``ModuleList``s ``self.groupers``
    and ``self.mlps``.
    """

    def __init__(self):
        super().__init__()
        self.npoint = None    # number of FPS centroids; None = group all
        self.groupers = None  # nn.ModuleList of grouping modules
        self.mlps = None      # nn.ModuleList of per-scale shared MLPs

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, N, C) tensor of the descriptors of the the features

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors
        """
        # Pick npoint centroids via farthest-point sampling, unless this is
        # a "group all" module (npoint is None).
        if self.npoint is not None:
            xyz_flipped = xyz.transpose(1, 2).contiguous()
            centroid_idx = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
            new_xyz = pointnet2_utils.gather_operation(xyz_flipped, centroid_idx)
            new_xyz = new_xyz.transpose(1, 2).contiguous()
        else:
            new_xyz = None

        # Run each scale: group -> shared MLP -> max-pool over the sample axis.
        pooled_features = []
        for grouper, mlp in zip(self.groupers, self.mlps):
            grouped = grouper(xyz, new_xyz, features)  # (B, C, npoint, nsample)
            grouped = mlp(grouped)                     # (B, mlp[-1], npoint, nsample)
            grouped = F.max_pool2d(
                grouped, kernel_size=[1, grouped.size(3)]
            )                                          # (B, mlp[-1], npoint, 1)
            pooled_features.append(grouped.squeeze(-1))  # (B, mlp[-1], npoint)

        # Concatenate the per-scale descriptors along the channel axis.
        return new_xyz, torch.cat(pooled_features, dim=1)
|
||||
|
||||
|
||||
class PointnetSAModuleMSG(_PointnetSAModuleBase):
    r"""Pointnet set abstraction layer with multiscale grouping

    Parameters
    ----------
    npoint : int
        Number of features
    radii : list of float32
        list of radii to group with
    nsamples : list of int32
        Number of samples in each ball query
    mlps : list of list of int32
        Spec of the pointnet before the global max_pool for each scale
    bn : bool
        Use batchnorm
    """

    def __init__(
            self,
            *,
            npoint: int,
            radii: List[float],
            nsamples: List[int],
            mlps: List[List[int]],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz, sample_uniformly=sample_uniformly)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            # Bug fix: copy the spec before prepending the 3 xyz channels.
            # The original did `mlps[i][0] += 3` in place, silently mutating
            # the caller's list (wrong channel counts if the spec is reused).
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3

            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
|
||||
|
||||
|
||||
class PointnetSAModule(PointnetSAModuleMSG):
    r"""Pointnet set abstraction layer (single-scale convenience wrapper).

    Parameters
    ----------
    npoint : int
        Number of features
    radius : float
        Radius of ball
    nsample : int
        Number of samples in the ball query
    mlp : list
        Spec of the pointnet before the global max_pool
    bn : bool
        Use batchnorm
    """

    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            radius: float = None,
            nsample: int = None,
            bn: bool = True,
            use_xyz: bool = True
    ):
        # Delegate to the multi-scale module with single-element lists.
        super().__init__(
            mlps=[mlp],
            npoint=npoint,
            radii=[radius],
            nsamples=[nsample],
            bn=bn,
            use_xyz=use_xyz
        )
|
||||
|
||||
|
||||
class PointnetSAModuleVotes(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    with extra support for returning point indices for getting their GT votes '''

    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            radius: float = None,
            nsample: int = None,
            bn: bool = True,
            use_xyz: bool = True,
            pooling: str = 'max',
            sigma: float = None,  # for RBF pooling
            normalize_xyz: bool = False,  # normalize local XYZ with radius
            sample_uniformly: bool = False,
            ret_unique_cnt: bool = False
    ):
        super().__init__()

        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.pooling = pooling
        self.mlp_module = None
        self.use_xyz = use_xyz
        self.sigma = sigma
        if self.sigma is None:
            # Default RBF bandwidth: half the grouping radius.
            self.sigma = self.radius / 2
        self.normalize_xyz = normalize_xyz
        self.ret_unique_cnt = ret_unique_cnt

        if npoint is not None:
            self.grouper = pointnet2_utils.QueryAndGroup(
                radius, nsample,
                use_xyz=use_xyz, ret_grouped_xyz=True, normalize_xyz=normalize_xyz,
                sample_uniformly=sample_uniformly, ret_unique_cnt=ret_unique_cnt)
        else:
            self.grouper = pointnet2_utils.GroupAll(use_xyz, ret_grouped_xyz=True)

        # Bug fix: copy the spec before prepending the 3 xyz channels.
        # The original aliased the caller's list (`mlp_spec = mlp`) and then
        # mutated it in place, corrupting specs that the caller reuses.
        mlp_spec = list(mlp)
        if use_xyz and len(mlp_spec) > 0:
            mlp_spec[0] += 3
        self.mlp_module = pt_utils.SharedMLP(mlp_spec, bn=bn)

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None,
                inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, C, N) tensor of the descriptors of the features
        inds : torch.Tensor
            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1);
            when None, indices are drawn by furthest point sampling

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        inds: torch.Tensor
            (B, npoint) tensor of the inds
        """
        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if inds is None:
            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
        else:
            assert inds.shape[1] == self.npoint
        new_xyz = pointnet2_utils.gather_operation(
            xyz_flipped, inds
        ).transpose(1, 2).contiguous() if self.npoint is not None else None

        if not self.ret_unique_cnt:
            grouped_features, grouped_xyz = self.grouper(
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample)
        else:
            grouped_features, grouped_xyz, unique_cnt = self.grouper(
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample), (B,3,npoint,nsample), (B,npoint)

        new_features = self.mlp_module(
            grouped_features
        )  # (B, mlp[-1], npoint, nsample)
        if self.pooling == 'max':
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
        elif self.pooling == 'avg':
            new_features = F.avg_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
        elif self.pooling == 'rbf':
            # Use radial basis function kernel for weighted sum of features
            # (normalized by nsample and sigma).
            # Ref: https://en.wikipedia.org/wiki/Radial_basis_function_kernel
            rbf = torch.exp(-1 * grouped_xyz.pow(2).sum(1, keepdim=False) / (self.sigma**2) / 2)  # (B, npoint, nsample)
            new_features = torch.sum(new_features * rbf.unsqueeze(1), -1, keepdim=True) / float(self.nsample)  # (B, mlp[-1], npoint, 1)
        new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)

        if not self.ret_unique_cnt:
            return new_xyz, new_features, inds
        else:
            return new_xyz, new_features, inds, unique_cnt
|
||||
|
||||
class PointnetSAModuleMSGVotes(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    with extra support for returning point indices for getting their GT votes '''

    def __init__(
            self,
            *,
            mlps: List[List[int]],
            npoint: int,
            radii: List[float],
            nsamples: List[int],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert len(mlps) == len(nsamples) == len(radii)

        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz, sample_uniformly=sample_uniformly)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
            )
            # Bug fix: copy the spec before prepending the 3 xyz channels.
            # The original mutated `mlps[i]` in place via the alias.
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3

            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))

    def forward(self, xyz: torch.Tensor,
                features: torch.Tensor = None, inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor of the xyz coordinates of the features
        features : torch.Tensor
            (B, C, N) tensor of the descriptors of the features
        inds : torch.Tensor
            (B, npoint) tensor that stores index to the xyz points (values in 0-N-1);
            when None, indices are drawn by furthest point sampling

        Returns
        -------
        new_xyz : torch.Tensor
            (B, npoint, 3) tensor of the new features' xyz
        new_features : torch.Tensor
            (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
        inds: torch.Tensor
            (B, npoint) tensor of the inds
        """
        new_features_list = []

        xyz_flipped = xyz.transpose(1, 2).contiguous()
        if inds is None:
            inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
        new_xyz = pointnet2_utils.gather_operation(
            xyz_flipped, inds
        ).transpose(1, 2).contiguous() if self.npoint is not None else None

        for i in range(len(self.groupers)):
            new_features = self.groupers[i](
                xyz, new_xyz, features
            )  # (B, C, npoint, nsample)
            new_features = self.mlps[i](
                new_features
            )  # (B, mlp[-1], npoint, nsample)
            # Max-pool over the nsample neighbours of each centroid.
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], npoint, 1)
            new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)

            new_features_list.append(new_features)

        return new_xyz, torch.cat(new_features_list, dim=1), inds
|
||||
|
||||
|
||||
class PointnetFPModule(nn.Module):
    r"""Propagates (interpolates) the features of one point set to another.

    Parameters
    ----------
    mlp : list
        Pointnet module parameters
    bn : bool
        Use batchnorm
    """

    def __init__(self, *, mlp: List[int], bn: bool = True):
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn=bn)

    def forward(
            self, unknown: torch.Tensor, known: torch.Tensor,
            unknow_feats: torch.Tensor, known_feats: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) tensor of the xyz positions of the unknown features
        known : torch.Tensor
            (B, m, 3) tensor of the xyz positions of the known features
        unknow_feats : torch.Tensor
            (B, C1, n) tensor of the features to be propagated to
        known_feats : torch.Tensor
            (B, C2, m) tensor of features to be propagated

        Returns
        -------
        new_features : torch.Tensor
            (B, mlp[-1], n) tensor of the features of the unknown features
        """
        if known is not None:
            # Inverse-distance-weighted interpolation over the 3 nearest
            # known neighbours of each unknown point.
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            dist_recip = 1.0 / (dist + 1e-8)  # epsilon guards against /0
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_feats = pointnet2_utils.three_interpolate(
                known_feats, idx, weight
            )
        else:
            # No known coordinates: broadcast the features over all n points.
            interpolated_feats = known_feats.expand(
                *known_feats.size()[0:2], unknown.size(1)
            )

        if unknow_feats is not None:
            # Concatenate skip-link features along the channel dimension.
            new_features = torch.cat([interpolated_feats, unknow_feats],
                                     dim=1)  # (B, C2 + C1, n)
        else:
            new_features = interpolated_feats

        # SharedMLP works on 4D input, so add/remove a trailing unit dim.
        return self.mlp(new_features.unsqueeze(-1)).squeeze(-1)
|
||||
|
||||
class PointnetLFPModuleMSG(nn.Module):
    ''' Modified based on _PointnetSAModuleBase and PointnetSAModuleMSG
    learnable feature propagation layer.'''

    def __init__(
            self,
            *,
            mlps: List[List[int]],
            radii: List[float],
            nsamples: List[int],
            post_mlp: List[int],
            bn: bool = True,
            use_xyz: bool = True,
            sample_uniformly: bool = False
    ):
        super().__init__()

        assert len(mlps) == len(nsamples) == len(radii)

        self.post_mlp = pt_utils.SharedMLP(post_mlp, bn=bn)

        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            radius = radii[i]
            nsample = nsamples[i]
            self.groupers.append(
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz,
                                              sample_uniformly=sample_uniformly)
            )
            # Bug fix: copy the spec before prepending the 3 xyz channels.
            # The original mutated `mlps[i]` in place via the alias.
            mlp_spec = list(mlps[i])
            if use_xyz:
                mlp_spec[0] += 3

            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))

    def forward(self, xyz2: torch.Tensor, xyz1: torch.Tensor,
                features2: torch.Tensor, features1: torch.Tensor) -> torch.Tensor:
        r""" Propagate features from xyz1 to xyz2.
        Parameters
        ----------
        xyz2 : torch.Tensor
            (B, N2, 3) tensor of the xyz coordinates of the features
        xyz1 : torch.Tensor
            (B, N1, 3) tensor of the xyz coordinates of the features
        features2 : torch.Tensor
            (B, C2, N2) tensor of the descriptors of the features
        features1 : torch.Tensor
            (B, C1, N1) tensor of the descriptors of the features

        Returns
        -------
        new_features1 : torch.Tensor
            (B, \sum_k(mlps[k][-1]), N1) tensor of the new_features descriptors
        """
        new_features_list = []

        for i in range(len(self.groupers)):
            new_features = self.groupers[i](
                xyz1, xyz2, features1
            )  # (B, C1, N2, nsample)
            new_features = self.mlps[i](
                new_features
            )  # (B, mlp[-1], N2, nsample)
            # Max-pool over the nsample neighbours of each target point.
            new_features = F.max_pool2d(
                new_features, kernel_size=[1, new_features.size(3)]
            )  # (B, mlp[-1], N2, 1)
            new_features = new_features.squeeze(-1)  # (B, mlp[-1], N2)

            if features2 is not None:
                # Concatenate the target set's own features as a skip link.
                new_features = torch.cat([new_features, features2],
                                         dim=1)  # (B, mlp[-1] + C2, N2)

            new_features = new_features.unsqueeze(-1)
            new_features = self.post_mlp(new_features)

            new_features_list.append(new_features)

        return torch.cat(new_features_list, dim=1).squeeze(-1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    from torch.autograd import Variable

    # Deterministic smoke test of the multi-scale SA module (requires CUDA).
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)

    xyz = Variable(torch.randn(2, 9, 3).cuda(), requires_grad=True)
    xyz_feats = Variable(torch.randn(2, 9, 6).cuda(), requires_grad=True)

    test_module = PointnetSAModuleMSG(
        npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]]
    )
    test_module.cuda()
    print(test_module(xyz, xyz_feats))

    # One forward/backward round-trip to check gradients flow back to xyz.
    for _ in range(1):
        _, new_features = test_module(xyz, xyz_feats)
        grad = torch.cuda.FloatTensor(*new_features.size()).fill_(1)
        new_features.backward(grad)
        print(new_features)
        print(xyz.grad)
|
||||
@@ -0,0 +1,554 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
#
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
''' Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch '''
|
||||
from __future__ import (
|
||||
division,
|
||||
absolute_import,
|
||||
with_statement,
|
||||
print_function,
|
||||
unicode_literals,
|
||||
)
|
||||
import torch
|
||||
from torch.autograd import Function
|
||||
import torch.nn as nn
|
||||
# import pytorch_utils as pt_utils
|
||||
from . import pytorch_utils as pt_utils
|
||||
# import sys
|
||||
|
||||
try:
|
||||
import builtins
|
||||
except:
|
||||
import __builtin__ as builtins
|
||||
|
||||
try:
|
||||
from . import pointnet2
|
||||
except ImportError:
|
||||
if not getattr(builtins, "__POINTNET2_SETUP__", False):
|
||||
raise ImportError(
|
||||
"Could not import pointnet2 module.\n"
|
||||
"Please see the setup instructions in the README: "
|
||||
"https://github.com/erikwijmans/Pointnet2_PyTorch/blob/master/README.rst"
|
||||
)
|
||||
|
||||
if False:
|
||||
# Workaround for type hints without depending on the `typing` module
|
||||
from typing import *
|
||||
|
||||
|
||||
class RandomDropout(nn.Module):
    """Feature dropout with a dropout probability drawn uniformly from
    [0, p) on every forward pass (no activation rescaling)."""

    def __init__(self, p=0.5, inplace=False):
        super(RandomDropout, self).__init__()
        self.p = p          # upper bound of the sampled dropout probability
        self.inplace = inplace

    def forward(self, X):
        # Draw a fresh dropout probability for this pass.
        theta = torch.Tensor(1).uniform_(0, self.p)[0]
        # Bug fix: the original passed `self.train` — the bound nn.Module
        # *method*, which is always truthy — so dropout was applied even in
        # eval mode. `self.training` is the actual train/eval flag.
        return pt_utils.feature_dropout_no_scaling(X, theta, self.training, self.inplace)
|
||||
|
||||
|
||||
class FurthestPointSampling(Function):
    """Autograd wrapper around the compiled furthest-point-sampling kernel.
    The operation is not differentiable (it returns indices)."""

    @staticmethod
    def forward(ctx, xyz, npoint):
        # type: (Any, torch.Tensor, int) -> torch.Tensor
        r"""
        Uses iterative furthest point sampling to select a set of npoint features that have the largest
        minimum distance

        Parameters
        ----------
        xyz : torch.Tensor
            (B, N, 3) tensor where N > npoint
        npoint : int32
            number of features in the sampled set

        Returns
        -------
        torch.Tensor
            (B, npoint) tensor containing the set
        """
        return pointnet2.furthest_point_sampling(xyz, npoint)

    @staticmethod
    def backward(ctx, a=None):
        # Bug fix: the original named the first parameter `xyz`, but autograd
        # always passes the context object first; the misleading name invited
        # misuse. Sampling yields indices, so there is no gradient.
        return None, None


furthest_point_sample = FurthestPointSampling.apply
|
||||
|
||||
|
||||
class GatherOperation(Function):
    """Autograd wrapper around the compiled gather kernel: selects feature
    columns by index and scatters gradients back in backward."""

    @staticmethod
    def forward(ctx, features, idx):
        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        features : torch.Tensor
            (B, C, N) tensor

        idx : torch.Tensor
            (B, npoint) tensor of the features to gather

        Returns
        -------
        torch.Tensor
            (B, C, npoint) tensor
        """
        _, C, N = features.size()
        # Stash what backward needs to size the gradient buffer.
        ctx.for_backwards = (idx, C, N)
        return pointnet2.gather_points(features, idx)

    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        # Scatter the incoming gradient back to the N original positions.
        grad_features = pointnet2.gather_points_grad(grad_out.contiguous(), idx, N)
        return grad_features, None


gather_operation = GatherOperation.apply
|
||||
|
||||
|
||||
class ThreeNN(Function):
    """Autograd wrapper around the compiled 3-nearest-neighbour kernel.
    Not differentiable (returns distances and indices)."""

    @staticmethod
    def forward(ctx, unknown, known):
        # type: (Any, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]
        r"""
        Find the three nearest neighbors of unknown in known
        Parameters
        ----------
        unknown : torch.Tensor
            (B, n, 3) tensor of known features
        known : torch.Tensor
            (B, m, 3) tensor of unknown features

        Returns
        -------
        dist : torch.Tensor
            (B, n, 3) l2 distance to the three nearest neighbors
        idx : torch.Tensor
            (B, n, 3) index of 3 nearest neighbors
        """
        # The kernel returns squared distances; take the root here.
        dist2, idx = pointnet2.three_nn(unknown, known)
        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None


three_nn = ThreeNN.apply
|
||||
|
||||
|
||||
class ThreeInterpolate(Function):
    """Autograd wrapper around the compiled weighted 3-NN interpolation
    kernel, with the matching gradient scatter in backward."""

    @staticmethod
    def forward(ctx, features, idx, weight):
        # type(Any, torch.Tensor, torch.Tensor, torch.Tensor) -> Torch.Tensor
        r"""
        Performs weight linear interpolation on 3 features
        Parameters
        ----------
        features : torch.Tensor
            (B, c, m) Features descriptors to be interpolated from
        idx : torch.Tensor
            (B, n, 3) three nearest neighbors of the target features in features
        weight : torch.Tensor
            (B, n, 3) weights

        Returns
        -------
        torch.Tensor
            (B, c, n) tensor of the interpolated features
        """
        # Cleanup: the original unpacked B, c, m and n but only m was ever
        # used; stash just what backward needs.
        m = features.size(2)
        ctx.three_interpolate_for_backward = (idx, weight, m)

        return pointnet2.three_interpolate(features, idx, weight)

    @staticmethod
    def backward(ctx, grad_out):
        # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
        r"""
        Parameters
        ----------
        grad_out : torch.Tensor
            (B, c, n) tensor with gradients of outputs

        Returns
        -------
        grad_features : torch.Tensor
            (B, c, m) tensor with gradients of features

        None

        None
        """
        idx, weight, m = ctx.three_interpolate_for_backward

        grad_features = pointnet2.three_interpolate_grad(
            grad_out.contiguous(), idx, weight, m
        )

        # idx and weight receive no gradient.
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
|
||||
|
||||
|
||||
class GroupingOperation(Function):
    """Autograd wrapper around the compiled grouping kernel: gathers feature
    vectors for every (centroid, neighbour) index pair."""

    @staticmethod
    def forward(ctx, features, idx):
        # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        features : torch.Tensor
            (B, C, N) tensor of features to group
        idx : torch.Tensor
            (B, npoint, nsample) tensor containing the indicies of features to group with

        Returns
        -------
        torch.Tensor
            (B, C, npoint, nsample) tensor
        """
        # Cleanup: the original unpacked B, nfeatures, nsample, C but only N
        # was ever used; stash just what backward needs.
        N = features.size(2)
        ctx.for_backwards = (idx, N)

        return pointnet2.group_points(features, idx)

    @staticmethod
    def backward(ctx, grad_out):
        # type: (Any, torch.tensor) -> Tuple[torch.Tensor, torch.Tensor]
        r"""
        Parameters
        ----------
        grad_out : torch.Tensor
            (B, C, npoint, nsample) tensor of the gradients of the output from forward

        Returns
        -------
        torch.Tensor
            (B, C, N) gradient of the features
        None
        """
        idx, N = ctx.for_backwards

        # Scatter-add gradients back to the N original feature positions.
        grad_features = pointnet2.group_points_grad(grad_out.contiguous(), idx, N)

        return grad_features, None


grouping_operation = GroupingOperation.apply
|
||||
|
||||
|
||||
class BallQuery(Function):
    """Autograd wrapper around the compiled ball-query kernel.
    Not differentiable (returns indices)."""

    @staticmethod
    def forward(ctx, radius, nsample, xyz, new_xyz):
        # type: (Any, float, int, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        radius : float
            radius of the balls
        nsample : int
            maximum number of features in the balls
        xyz : torch.Tensor
            (B, N, 3) xyz coordinates of the features
        new_xyz : torch.Tensor
            (B, npoint, 3) centers of the ball query

        Returns
        -------
        torch.Tensor
            (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        return pointnet2.ball_query(new_xyz, xyz, radius, nsample)

    @staticmethod
    def backward(ctx, a=None):
        # One None per forward argument (radius, nsample, xyz, new_xyz).
        return None, None, None, None


ball_query = BallQuery.apply
|
||||
|
||||
|
||||
class QueryAndGroup(nn.Module):
    r"""
    Groups with a ball query of radius: for each centroid, gather up to
    ``nsample`` neighbour features within ``radius``.

    Parameters
    ---------
    radius : float32
        Radius of ball
    nsample : int32
        Maximum number of features to gather in the ball
    """

    def __init__(self, radius, nsample, use_xyz=True, ret_grouped_xyz=False,
                 normalize_xyz=False, sample_uniformly=False, ret_unique_cnt=False):
        # type: (QueryAndGroup, float, int, bool) -> None
        super(QueryAndGroup, self).__init__()
        self.radius = radius
        self.nsample = nsample
        self.use_xyz = use_xyz
        self.ret_grouped_xyz = ret_grouped_xyz
        self.normalize_xyz = normalize_xyz
        self.sample_uniformly = sample_uniformly
        self.ret_unique_cnt = ret_unique_cnt
        if self.ret_unique_cnt:
            # Unique counts are only meaningful when resampling uniformly.
            assert(self.sample_uniformly)

    def forward(self, xyz, new_xyz, features=None):
        # type: (QueryAndGroup, torch.Tensor. torch.Tensor, torch.Tensor) -> Tuple[Torch.Tensor]
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            centriods (B, npoint, 3)
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, 3 + C, npoint, nsample) tensor
        """
        idx = ball_query(self.radius, self.nsample, xyz, new_xyz)

        if self.sample_uniformly:
            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
            for b in range(idx.shape[0]):
                for r in range(idx.shape[1]):
                    # Replace the ball query's duplicate padding with a
                    # uniform resample over the distinct neighbour indices.
                    uniq = torch.unique(idx[b, r, :])
                    cnt = uniq.shape[0]
                    unique_cnt[b, r] = cnt
                    pad = torch.randint(0, cnt, (self.nsample - cnt,), dtype=torch.long)
                    idx[b, r, :] = torch.cat((uniq, uniq[pad]))

        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        # Translate each neighbourhood so its centroid sits at the origin.
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
        if self.normalize_xyz:
            grouped_xyz /= self.radius

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                # Prepend the relative xyz as 3 extra channels.
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert (
                self.use_xyz
            ), "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        ret = [new_features]
        if self.ret_grouped_xyz:
            ret.append(grouped_xyz)
        if self.ret_unique_cnt:
            ret.append(unique_cnt)
        return ret[0] if len(ret) == 1 else tuple(ret)
|
||||
|
||||
|
||||
class GroupAll(nn.Module):
    r"""
    Groups all features into a single neighbourhood (used when npoint is
    None, i.e. a global set-abstraction layer).

    Parameters
    ---------
    use_xyz : bool
        Prepend the xyz coordinates as 3 extra feature channels.
    ret_grouped_xyz : bool
        Also return the grouped xyz tensor from forward().
    """

    def __init__(self, use_xyz=True, ret_grouped_xyz=False):
        # type: (GroupAll, bool) -> None
        super(GroupAll, self).__init__()
        self.use_xyz = use_xyz
        # Bug fix: the original accepted ret_grouped_xyz but never stored it,
        # so forward() always crashed with AttributeError on
        # self.ret_grouped_xyz.
        self.ret_grouped_xyz = ret_grouped_xyz

    def forward(self, xyz, new_xyz, features=None):
        # type: (GroupAll, torch.Tensor, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor]
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            Ignored
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, C + 3, 1, N) tensor
        """
        # One "group" containing every point: (B, 3, 1, N).
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz

        if self.ret_grouped_xyz:
            return new_features, grouped_xyz
        else:
            return new_features
|
||||
|
||||
|
||||
class CylinderQuery(Function):
    """Autograd wrapper around the compiled oriented-cylinder query kernel.
    Not differentiable (returns indices)."""

    @staticmethod
    def forward(ctx, radius, hmin, hmax, nsample, xyz, new_xyz, rot):
        # type: (Any, float, float, float, int, torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
        r"""
        Parameters
        ----------
        radius : float
            radius of the cylinders
        hmin, hmax : float
            endpoints of cylinder height in x-rotation axis
        nsample : int
            maximum number of features in the cylinders
        xyz : torch.Tensor
            (B, N, 3) xyz coordinates of the features
        new_xyz : torch.Tensor
            (B, npoint, 3) centers of the cylinder query
        rot: torch.Tensor
            (B, npoint, 9) flatten rotation matrices from
            cylinder frame to world frame

        Returns
        -------
        torch.Tensor
            (B, npoint, nsample) tensor with the indicies of the features that form the query balls
        """
        return pointnet2.cylinder_query(new_xyz, xyz, rot, radius, hmin, hmax, nsample)

    @staticmethod
    def backward(ctx, a=None):
        # One None per forward argument.
        return None, None, None, None, None, None, None


cylinder_query = CylinderQuery.apply
|
||||
|
||||
|
||||
class CylinderQueryAndGroup(nn.Module):
    r"""
    Groups with a cylinder query of radius and height: for each centroid,
    gather up to ``nsample`` neighbours inside an oriented cylinder.

    Parameters
    ---------
    radius : float32
        Radius of cylinder
    hmin, hmax: float32
        endpoints of cylinder height in x-rotation axis
    nsample : int32
        Maximum number of features to gather in the ball
    """

    def __init__(self, radius, hmin, hmax, nsample, use_xyz=True, ret_grouped_xyz=False,
                 normalize_xyz=False, rotate_xyz=True, sample_uniformly=False, ret_unique_cnt=False):
        super(CylinderQueryAndGroup, self).__init__()
        self.radius = radius
        self.nsample = nsample
        self.hmin = hmin
        self.hmax = hmax
        self.use_xyz = use_xyz
        self.ret_grouped_xyz = ret_grouped_xyz
        self.normalize_xyz = normalize_xyz
        self.rotate_xyz = rotate_xyz
        self.sample_uniformly = sample_uniformly
        self.ret_unique_cnt = ret_unique_cnt
        if self.ret_unique_cnt:
            # Unique counts are only meaningful when resampling uniformly.
            assert(self.sample_uniformly)

    def forward(self, xyz, new_xyz, rot, features=None):
        r"""
        Parameters
        ----------
        xyz : torch.Tensor
            xyz coordinates of the features (B, N, 3)
        new_xyz : torch.Tensor
            centriods (B, npoint, 3)
        rot : torch.Tensor
            rotation matrices (B, npoint, 3, 3)
        features : torch.Tensor
            Descriptors of the features (B, C, N)

        Returns
        -------
        new_features : torch.Tensor
            (B, 3 + C, npoint, nsample) tensor
        """
        B, npoint, _ = new_xyz.size()
        idx = cylinder_query(self.radius, self.hmin, self.hmax, self.nsample,
                             xyz, new_xyz, rot.view(B, npoint, 9))

        if self.sample_uniformly:
            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
            for b in range(idx.shape[0]):
                for r in range(idx.shape[1]):
                    # Replace the query's duplicate padding with a uniform
                    # resample over the distinct neighbour indices.
                    uniq = torch.unique(idx[b, r, :])
                    cnt = uniq.shape[0]
                    unique_cnt[b, r] = cnt
                    pad = torch.randint(0, cnt, (self.nsample - cnt,), dtype=torch.long)
                    idx[b, r, :] = torch.cat((uniq, uniq[pad]))

        xyz_trans = xyz.transpose(1, 2).contiguous()
        grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
        # Translate each neighbourhood so its centroid sits at the origin.
        grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
        if self.normalize_xyz:
            grouped_xyz /= self.radius
        if self.rotate_xyz:
            # Express the local coordinates in the cylinder frame.
            grouped_xyz_ = grouped_xyz.permute(0, 2, 3, 1).contiguous()  # (B, npoint, nsample, 3)
            grouped_xyz_ = torch.matmul(grouped_xyz_, rot)
            grouped_xyz = grouped_xyz_.permute(0, 3, 1, 2).contiguous()

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                # Prepend the relative xyz as 3 extra channels.
                new_features = torch.cat(
                    [grouped_xyz, grouped_features], dim=1
                )  # (B, C + 3, npoint, nsample)
            else:
                new_features = grouped_features
        else:
            assert (
                self.use_xyz
            ), "Cannot have not features and not use xyz as a feature!"
            new_features = grouped_xyz

        ret = [new_features]
        if self.ret_grouped_xyz:
            ret.append(grouped_xyz)
        if self.ret_unique_cnt:
            ret.append(unique_cnt)
        return ret[0] if len(ret) == 1 else tuple(ret)
|
||||
@@ -0,0 +1,298 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
#
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch '''
|
||||
# import torch
|
||||
import torch.nn as nn
|
||||
from typing import List, Tuple
|
||||
|
||||
class SharedMLP(nn.Sequential):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
args: List[int],
|
||||
*,
|
||||
bn: bool = False,
|
||||
activation=nn.ReLU(inplace=True),
|
||||
preact: bool = False,
|
||||
first: bool = False,
|
||||
name: str = ""
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
for i in range(len(args) - 1):
|
||||
self.add_module(
|
||||
name + 'layer{}'.format(i),
|
||||
Conv2d(
|
||||
args[i],
|
||||
args[i + 1],
|
||||
bn=(not first or not preact or (i != 0)) and bn,
|
||||
activation=activation
|
||||
if (not first or not preact or (i != 0)) else None,
|
||||
preact=preact
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class _BNBase(nn.Sequential):
|
||||
|
||||
def __init__(self, in_size, batch_norm=None, name=""):
|
||||
super().__init__()
|
||||
self.add_module(name + "bn", batch_norm(in_size))
|
||||
|
||||
nn.init.constant_(self[0].weight, 1.0)
|
||||
nn.init.constant_(self[0].bias, 0)
|
||||
|
||||
|
||||
class BatchNorm1d(_BNBase):
|
||||
|
||||
def __init__(self, in_size: int, *, name: str = ""):
|
||||
super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)
|
||||
|
||||
|
||||
class BatchNorm2d(_BNBase):
|
||||
|
||||
def __init__(self, in_size: int, name: str = ""):
|
||||
super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)
|
||||
|
||||
|
||||
class BatchNorm3d(_BNBase):
|
||||
|
||||
def __init__(self, in_size: int, name: str = ""):
|
||||
super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name)
|
||||
|
||||
|
||||
class _ConvBase(nn.Sequential):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_size,
|
||||
out_size,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
activation,
|
||||
bn,
|
||||
init,
|
||||
conv=None,
|
||||
batch_norm=None,
|
||||
bias=True,
|
||||
preact=False,
|
||||
name=""
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
bias = bias and (not bn)
|
||||
conv_unit = conv(
|
||||
in_size,
|
||||
out_size,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
bias=bias
|
||||
)
|
||||
init(conv_unit.weight)
|
||||
if bias:
|
||||
nn.init.constant_(conv_unit.bias, 0)
|
||||
|
||||
if bn:
|
||||
if not preact:
|
||||
bn_unit = batch_norm(out_size)
|
||||
else:
|
||||
bn_unit = batch_norm(in_size)
|
||||
|
||||
if preact:
|
||||
if bn:
|
||||
self.add_module(name + 'bn', bn_unit)
|
||||
|
||||
if activation is not None:
|
||||
self.add_module(name + 'activation', activation)
|
||||
|
||||
self.add_module(name + 'conv', conv_unit)
|
||||
|
||||
if not preact:
|
||||
if bn:
|
||||
self.add_module(name + 'bn', bn_unit)
|
||||
|
||||
if activation is not None:
|
||||
self.add_module(name + 'activation', activation)
|
||||
|
||||
|
||||
class Conv1d(_ConvBase):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_size: int,
|
||||
out_size: int,
|
||||
*,
|
||||
kernel_size: int = 1,
|
||||
stride: int = 1,
|
||||
padding: int = 0,
|
||||
activation=nn.ReLU(inplace=True),
|
||||
bn: bool = False,
|
||||
init=nn.init.kaiming_normal_,
|
||||
bias: bool = True,
|
||||
preact: bool = False,
|
||||
name: str = ""
|
||||
):
|
||||
super().__init__(
|
||||
in_size,
|
||||
out_size,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
activation,
|
||||
bn,
|
||||
init,
|
||||
conv=nn.Conv1d,
|
||||
batch_norm=BatchNorm1d,
|
||||
bias=bias,
|
||||
preact=preact,
|
||||
name=name
|
||||
)
|
||||
|
||||
|
||||
class Conv2d(_ConvBase):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_size: int,
|
||||
out_size: int,
|
||||
*,
|
||||
kernel_size: Tuple[int, int] = (1, 1),
|
||||
stride: Tuple[int, int] = (1, 1),
|
||||
padding: Tuple[int, int] = (0, 0),
|
||||
activation=nn.ReLU(inplace=True),
|
||||
bn: bool = False,
|
||||
init=nn.init.kaiming_normal_,
|
||||
bias: bool = True,
|
||||
preact: bool = False,
|
||||
name: str = ""
|
||||
):
|
||||
super().__init__(
|
||||
in_size,
|
||||
out_size,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
activation,
|
||||
bn,
|
||||
init,
|
||||
conv=nn.Conv2d,
|
||||
batch_norm=BatchNorm2d,
|
||||
bias=bias,
|
||||
preact=preact,
|
||||
name=name
|
||||
)
|
||||
|
||||
|
||||
class Conv3d(_ConvBase):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_size: int,
|
||||
out_size: int,
|
||||
*,
|
||||
kernel_size: Tuple[int, int, int] = (1, 1, 1),
|
||||
stride: Tuple[int, int, int] = (1, 1, 1),
|
||||
padding: Tuple[int, int, int] = (0, 0, 0),
|
||||
activation=nn.ReLU(inplace=True),
|
||||
bn: bool = False,
|
||||
init=nn.init.kaiming_normal_,
|
||||
bias: bool = True,
|
||||
preact: bool = False,
|
||||
name: str = ""
|
||||
):
|
||||
super().__init__(
|
||||
in_size,
|
||||
out_size,
|
||||
kernel_size,
|
||||
stride,
|
||||
padding,
|
||||
activation,
|
||||
bn,
|
||||
init,
|
||||
conv=nn.Conv3d,
|
||||
batch_norm=BatchNorm3d,
|
||||
bias=bias,
|
||||
preact=preact,
|
||||
name=name
|
||||
)
|
||||
|
||||
|
||||
class FC(nn.Sequential):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_size: int,
|
||||
out_size: int,
|
||||
*,
|
||||
activation=nn.ReLU(inplace=True),
|
||||
bn: bool = False,
|
||||
init=None,
|
||||
preact: bool = False,
|
||||
name: str = ""
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
fc = nn.Linear(in_size, out_size, bias=not bn)
|
||||
if init is not None:
|
||||
init(fc.weight)
|
||||
if not bn:
|
||||
nn.init.constant_(fc.bias, 0)
|
||||
|
||||
if preact:
|
||||
if bn:
|
||||
self.add_module(name + 'bn', BatchNorm1d(in_size))
|
||||
|
||||
if activation is not None:
|
||||
self.add_module(name + 'activation', activation)
|
||||
|
||||
self.add_module(name + 'fc', fc)
|
||||
|
||||
if not preact:
|
||||
if bn:
|
||||
self.add_module(name + 'bn', BatchNorm1d(out_size))
|
||||
|
||||
if activation is not None:
|
||||
self.add_module(name + 'activation', activation)
|
||||
|
||||
def set_bn_momentum_default(bn_momentum):
|
||||
|
||||
def fn(m):
|
||||
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
|
||||
m.momentum = bn_momentum
|
||||
|
||||
return fn
|
||||
|
||||
|
||||
class BNMomentumScheduler(object):
|
||||
|
||||
def __init__(
|
||||
self, model, bn_lambda, last_epoch=-1,
|
||||
setter=set_bn_momentum_default
|
||||
):
|
||||
if not isinstance(model, nn.Module):
|
||||
raise RuntimeError(
|
||||
"Class '{}' is not a PyTorch nn Module".format(
|
||||
type(model).__name__
|
||||
)
|
||||
)
|
||||
|
||||
self.model = model
|
||||
self.setter = setter
|
||||
self.lmbd = bn_lambda
|
||||
|
||||
self.step(last_epoch + 1)
|
||||
self.last_epoch = last_epoch
|
||||
|
||||
def step(self, epoch=None):
|
||||
if epoch is None:
|
||||
epoch = self.last_epoch + 1
|
||||
|
||||
self.last_epoch = epoch
|
||||
self.model.apply(self.setter(self.lmbd(epoch)))
|
||||
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
#
|
||||
# This source code is licensed under the MIT license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
from setuptools import setup
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension
|
||||
import glob
|
||||
import os
|
||||
|
||||
|
||||
ROOT = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
_ext_src_root = "_ext_src"
|
||||
|
||||
_ext_cpp_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root))
|
||||
_ext_cu_sources = glob.glob("{}/src/*.cu".format(_ext_src_root))
|
||||
|
||||
_ext_cuda_sources = _ext_cpp_sources + _ext_cu_sources
|
||||
|
||||
|
||||
def is_cuda_available():
|
||||
# return False
|
||||
try:
|
||||
import torch
|
||||
return torch.cuda.is_available() and hasattr(torch.utils.cpp_extension, "CUDAExtension")
|
||||
except (ImportError, AttributeError, RuntimeError):
|
||||
return False
|
||||
|
||||
_ext_modules = []
|
||||
if is_cuda_available():
|
||||
_ext_modules.append(
|
||||
CUDAExtension(
|
||||
name='pointnet2.pointnet2',
|
||||
sources=_ext_cuda_sources,
|
||||
extra_compile_args={
|
||||
"cxx": ["-O2", "-DWITH_CUDA", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
|
||||
"nvcc": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
|
||||
},
|
||||
)
|
||||
)
|
||||
else:
|
||||
_ext_modules.append(
|
||||
CppExtension(
|
||||
name='pointnet2.pointnet2',
|
||||
sources=_ext_cpp_sources,
|
||||
extra_compile_args={
|
||||
"cxx": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))],
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
setup(
|
||||
name='pointnet2',
|
||||
ext_modules=_ext_modules,
|
||||
cmdclass={
|
||||
'build_ext': BuildExtension
|
||||
}
|
||||
)
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
# 启用2个关键CUDA调试环境变量
|
||||
os.environ["CUDA_LAUNCH_BLOCKING"] = "1" # 强制CUDA核函数同步执行,错误信息实时返回
|
||||
os.environ["TORCH_USE_CUDA_DSA"] = "1"
|
||||
|
||||
import torch
|
||||
import pointnet2
|
||||
|
||||
# 1. 构造简单的点云张量(避免空张量、形状异常)
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
# device = torch.device("cpu")
|
||||
print("使用设备:", device)
|
||||
|
||||
# 点云数据:(batch_size, 3, num_points)
|
||||
points = torch.randn(1, 3, 1024, device=device, dtype=torch.float32)
|
||||
# 中心点数据:(batch_size, 3, num_centers)
|
||||
centers = torch.randn(1, 3, 128, device=device, dtype=torch.float32)
|
||||
# 半径和近邻数(符合扩展函数预期)
|
||||
radius = 0.1
|
||||
k = 32
|
||||
|
||||
try:
|
||||
# 调用扩展函数
|
||||
idx = pointnet2.ball_query(centers, points, radius, k)
|
||||
print("扩展函数执行成功,输出形状:", idx.shape)
|
||||
print("扩展函数执行正常,无内存错误!")
|
||||
except Exception as e:
|
||||
print(f"扩展函数执行失败:{e}")
|
||||
@@ -6,9 +6,7 @@ from rclpy.node import Node
|
||||
|
||||
import numpy as np
|
||||
import open3d as o3d
|
||||
|
||||
from ..utils import read_calibration_mat
|
||||
|
||||
import yaml
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
@@ -16,7 +14,14 @@ __all__ = [
|
||||
"ConfigBase"
|
||||
]
|
||||
|
||||
|
||||
class ConfigBase(Node):
|
||||
SHARE_DIR = get_package_share_directory('vision_detect')
|
||||
with open(os.path.join(
|
||||
SHARE_DIR, "configs/error_configs/report_logging_define.json"), "r"
|
||||
) as f:
|
||||
WARNING_LOG_MAP = json.load(f)["warring"]
|
||||
|
||||
def __init__(self, name):
|
||||
super().__init__(name)
|
||||
"""init parameter"""
|
||||
@@ -53,9 +58,13 @@ class ConfigBase(Node):
|
||||
self.camera_size = None
|
||||
self.position = None
|
||||
|
||||
self.e2e_model = None
|
||||
|
||||
self.map1 = self.map2 = None
|
||||
|
||||
self.calculate_function = None
|
||||
|
||||
|
||||
self.fx = self.fy = 0.5
|
||||
self.camera_data = {}
|
||||
self.distance = 1500
|
||||
@@ -82,7 +91,7 @@ class ConfigBase(Node):
|
||||
self.detect_mode = self.configs['detect_mode']
|
||||
self.calculate_mode = self.configs['calculate_mode']
|
||||
|
||||
self._read_calibration_mat()
|
||||
# self._read_calibration_mat()
|
||||
|
||||
if self.get_camera_mode == "Service":
|
||||
self.service_name = self.configs["Service_configs"]["service_name"]
|
||||
@@ -126,29 +135,10 @@ class ConfigBase(Node):
|
||||
os.path.join(share_dir, self.calculate_configs['complete_model_path'])
|
||||
)
|
||||
self.calculate_configs["source"] = source
|
||||
|
||||
elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
|
||||
self.calculate_configs = self.configs['E2E_configs']
|
||||
else:
|
||||
self.get_logger().warning("Unknown calculate_mode, use PCA")
|
||||
self.calculate_configs = self.configs['PCA_configs']
|
||||
|
||||
self.get_logger().info("Get parameters done")
|
||||
|
||||
def _read_calibration_mat(self):
|
||||
eye_in_left_hand_path = os.path.join(share_dir, self.configs["calibration"]["left_hand"])
|
||||
eye_in_right_hand_path = os.path.join(share_dir, self.configs["calibration"]["right_hand"])
|
||||
eye_to_hand_path = os.path.join(share_dir, self.configs["calibration"]["head"])
|
||||
|
||||
self.eye_in_left_hand_mat, info, sign = read_calibration_mat(eye_in_left_hand_path)
|
||||
if not sign:
|
||||
self.get_logger().warning(f"left_mat: {info}")
|
||||
|
||||
self.eye_in_right_hand_mat, info, sign = read_calibration_mat(eye_in_right_hand_path)
|
||||
if not sign:
|
||||
self.get_logger().warning(f"right_mat: {info}")
|
||||
|
||||
self.eye_to_hand_mat, info, sign = read_calibration_mat(eye_to_hand_path)
|
||||
if not sign:
|
||||
self.get_logger().warning(f"head_mat: {info}")
|
||||
|
||||
self.get_logger().info("Read calibration mat file done")
|
||||
|
||||
|
||||
@@ -14,10 +14,18 @@ from geometry_msgs.msg import Pose, Point, Quaternion
|
||||
|
||||
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
|
||||
|
||||
from ..utils import *
|
||||
from ..utils import distortion_correction, crop_imgs_box_xywh, draw_box, draw_mask, rmat2quat, \
|
||||
crop_imgs_mask, get_map, create_o3d_pcd, save_img, refine_grasp_pose
|
||||
from .init_node import InitBase
|
||||
|
||||
|
||||
E2E_SIGN = True
|
||||
try:
|
||||
from ..utils import calculate_pose_e2e
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
E2E_SIGN = False
|
||||
|
||||
|
||||
class DetectNode(InitBase):
|
||||
"""Detect Node"""
|
||||
def __init__(self, name):
|
||||
@@ -55,10 +63,11 @@ class DetectNode(InitBase):
|
||||
"""同步回调函数"""
|
||||
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
|
||||
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
|
||||
|
||||
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
|
||||
|
||||
img, pose_list = self.function(color_img_cv, depth_img_cv)
|
||||
img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
|
||||
if not sign:
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
|
||||
|
||||
# masks为空,结束这一帧
|
||||
if img is None:
|
||||
@@ -83,7 +92,8 @@ class DetectNode(InitBase):
|
||||
self.pub_pose_list.publish(pose_list_all)
|
||||
|
||||
def _service_callback(self, request, response):
|
||||
# self.get_logger().info("service callback start")
|
||||
print(" \n ")
|
||||
print("========================== < start > ==========================")
|
||||
response.header.stamp = self.get_clock().now().to_msg()
|
||||
response.header.frame_id = "camera_detect"
|
||||
|
||||
@@ -95,23 +105,24 @@ class DetectNode(InitBase):
|
||||
response.success = False
|
||||
response.info = "Camera data have not objects"
|
||||
response.objects = []
|
||||
|
||||
# self.get_logger().info("service callback done")
|
||||
print("=========================== < end > ===========================")
|
||||
return response
|
||||
|
||||
response.success = False
|
||||
response.info = f"Name is wrong: {request.camera_position}"
|
||||
response.objects = []
|
||||
|
||||
# self.get_logger().info("service callback done")
|
||||
print("=========================== < end > ===========================")
|
||||
return response
|
||||
|
||||
if request.camera_position == 'left':
|
||||
self.hand_eye_mat = self.eye_in_left_hand_mat
|
||||
self.p = "left"
|
||||
elif request.camera_position == 'right':
|
||||
self.hand_eye_mat = self.eye_in_right_hand_mat
|
||||
self.p = "right"
|
||||
else:
|
||||
self.hand_eye_mat = self.eye_to_hand_mat
|
||||
self.p = "head"
|
||||
|
||||
self.camera_size = [color_img_ros.width, color_img_ros.height]
|
||||
|
||||
@@ -121,15 +132,15 @@ class DetectNode(InitBase):
|
||||
map1, map2, self.k = get_map(self.k, d, self.camera_size)
|
||||
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
|
||||
|
||||
img, pose_list = self.function(color_img_cv, depth_img_cv)
|
||||
img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
|
||||
if not sign:
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
|
||||
|
||||
# masks为空,结束这一帧
|
||||
if self.output_boxes or self.output_masks:
|
||||
if img is None:
|
||||
img = color_img_ros
|
||||
self.pub_detect_image.publish(img)
|
||||
response.info = "pose dict is empty"
|
||||
response.success = False
|
||||
response.objects = []
|
||||
|
||||
if pose_list:
|
||||
else:
|
||||
response.info = "Success get pose"
|
||||
response.success = True
|
||||
for item in pose_list:
|
||||
@@ -141,19 +152,23 @@ class DetectNode(InitBase):
|
||||
grab_width = item["grab_width"]
|
||||
)
|
||||
)
|
||||
else:
|
||||
response.info = "pose dict is empty"
|
||||
response.success = False
|
||||
response.objects = []
|
||||
|
||||
# self.get_logger().info("service callback done")
|
||||
# publish detect image
|
||||
if self.output_boxes or self.output_masks:
|
||||
if img is None:
|
||||
img = color_img_ros
|
||||
self.pub_detect_image.publish(img)
|
||||
print("=========================== < end > ===========================")
|
||||
return response
|
||||
|
||||
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
|
||||
"""Use segmentation model"""
|
||||
self.get_logger().info('start')
|
||||
pose_list = []
|
||||
|
||||
home = os.path.expanduser("~")
|
||||
save_dir = os.path.join(home, "images")
|
||||
save_img(rgb_img.copy(), "orign_image.png", save_dir=save_dir, mark_cur_time=True)
|
||||
|
||||
# Get Predict Results
|
||||
time1 = time.time()
|
||||
results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
|
||||
@@ -162,8 +177,7 @@ class DetectNode(InitBase):
|
||||
|
||||
# Get masks
|
||||
if result.masks is None or len(result.masks) == 0:
|
||||
self.get_logger().info(f"Detect object num: 0")
|
||||
return None, None
|
||||
return None, 1000, False
|
||||
masks = result.masks.data.cpu().numpy()
|
||||
|
||||
# Get boxes
|
||||
@@ -171,81 +185,96 @@ class DetectNode(InitBase):
|
||||
class_ids = result.boxes.cls.cpu().numpy()
|
||||
labels = result.names
|
||||
|
||||
x_centers, y_centers = boxes[:, 0], boxes[:, 1]
|
||||
sorted_index = np.lexsort((-y_centers, x_centers))
|
||||
masks = masks[sorted_index]
|
||||
boxes = boxes[sorted_index]
|
||||
class_ids = class_ids[sorted_index]
|
||||
|
||||
time3 = time.time()
|
||||
|
||||
self.get_logger().info(f"Detect object num: {len(masks)}")
|
||||
|
||||
full_points = create_o3d_pcd(
|
||||
depth_img, self.camera_size, self.k, **self.calculate_configs
|
||||
)
|
||||
time_full_points = time.time()
|
||||
print(f"create full points: {(time_full_points - time3) * 1000}")
|
||||
|
||||
if self.calculate_mode == "E2E" and self.detect_mode == 'Detect' and E2E_SIGN:
|
||||
self.calculate_configs["orign_point_clouds"] = create_o3d_pcd(
|
||||
depth_img, self.camera_size, self.k, **self.calculate_configs
|
||||
)
|
||||
|
||||
for i, (mask, box) in enumerate(zip(masks, boxes)):
|
||||
imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_img, mask], box, True)
|
||||
depth_crop, mask_crop = imgs_crop
|
||||
|
||||
if depth_crop is None:
|
||||
self.get_logger().warning("Depth crop is None")
|
||||
continue
|
||||
|
||||
intrinsics = o3d.camera.PinholeCameraIntrinsic(
|
||||
int(self.camera_size[0]),
|
||||
int(self.camera_size[1]),
|
||||
self.k[0],
|
||||
self.k[4],
|
||||
self.k[2] - x_min,
|
||||
self.k[5] - y_min
|
||||
int(self.camera_size[0]), int(self.camera_size[1]),
|
||||
self.k[0], self.k[4],
|
||||
self.k[2] - x_min, self.k[5] - y_min
|
||||
)
|
||||
|
||||
rmat, grab_width = self.calculate_function(
|
||||
mask_crop,
|
||||
depth_crop,
|
||||
intrinsics,
|
||||
calculate_grab_width=True,
|
||||
**self.calculate_configs
|
||||
rmat, grab_width, sign = self.calculate_function(
|
||||
mask_crop, depth_crop, intrinsics,
|
||||
calculate_grab_width=True, **self.calculate_configs
|
||||
)
|
||||
if rmat is None:
|
||||
self.get_logger().warning("Object point cloud have too many noise")
|
||||
if not sign:
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
|
||||
continue
|
||||
|
||||
self.get_logger().info(f"grab_width: {grab_width}")
|
||||
|
||||
rmat = self.hand_eye_mat @ rmat
|
||||
|
||||
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
|
||||
|
||||
if (x, y, z) != (0.0, 0.0, 0.0):
|
||||
pose = Pose()
|
||||
pose.position = Point(x=x, y=y, z=z)
|
||||
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
|
||||
pose_list.append(
|
||||
{
|
||||
"class_id": int(class_ids[i]),
|
||||
"class_name": labels[class_ids[i]],
|
||||
"pose": pose,
|
||||
"grab_width": grab_width
|
||||
}
|
||||
time_c = time.time()
|
||||
if self.p == "left" or self.p == "right":
|
||||
position = rmat[0:3, 3]
|
||||
rmat, sign = refine_grasp_pose(
|
||||
full_points, self.calculate_configs.get("voxel_size"), position,
|
||||
search_mode=True
|
||||
)
|
||||
if not sign:
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
|
||||
continue
|
||||
|
||||
time_e = time.time()
|
||||
print(f"Refine: {(time_e - time_c) * 1000} ms")
|
||||
|
||||
self.get_logger().info(f"grab_width: {grab_width}")
|
||||
x, y, z, rw, rx, ry, rz = rmat2quat(self.hand_eye_mat @ rmat)
|
||||
|
||||
pose = Pose()
|
||||
pose.position = Point(x=x, y=y, z=z)
|
||||
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
|
||||
self.get_logger().info(f"xyz, wxyz: {x, y, z, rw, rx, ry, rz}")
|
||||
pose_list.append(
|
||||
{
|
||||
"class_id": int(class_ids[i]),
|
||||
"class_name": labels[class_ids[i]],
|
||||
"pose": pose,
|
||||
"grab_width": grab_width
|
||||
}
|
||||
)
|
||||
|
||||
time4 = time.time()
|
||||
|
||||
if not pose_list:
|
||||
return None, 1003, False
|
||||
|
||||
self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
|
||||
# self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
|
||||
self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
|
||||
self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
|
||||
self.get_logger().info('end')
|
||||
|
||||
# mask_img and box_img is or not output
|
||||
if self.output_boxes and not self.output_masks:
|
||||
home = os.path.expanduser("~")
|
||||
save_path = os.path.join(home, "detect_image.png")
|
||||
draw_box(rgb_img, result, save_path=save_path)
|
||||
# draw_box(rgb_img, result, save_path=False)
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
|
||||
elif self.output_boxes and self.output_masks:
|
||||
draw_box(rgb_img, result)
|
||||
if not self.output_boxes and not self.output_masks:
|
||||
return None, pose_list, True
|
||||
if self.output_boxes:
|
||||
draw_box(rgb_img, result, save_dir=save_dir, mark_time = True)
|
||||
if self.output_masks:
|
||||
draw_mask(rgb_img, result)
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
|
||||
elif not self.output_boxes and self.output_masks:
|
||||
draw_mask(rgb_img, result)
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
|
||||
else:
|
||||
return None, pose_list
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
|
||||
|
||||
def _seg_color(self, rgb_img: np.ndarray, depth_img: np.ndarray):
|
||||
"""Use segmentation model"""
|
||||
@@ -268,31 +297,26 @@ class DetectNode(InitBase):
|
||||
depth_crop = imgs_crop[0]
|
||||
|
||||
if depth_crop is None:
|
||||
self.get_logger().warning("depth_crop is None")
|
||||
return None, None
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
|
||||
return None, 1001, False
|
||||
|
||||
intrinsics = o3d.camera.PinholeCameraIntrinsic(
|
||||
int(self.camera_size[0]),
|
||||
int(self.camera_size[1]),
|
||||
self.k[0],
|
||||
self.k[4],
|
||||
self.k[2] - x_min,
|
||||
self.k[5] - y_min
|
||||
int(self.camera_size[0]), int(self.camera_size[1]),
|
||||
self.k[0], self.k[4],
|
||||
self.k[2] - x_min, self.k[5] - y_min
|
||||
)
|
||||
|
||||
rmat, _ = self.calculate_function(
|
||||
mask_crop,
|
||||
depth_crop,
|
||||
intrinsics,
|
||||
**self.calculate_configs
|
||||
rmat, _, sign = self.calculate_function(
|
||||
mask_crop, depth_crop, intrinsics, **self.calculate_configs
|
||||
)
|
||||
if rmat is None:
|
||||
self.get_logger().warning("Color Area point cloud have too many noise")
|
||||
return None, None
|
||||
if not sign:
|
||||
# self.get_logger().warning("Color Area point cloud have too many noise")
|
||||
return None, 1100, False
|
||||
|
||||
rmat = self.hand_eye_mat @ rmat
|
||||
|
||||
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
|
||||
T = self.hand_eye_mat @ rmat
|
||||
|
||||
x, y, z, rw, rx, ry, rz = rmat2quat(T)
|
||||
|
||||
if (x, y, z) != (0.0, 0.0, 0.0):
|
||||
pose = Pose()
|
||||
@@ -307,12 +331,13 @@ class DetectNode(InitBase):
|
||||
}
|
||||
)
|
||||
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
|
||||
|
||||
def _seg_crossboard(self, rgb_img, depth_img):
|
||||
pose_list = []
|
||||
rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
|
||||
ret, corners = cv2.findChessboardCorners(rgb_img_gray, self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
|
||||
ret, corners = cv2.findChessboardCorners(rgb_img_gray,
|
||||
self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
|
||||
if ret:
|
||||
# 角点亚像素精确化(提高标定精度)
|
||||
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
|
||||
@@ -337,64 +362,50 @@ class DetectNode(InitBase):
|
||||
depth_crop = img_crop[0]
|
||||
|
||||
if depth_crop is None:
|
||||
return None, None
|
||||
self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
|
||||
return None, 1001, False
|
||||
|
||||
intrinsics = o3d.camera.PinholeCameraIntrinsic(
|
||||
int(self.camera_size[0]),
|
||||
int(self.camera_size[1]),
|
||||
self.k[0],
|
||||
self.k[4],
|
||||
self.k[2] - x_min,
|
||||
self.k[5] - y_min
|
||||
int(self.camera_size[0]), int(self.camera_size[1]),
|
||||
self.k[0], self.k[4],
|
||||
self.k[2] - x_min, self.k[5] - y_min
|
||||
)
|
||||
|
||||
rmat, _ = self.calculate_function(
|
||||
mask_crop,
|
||||
depth_crop,
|
||||
intrinsics,
|
||||
**self.calculate_configs
|
||||
rmat, _, sign = self.calculate_function(
|
||||
mask_crop, depth_crop, intrinsics, **self.calculate_configs
|
||||
)
|
||||
|
||||
if rmat is None:
|
||||
self.get_logger().warning("Corssboard point cloud have too many noise")
|
||||
return None, None
|
||||
if not sign:
|
||||
# self.get_logger().warning("Corssboard point cloud have too many noise")
|
||||
return None, 1100, False
|
||||
|
||||
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
|
||||
|
||||
self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
|
||||
|
||||
if (x, y, z) != (0.0, 0.0, 0.0):
|
||||
pose = Pose()
|
||||
pose.position = Point(x=x, y=y, z=z)
|
||||
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
|
||||
pose_list.append(
|
||||
{
|
||||
"class_id": int(99),
|
||||
"class_name": 'crossboard',
|
||||
"pose": pose,
|
||||
"grab_width": 0.0
|
||||
}
|
||||
)
|
||||
pose = Pose()
|
||||
pose.position = Point(x=x, y=y, z=z)
|
||||
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
|
||||
pose_list.append(
|
||||
{
|
||||
"class_id": int(99),
|
||||
"class_name": 'crossboard',
|
||||
"pose": pose,
|
||||
"grab_width": 0.0
|
||||
}
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
rgb_img,
|
||||
f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
|
||||
(0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(255, 255, 0),
|
||||
2
|
||||
rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
|
||||
(0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
|
||||
)
|
||||
cv2.putText(
|
||||
rgb_img,
|
||||
f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
|
||||
(0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(255, 255, 0),
|
||||
2
|
||||
rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
|
||||
(0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
|
||||
)
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
|
||||
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None, True
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import os
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
import torch
|
||||
@@ -12,15 +13,22 @@ from sensor_msgs.msg import Image, CameraInfo
|
||||
from interfaces.msg import PoseArrayClassAndID, ImgMsg
|
||||
from interfaces.srv import VisionObjectRecognition
|
||||
|
||||
from ..utils import calculate_pose_pca, calculate_pose_icp
|
||||
from ..utils import calculate_pose_pca, calculate_pose_icp, read_calibration_mat
|
||||
from .config_node import ConfigBase
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
E2E_SIGN = True
|
||||
try:
|
||||
from ..net import GraspNet
|
||||
from ..utils import calculate_pose_e2e
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
E2E_SIGN = False
|
||||
|
||||
|
||||
class InitBase(ConfigBase):
|
||||
def __init__(self, name):
|
||||
super().__init__(name)
|
||||
self.future = Future()
|
||||
self._read_calibration_mat()
|
||||
|
||||
if self.get_camera_mode == "Service":
|
||||
self._init_service()
|
||||
@@ -53,6 +61,19 @@ class InitBase(ConfigBase):
|
||||
self.calculate_function = calculate_pose_pca
|
||||
elif self.calculate_mode == "ICP" and self.detect_mode == 'Detect':
|
||||
self.calculate_function = calculate_pose_icp
|
||||
elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
|
||||
self.calculate_function = calculate_pose_e2e
|
||||
self.e2e_model = GraspNet(seed_feat_dim=512, is_training=False)
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
self.e2e_model.to(device)
|
||||
self.e2e_model.load_state_dict(
|
||||
torch.load(
|
||||
os.path.join(self.SHARE_DIR, self.calculate_configs["checkpoint_path"]),
|
||||
map_location=device
|
||||
)
|
||||
)
|
||||
self.e2e_model.eval()
|
||||
self.calculate_configs["model"] = self.e2e_model
|
||||
else:
|
||||
self.calculate_function = calculate_pose_pca
|
||||
|
||||
@@ -145,6 +166,28 @@ class InitBase(ConfigBase):
|
||||
|
||||
self.get_logger().info("Initialize subscriber done")
|
||||
|
||||
def _read_calibration_mat(self):
|
||||
eye_in_left_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["left_hand"])
|
||||
eye_in_right_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["right_hand"])
|
||||
eye_to_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["head"])
|
||||
|
||||
self.eye_in_left_hand_mat, info, sign = read_calibration_mat(eye_in_left_hand_path)
|
||||
self.get_logger().info(f"left_hand_mat: {self.eye_in_left_hand_mat}")
|
||||
if not sign:
|
||||
self.get_logger().warning(f"left_mat: {info}")
|
||||
|
||||
self.eye_in_right_hand_mat, info, sign = read_calibration_mat(eye_in_right_hand_path)
|
||||
self.get_logger().info(f"right_hand_mat: {self.eye_in_right_hand_mat}")
|
||||
if not sign:
|
||||
self.get_logger().warning(f"right_mat: {info}")
|
||||
|
||||
self.eye_to_hand_mat, info, sign = read_calibration_mat(eye_to_hand_path)
|
||||
self.get_logger().info(f"head_mat: {self.eye_to_hand_mat}")
|
||||
if not sign:
|
||||
self.get_logger().warning(f"head_mat: {info}")
|
||||
|
||||
self.get_logger().info("Read calibration mat file done")
|
||||
|
||||
def _camera_info_callback(self, msg: CameraInfo):
|
||||
pass
|
||||
|
||||
|
||||
@@ -2,3 +2,5 @@ from .image_tools import *
|
||||
from .draw_tools import *
|
||||
from .calculate_tools import *
|
||||
from .file_tools import *
|
||||
from .pointclouds_tools import *
|
||||
from .grasp_refine import *
|
||||
@@ -1,18 +1,35 @@
|
||||
"""计算工具"""
|
||||
import logging
|
||||
|
||||
import cv2
|
||||
# import cv2
|
||||
import numpy as np
|
||||
import open3d as o3d
|
||||
import transforms3d as tfs
|
||||
|
||||
from .object_icp import object_icp
|
||||
from .object_icp import *
|
||||
from .pointclouds_tools import *
|
||||
|
||||
E2E_SIGN = True
|
||||
try:
|
||||
import torch
|
||||
import MinkowskiEngine as ME
|
||||
import collections.abc as container_abcs
|
||||
from ..net import pred_decode, ModelFreeCollisionDetector
|
||||
except (ImportError, OSError, RuntimeError):
|
||||
logging.warning("ImportError or OSError or RuntimeError")
|
||||
E2E_SIGN = False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"calculate_pose_pca", "calculate_pose_icp",
|
||||
"rmat2quat", "quat2rmat",
|
||||
"rmat2quat", "quat2rmat", "calculate_pose_e2e"
|
||||
]
|
||||
|
||||
|
||||
if not E2E_SIGN:
|
||||
__all__.remove("calculate_pose_e2e")
|
||||
|
||||
|
||||
def pca(data: np.ndarray, sort=True):
|
||||
"""主成分分析 """
|
||||
center = np.mean(data, axis=0)
|
||||
@@ -40,30 +57,52 @@ def calculate_pose_pca(
|
||||
calculate_grab_width: bool = False,
|
||||
**kwargs
|
||||
):
|
||||
"""点云主成分分析法计算位态"""
|
||||
"""点云主成分分析法计算位态
|
||||
----------
|
||||
input:
|
||||
mask: np.ndarray
|
||||
depth_img: np.ndarray
|
||||
intrinsics: o3d.camera.PinholeCameraIntrinsic
|
||||
calculate_grab_width: bool
|
||||
**kwargs:
|
||||
|
||||
output:
|
||||
rmat: np.ndarray (4, 4)
|
||||
grab_width: list
|
||||
sign: bool
|
||||
"""
|
||||
depth_img_mask = np.zeros_like(depth_img)
|
||||
depth_img_mask[mask > 0] = depth_img[mask > 0]
|
||||
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
|
||||
if not sign:
|
||||
return point_cloud, [], False
|
||||
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
depth_trunc=kwargs.get("depth_trunc", 3.0),
|
||||
)
|
||||
# depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
#
|
||||
# point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
# depth=depth_o3d,
|
||||
# intrinsic=intrinsics,
|
||||
# depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
# depth_trunc=kwargs.get("depth_trunc", 3.0),
|
||||
# )
|
||||
#
|
||||
# point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
|
||||
# if not sign:
|
||||
# return 1100, [], False
|
||||
#
|
||||
# if len(point_cloud.points) == 0:
|
||||
# # logging.warning("point_cloud is empty")
|
||||
# return 1101, [], False
|
||||
|
||||
point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.005))
|
||||
if point_cloud is None:
|
||||
return None, [0.0, 0.0, 0.0]
|
||||
|
||||
if len(point_cloud.points) == 0:
|
||||
logging.warning("clean_pcd is empty")
|
||||
return np.eye(4), [0.0, 0.0, 0.0]
|
||||
center = point_cloud.get_center()
|
||||
x, y, z = center
|
||||
|
||||
if calculate_grab_width:
|
||||
if np.asarray(point_cloud.points).shape[0] < 4:
|
||||
# logging.warning("点数不足,不能算 OBB")
|
||||
return 1200, [0.0, 0.0, 0.0], False
|
||||
obb = point_cloud.get_oriented_bounding_box()
|
||||
x, y, z = obb.center
|
||||
extent = obb.extent
|
||||
order = np.argsort(-extent)
|
||||
|
||||
@@ -74,20 +113,16 @@ def calculate_pose_pca(
|
||||
v = v[:, order]
|
||||
|
||||
if v is None:
|
||||
logging.warning("PCA output v is None")
|
||||
return np.eye(4), [0.0, 0.0, 0.0]
|
||||
return 1201, [0.0, 0.0, 0.0], False
|
||||
|
||||
grab_width = grab_width * 1.05
|
||||
|
||||
else:
|
||||
center = point_cloud.get_center()
|
||||
x, y, z = center
|
||||
|
||||
w, v = pca(np.asarray(point_cloud.points))
|
||||
|
||||
if w is None or v is None:
|
||||
logging.warning("PCA output w or v is None")
|
||||
return np.eye(4), 0.0
|
||||
return 1201, 0.0, False
|
||||
|
||||
grab_width = [0.0, 0.0, 0.0]
|
||||
|
||||
vx, vy, vz = v[:,0], v[:,1], v[:,2]
|
||||
@@ -105,7 +140,7 @@ def calculate_pose_pca(
|
||||
# draw(point_cloud_u, rmat)
|
||||
# draw(point_cloud, rmat)
|
||||
|
||||
return rmat, grab_width
|
||||
return rmat, grab_width, True
|
||||
|
||||
|
||||
def calculate_pose_icp(
|
||||
@@ -119,22 +154,26 @@ def calculate_pose_icp(
|
||||
depth_img_mask = np.zeros_like(depth_img)
|
||||
depth_img_mask[mask > 0] = depth_img[mask > 0]
|
||||
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
|
||||
if not sign:
|
||||
return point_cloud, [], False
|
||||
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
depth_trunc=kwargs.get("depth_trunc", 3.0)
|
||||
)
|
||||
|
||||
point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
|
||||
if point_cloud is None:
|
||||
return None, [0.0, 0.0, 0.0]
|
||||
|
||||
if len(point_cloud.points) == 0:
|
||||
logging.warning("clean_pcd is empty")
|
||||
return np.eye(4), [0.0, 0.0, 0.0]
|
||||
# depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
#
|
||||
# point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
# depth=depth_o3d,
|
||||
# intrinsic=intrinsics,
|
||||
# depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
# depth_trunc=kwargs.get("depth_trunc", 3.0)
|
||||
# )
|
||||
#
|
||||
# point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
|
||||
# if not sign:
|
||||
# return 1100, [], False
|
||||
#
|
||||
# if len(point_cloud.points) == 0:
|
||||
# # logging.warning("clean_pcd is empty")
|
||||
# return 1101, [0.0, 0.0, 0.0], False
|
||||
|
||||
if calculate_grab_width:
|
||||
pass
|
||||
@@ -148,97 +187,190 @@ def calculate_pose_icp(
|
||||
)
|
||||
|
||||
grab_width = [0.0, 0.0, 0.0]
|
||||
return rmat, grab_width
|
||||
return rmat, grab_width, True
|
||||
|
||||
|
||||
def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
|
||||
"""点云去噪"""
|
||||
point_cloud = point_cloud.remove_non_finite_points()
|
||||
while True:
|
||||
down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size * 0.5)
|
||||
point_num = len(down_pcd.points)
|
||||
if point_num <= 1000:
|
||||
break
|
||||
voxel_size *= 2
|
||||
if E2E_SIGN:
|
||||
def minkowski_collate_fn(list_data):
|
||||
coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
|
||||
[d["feats"] for d in list_data])
|
||||
coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
|
||||
coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
|
||||
coordinates_batch, features_batch, return_index=True, return_inverse=True)
|
||||
|
||||
# logging.fatal("point_cloud_denoising: point_num={}".format(len(point_cloud.points)))
|
||||
# logging.fatal("point_cloud_denoising: point_num={}".format(point_num))
|
||||
res = {
|
||||
"coors": coordinates_batch,
|
||||
"feats": features_batch,
|
||||
"quantize2original": quantize2original,
|
||||
"point_clouds": torch.stack([torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
|
||||
}
|
||||
return res
|
||||
|
||||
# 半径滤波
|
||||
clean_pcd, _ = down_pcd.remove_radius_outlier(
|
||||
nb_points=10,
|
||||
radius=voxel_size * 5
|
||||
|
||||
def calculate_pose_e2e(
|
||||
mask,
|
||||
depth_img: np.ndarray,
|
||||
intrinsics,
|
||||
calculate_grab_width: bool = False,
|
||||
**kwargs
|
||||
):
|
||||
"""
|
||||
点云抓取姿态预测模型计算位态
|
||||
-----
|
||||
input:
|
||||
mask: np.ndarray
|
||||
depth_img: np.ndarray
|
||||
intrinsics: o3d.camera.PinholeCameraIntrinsic
|
||||
calculate_grab_width: bool (abandon)
|
||||
**kwargs:
|
||||
"depth_scale": float
|
||||
"depth_trunc": float
|
||||
"voxel_size": float
|
||||
"model":
|
||||
"collision_thresh": float
|
||||
|
||||
"""
|
||||
# logging.error("stage 1")
|
||||
depth_img_mask = np.zeros_like(depth_img)
|
||||
depth_img_mask[mask > 0] = depth_img[mask > 0]
|
||||
|
||||
# 点云创建
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
depth_trunc=kwargs.get("depth_trunc", 3.0),
|
||||
)
|
||||
point_cloud = point_cloud.remove_non_finite_points()
|
||||
|
||||
# 点云去噪过程
|
||||
down_pcd = point_cloud.voxel_down_sample(voxel_size=kwargs.get("voxel_size", 0.002))
|
||||
|
||||
clean_pcd, _ = down_pcd.remove_radius_outlier(
|
||||
nb_points=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3),
|
||||
radius=kwargs.get("voxel_size", 0.002) * 10
|
||||
)
|
||||
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
|
||||
nb_neighbors=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3),
|
||||
std_ratio=2.0
|
||||
)
|
||||
points = np.asarray(clean_pcd.points)
|
||||
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
|
||||
|
||||
labels = np.array(
|
||||
clean_pcd.cluster_dbscan(
|
||||
eps=kwargs.get("voxel_size", 0.002) * 10,
|
||||
min_points=max(int(round(10 * kwargs.get("voxel_size", 0.002) / 0.005)), 3)
|
||||
)
|
||||
)
|
||||
|
||||
# 统计滤波
|
||||
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
|
||||
nb_neighbors=10,
|
||||
std_ratio=2.0
|
||||
)
|
||||
# 点云簇过滤
|
||||
points = np.asarray(clean_pcd.points)
|
||||
cluster_label = set(labels)
|
||||
point_cloud_clusters = []
|
||||
for label in cluster_label:
|
||||
if label == -1:
|
||||
continue
|
||||
idx = np.where(labels == label)[0]
|
||||
point_cloud_cluster = clean_pcd.select_by_index(idx)
|
||||
points_cluster_z = points[idx, 2]
|
||||
z_avg = np.mean(points_cluster_z)
|
||||
if z_avg < 0.2:
|
||||
continue
|
||||
point_cloud_clusters.append((point_cloud_cluster, z_avg))
|
||||
|
||||
# 过滤过近的点
|
||||
points = np.asarray(clean_pcd.points)
|
||||
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
|
||||
point_cloud_clusters.sort(key=lambda x: x[1])
|
||||
point_cloud = point_cloud_clusters[0][0]
|
||||
|
||||
# # 使用数量最大簇判定噪声强度
|
||||
# _, counts = np.unique(labels[labels >= 0], return_counts=True)
|
||||
# largest_cluster_ratio = counts.max() / len(labels)
|
||||
# if largest_cluster_ratio < 0.5:
|
||||
# return None
|
||||
# 判断点云是否为空
|
||||
if len(point_cloud.points) == 0:
|
||||
return 1101, [0.0, 0.0, 0.0], False
|
||||
|
||||
labels = np.array(clean_pcd.cluster_dbscan(eps=voxel_size * 5, min_points=10))
|
||||
if len(labels[labels >= 0]) == 0:
|
||||
return clean_pcd
|
||||
# # 点云补齐15000个点
|
||||
# points = np.asarray(point_cloud.points)
|
||||
# if len(points) >= 15000:
|
||||
# idxs = np.random.choice(len(points), 15000, replace=False)
|
||||
# else:
|
||||
# idxs1 = np.arange(len(points))
|
||||
# idxs2 = np.random.choice(len(points), 15000 - len(points), replace=True)
|
||||
# idxs = np.concatenate([idxs1, idxs2], axis=0)
|
||||
# points = points[idxs]
|
||||
|
||||
# 使用距离最近簇作为物体
|
||||
points = np.asarray(clean_pcd.points)
|
||||
cluster_label = set(labels)
|
||||
point_cloud_clusters = []
|
||||
for label in cluster_label:
|
||||
if label == -1:
|
||||
continue
|
||||
idx = np.where(labels == label)[0]
|
||||
point_cloud_cluster = clean_pcd.select_by_index(idx)
|
||||
points_cluster_z = points[idx, 2]
|
||||
z_avg = np.mean(points_cluster_z)
|
||||
if z_avg < 0.2:
|
||||
continue
|
||||
point_cloud_clusters.append((point_cloud_cluster, z_avg))
|
||||
# 构建推理数据结构
|
||||
ret_dict = {
|
||||
'point_clouds': points.astype(np.float32),
|
||||
'coors': points.astype(np.float32) / kwargs.get("voxel_size", 0.002),
|
||||
'feats': np.ones_like(points).astype(np.float32),
|
||||
}
|
||||
batch_data = minkowski_collate_fn([ret_dict])
|
||||
|
||||
if len(point_cloud_clusters) == 0:
|
||||
return clean_pcd
|
||||
# 将数据置于对应的设备上
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
for key in batch_data:
|
||||
batch_data[key] = batch_data[key].to(device)
|
||||
|
||||
point_cloud_clusters.sort(key=lambda x: x[1])
|
||||
clean_pcd = point_cloud_clusters[0][0]
|
||||
logging.warning(f'points num: {len(points)}')
|
||||
|
||||
# 使用最近簇判断噪音强度
|
||||
largest_cluster_ratio = len(clean_pcd.points) / point_num
|
||||
if largest_cluster_ratio < 0.20:
|
||||
return None
|
||||
# 点云数量判断,是否返回
|
||||
if batch_data['coors'].shape[0] < 128: # 例如 128 / 256
|
||||
return 1300, [0.0], False
|
||||
if batch_data["point_clouds"].shape[1] < 128: # 例如 128 / 256
|
||||
return 1301, [0.0], False
|
||||
|
||||
return clean_pcd
|
||||
# 梯度置0,进入推理
|
||||
with torch.no_grad():
|
||||
end_points = kwargs.get("model")(batch_data)
|
||||
if end_points is None:
|
||||
return 1302, [0.0, 0.0, 0.0], False
|
||||
grasp_preds = pred_decode(end_points)
|
||||
|
||||
# 推理结果后处理
|
||||
preds = grasp_preds[0].detach().cpu().numpy()
|
||||
sorted_index = np.argsort(-preds[:, 0])
|
||||
preds = preds[sorted_index]
|
||||
preds = preds[:10]
|
||||
|
||||
if kwargs.get("collision_thresh", 0.01) > 0:
|
||||
cloud = kwargs.get("orign_point_clouds")
|
||||
if cloud is not None and len(preds) > 0:
|
||||
mfcdetector = ModelFreeCollisionDetector(
|
||||
cloud,
|
||||
voxel_size=kwargs.get("voxel_size", 0.002)
|
||||
)
|
||||
collision_mask = mfcdetector.detect(
|
||||
preds, approach_dist=0.05,
|
||||
collision_thresh=kwargs.get("collision_thresh", 0.01)
|
||||
)
|
||||
preds = preds[~collision_mask]
|
||||
|
||||
# logging.error("stage 8")
|
||||
Rs = preds[:, 4:13].reshape(-1, 3, 3)
|
||||
centers = preds[:, 13:16].reshape(-1, 3)
|
||||
grab_width = preds[:, 1]
|
||||
|
||||
if not len(Rs):
|
||||
return 1303, [0.0, 0.0, 0.0], False
|
||||
|
||||
# logging.error("stage 9")
|
||||
rmat = []
|
||||
for r, center in zip(Rs, centers):
|
||||
vz, vx, vy = r[:, 0], r[:, 1], r[:, 2]
|
||||
if vx[0] < 0:
|
||||
vx = -vx
|
||||
if vz[2] < 0:
|
||||
vz = -vz
|
||||
if not np.allclose(np.cross(vx, vy), vz):
|
||||
vy = -vy
|
||||
|
||||
R = np.column_stack((vx, vy, vz))
|
||||
rmat.append(tfs.affines.compose(np.squeeze(np.asarray(center)), R, [1, 1, 1]))
|
||||
|
||||
if len(rmat) == 0:
|
||||
return 1304, [0.0, 0.0, 0.0], False
|
||||
|
||||
return rmat[0], [grab_width[0]], True
|
||||
|
||||
# def calculate_grab_width(mask, k, depth):
|
||||
# """计算重心宽度"""
|
||||
# mask = mask.astype(np.uint8) * 255
|
||||
# contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
#
|
||||
# if not contours:
|
||||
# return 0.0
|
||||
#
|
||||
# c = max(contours, key=cv2.contourArea)
|
||||
# box = cv2.boxPoints(cv2.minAreaRect(c))
|
||||
# if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
|
||||
# point_diff = box[1] - box[0]
|
||||
# else:
|
||||
# point_diff = box[1] - box[2]
|
||||
#
|
||||
# grab_width = depth * np.sqrt(
|
||||
# point_diff[0]**2 / k[0]**2 + point_diff[1]**2 / k[4]**2
|
||||
# )
|
||||
# return grab_width
|
||||
|
||||
|
||||
def rmat2quat(rmat):
|
||||
"""Convert rotation matrix to quaternion."""
|
||||
|
||||
@@ -6,6 +6,7 @@ import cv2
|
||||
import numpy as np
|
||||
import open3d as o3d
|
||||
|
||||
from .image_tools import save_img
|
||||
|
||||
|
||||
__all__ = [
|
||||
@@ -16,7 +17,8 @@ def draw_box(
|
||||
rgb_img: np.ndarray,
|
||||
segmentation_result,
|
||||
put_text: bool = True,
|
||||
save_path: Union[bool, str] = False
|
||||
save_dir: Union[bool, str] = False,
|
||||
mark_time: bool = False
|
||||
):
|
||||
"""
|
||||
绘制目标检测边界框
|
||||
@@ -26,6 +28,12 @@ def draw_box(
|
||||
class_ids = segmentation_result.boxes.cls.cpu().numpy()
|
||||
labels = segmentation_result.names
|
||||
|
||||
x_centers, y_centers = boxes[:, 0], boxes[:, 1]
|
||||
sorted_index = np.lexsort((-y_centers, x_centers))
|
||||
boxes = boxes[sorted_index]
|
||||
class_ids = class_ids[sorted_index]
|
||||
confidences = confidences[sorted_index]
|
||||
|
||||
for i, box in enumerate(boxes):
|
||||
x_center, y_center, width, height = box[:4]
|
||||
|
||||
@@ -41,15 +49,8 @@ def draw_box(
|
||||
cv2.putText(rgb_img, f"{i}", (p1[0] + 15, p1[1] + 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
|
||||
|
||||
if save_path:
|
||||
if isinstance(save_path, str):
|
||||
dir_path = os.path.dirname(save_path)
|
||||
if dir_path:
|
||||
os.makedirs(dir_path, exist_ok=True)
|
||||
else:
|
||||
home_path = os.path.expanduser("~")
|
||||
save_path = os.path.join(home_path, "detect_color_img.png")
|
||||
cv2.imwrite(save_path, rgb_img)
|
||||
if save_dir:
|
||||
save_img(rgb_img, "detect_color_img.png", save_dir, mark_time)
|
||||
|
||||
|
||||
def draw_mask(rgb_img: np.ndarray, segmentation_result):
|
||||
|
||||
282
vision_detect/vision_detect/VisionDetect/utils/grasp_refine.py
Normal file
282
vision_detect/vision_detect/VisionDetect/utils/grasp_refine.py
Normal file
@@ -0,0 +1,282 @@
|
||||
import numpy as np
|
||||
import transforms3d as tfs
|
||||
# import open3d as o3d
|
||||
|
||||
__all__ = ["refine_grasp_pose"]
|
||||
|
||||
|
||||
def collision_detector(
|
||||
points: np.ndarray,
|
||||
refine: np.ndarray,
|
||||
volume: list[float],
|
||||
iou: float = 0.001,
|
||||
search_mode: bool = False,
|
||||
**kwargs
|
||||
) -> int:
|
||||
"""
|
||||
collision detector
|
||||
-----
|
||||
input:
|
||||
points: np.ndarray (3, n)
|
||||
refine: np.ndarray (3, ), Grab target poes coordinate system
|
||||
volume: list [left, right]
|
||||
iou : float
|
||||
search_mode: bool, Default False
|
||||
**kwargs:
|
||||
"grab_width": float
|
||||
"hand_size": list [width, height, length]
|
||||
"left_size": list [thick, width, length]
|
||||
"right_size": list [thick, width, length]
|
||||
thick of gripper finger, width of gripper finger, length of gripper finger
|
||||
output:
|
||||
collision_code: int
|
||||
"""
|
||||
hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
|
||||
left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
|
||||
right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
|
||||
grab_width = kwargs.get('grab_width', 0.10) * 0.95
|
||||
x, y, z = refine
|
||||
|
||||
if not search_mode:
|
||||
hand_top_box = (
|
||||
(points[2] < z) & (points[2] > (z - hand_size[2]))
|
||||
& (points[0] < (x - hand_size[1]*1/4)) & (points[0] > (x - hand_size[1]/2))
|
||||
& (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
|
||||
)
|
||||
hand_center_box = (
|
||||
(points[2] < z) & (points[2] > (z - hand_size[2]))
|
||||
& (points[0] < (x + hand_size[1]*1/4)) & (points[0] > (x - hand_size[1]*1/4))
|
||||
& (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
|
||||
)
|
||||
hand_bottom_box = (
|
||||
(points[2] < z) & (points[2] > (z - hand_size[2]))
|
||||
& (points[0] < (x + hand_size[1]/2)) & (points[0] > (x + hand_size[1]*1/4))
|
||||
& (points[1] < (y + hand_size[0]/2)) & (points[1] > (y - hand_size[0]/2))
|
||||
)
|
||||
|
||||
if (hand_top_box.any() and hand_bottom_box.any()) or hand_center_box.any():
|
||||
return 1
|
||||
if hand_bottom_box.any():
|
||||
return 2
|
||||
if hand_top_box.any():
|
||||
return 3
|
||||
|
||||
left_finger_box = (
|
||||
(points[2] < (z + left_size[2])) & (points[2] > z -0.05)
|
||||
& (points[0] < (x + left_size[1]/2)) & (points[0] > (x - left_size[1]/2))
|
||||
& (points[1] < (y + grab_width/2 + left_size[0])) & (points[1] > (y + grab_width/2))
|
||||
)
|
||||
right_finger_box = (
|
||||
(points[2] < (z + right_size[2])) & (points[2] > z - 0.05)
|
||||
& (points[0] < (x + right_size[1]/2)) & (points[0] > (x - right_size[1]/2))
|
||||
& (points[1] < (y - grab_width/2)) & (points[1] > (y-(grab_width/2 + right_size[0])))
|
||||
)
|
||||
|
||||
left_iou = left_finger_box.sum() / (volume[0]+1e-6)
|
||||
right_iou = right_finger_box.sum() / (volume[1]+1e-6)
|
||||
|
||||
if left_iou > iou and right_iou > iou:
|
||||
return 6
|
||||
|
||||
if left_iou > iou :
|
||||
return 4
|
||||
if right_iou > iou :
|
||||
return 5
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def refine_grasp_pose(
|
||||
points: np.ndarray,
|
||||
voxel_size: float,
|
||||
target_position: np.ndarray,
|
||||
expect_orientation: np.ndarray | None = None,
|
||||
search_mode: bool = False,
|
||||
**kwargs
|
||||
):
|
||||
"""
|
||||
refine grasp pose
|
||||
-----
|
||||
input:
|
||||
points: np.ndarray (n, 3), already complete voxel downsample
|
||||
voxel_size: float
|
||||
target_position: np.ndarray (3, ), grab target position
|
||||
expect_orientation: np.ndarray (3, 3), expect grab target orientation
|
||||
search_mode: bool, Default False
|
||||
**kwargs:
|
||||
"grab_width": float
|
||||
"hand_size": list [width, height, length]
|
||||
"left_size": list [thick, width, length]
|
||||
"right_size": list [thick, width, length]
|
||||
thick of gripper finger, width of gripper finger, length of gripper finger
|
||||
output:
|
||||
"""
|
||||
if expect_orientation is None:
|
||||
expect_orientation = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
|
||||
|
||||
refine = np.zeros(3)
|
||||
grab_width = kwargs.get('grab_width', 0.10)
|
||||
hand_size = kwargs.get('hand_size', [0.113, 0.063, 0.13])
|
||||
left_size = kwargs.get('left_size', [0.006, 0.037, 0.086])
|
||||
right_size = kwargs.get('right_size', [0.006, 0.037, 0.086])
|
||||
|
||||
left_volume = left_size[0] * left_size[1] * left_size[2] / (voxel_size**3)
|
||||
right_volume = right_size[0] * right_size[1] * right_size[2] / (voxel_size**3)
|
||||
|
||||
points = points - target_position[None, :] # (n, 3) - (1, 3) = (n, 3)
|
||||
points = expect_orientation.T @ points.T # (3, 3) @ (3, n) = (3, n)
|
||||
|
||||
points = points[:,
|
||||
(points[2] < left_size[2]) & (points[2] > -(0.1 + hand_size[2]))
|
||||
& (points[0] < (0.05 + hand_size[1]/2)) & (points[0] > -(0.05 + hand_size[1]/2))
|
||||
& (points[1] < (0.05 + grab_width/2 + left_size[0]))
|
||||
& (points[1] > -(0.05 + grab_width/2 + right_size[0]))
|
||||
]
|
||||
|
||||
frist_sign = False
|
||||
y_n = y_p = False
|
||||
left_last = right_last = False
|
||||
hand_num = left_num = right_num = 0
|
||||
while hand_num < 20 and left_num < 10 and right_num < 10:
|
||||
collision_code = collision_detector(
|
||||
points, refine, volume=[left_volume, right_volume], **kwargs
|
||||
)
|
||||
|
||||
if collision_code == 0:
|
||||
if y_p and not y_n:
|
||||
refine[1] += 0.004
|
||||
y_p = y_n = False
|
||||
left_last = True
|
||||
right_last = False
|
||||
continue
|
||||
if not y_p and y_n:
|
||||
refine[1] -= 0.004
|
||||
y_p = y_n = False
|
||||
left_last = False
|
||||
right_last = True
|
||||
continue
|
||||
if search_mode:
|
||||
break
|
||||
position = target_position + (expect_orientation @ refine.T).T
|
||||
rmat = tfs.affines.compose(
|
||||
np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
|
||||
)
|
||||
return rmat, True
|
||||
if collision_code == 6:
|
||||
return 1202, False
|
||||
|
||||
if collision_code == 1:
|
||||
refine[2] -= 0.008
|
||||
hand_num += 1
|
||||
print("z + 0.008")
|
||||
continue
|
||||
|
||||
if collision_code == 2:
|
||||
if frist_sign:
|
||||
print("z + 0.004")
|
||||
refine[2] -= 0.004
|
||||
frist_sign = True
|
||||
continue
|
||||
refine[0] -= 0.002
|
||||
hand_num += 1
|
||||
print("x - 0.002")
|
||||
continue
|
||||
|
||||
if collision_code == 3:
|
||||
if frist_sign:
|
||||
print("z + 0.004")
|
||||
refine[2] -= 0.004
|
||||
frist_sign = True
|
||||
continue
|
||||
refine[0] += 0.002
|
||||
hand_num += 1
|
||||
print("x + 0.002")
|
||||
continue
|
||||
|
||||
if collision_code == 4:
|
||||
y_p = True
|
||||
y_n = False
|
||||
refine[1] += 0.004
|
||||
left_num += 1
|
||||
print("y + 0.004")
|
||||
continue
|
||||
|
||||
if collision_code == 5:
|
||||
y_p = False
|
||||
y_n = True
|
||||
refine[1] -= 0.004
|
||||
right_num += 1
|
||||
print("y - 0.004")
|
||||
continue
|
||||
else:
|
||||
return 1202, False
|
||||
|
||||
if search_mode:
|
||||
right_num = left_num = 0
|
||||
if left_last and not right_last:
|
||||
y_min = refine[1]
|
||||
while left_num < 10:
|
||||
left_num += 1
|
||||
refine[1] += 0.004
|
||||
collision_code = collision_detector(
|
||||
points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
|
||||
)
|
||||
if collision_code:
|
||||
refine[1] = (refine[1] - 0.004 + y_min) / 2
|
||||
break
|
||||
else:
|
||||
refine[1] = y_min + 0.2
|
||||
print(f"left_num = {left_num}")
|
||||
|
||||
elif not left_last and right_last:
|
||||
y_max = refine[1]
|
||||
while right_num < 10:
|
||||
right_num += 1
|
||||
refine[1] -= 0.004
|
||||
collision_code = collision_detector(
|
||||
points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
|
||||
)
|
||||
if collision_code:
|
||||
refine[1] = (refine[1] + 0.004 + y_max) / 2
|
||||
break
|
||||
else:
|
||||
refine[1] = y_max - 0.2
|
||||
print(f"right_num = {right_num}")
|
||||
|
||||
elif not left_last and not right_last:
|
||||
y_cur = refine[1]
|
||||
while left_num < 6:
|
||||
left_num += 1
|
||||
refine[1] += 0.004
|
||||
collision_code = collision_detector(
|
||||
points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
|
||||
)
|
||||
if collision_code:
|
||||
y_max = refine[1] - 0.004
|
||||
break
|
||||
else:
|
||||
y_max = refine[1] + 0.024
|
||||
print(f"left_num = {left_num}")
|
||||
|
||||
refine[1] = y_cur
|
||||
while right_num < 6:
|
||||
right_num += 1
|
||||
refine[1] -= 0.004
|
||||
collision_code = collision_detector(
|
||||
points, refine, volume=[left_volume, right_volume], search_mode=True, **kwargs
|
||||
)
|
||||
if collision_code:
|
||||
refine[1] = (refine[1] + 0.004 + y_max) / 2
|
||||
break
|
||||
else:
|
||||
refine[1] = (y_cur - 0.024 + y_max) / 2
|
||||
print(f"right_num = {right_num}")
|
||||
|
||||
position = target_position + (expect_orientation @ refine.T).T
|
||||
rmat = tfs.affines.compose(
|
||||
np.squeeze(np.asarray(position)),
|
||||
expect_orientation,
|
||||
[1, 1, 1]
|
||||
)
|
||||
return rmat, True
|
||||
return 1202, False
|
||||
@@ -1,3 +1,6 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import logging
|
||||
import numpy as np
|
||||
@@ -5,7 +8,7 @@ import numpy as np
|
||||
|
||||
__all__ = [
|
||||
"crop_imgs_box_xywh", "crop_imgs_box_xyxy", "crop_imgs_mask", "get_map",
|
||||
"distortion_correction",
|
||||
"distortion_correction", "save_img"
|
||||
]
|
||||
|
||||
def crop_imgs_box_xywh(imgs: list, box, same_sign: bool = False):
|
||||
@@ -154,3 +157,28 @@ def distortion_correction(
|
||||
return undistorted_color, undistorted_depth
|
||||
|
||||
|
||||
def save_img(img: np.ndarray, save_name, save_dir: str | None = None, mark_cur_time: bool = False):
|
||||
"""
|
||||
保存图像
|
||||
|
||||
input:
|
||||
img: np.ndarray
|
||||
- uint8, BGR (H, W, 3)
|
||||
- uint16 single-channel
|
||||
save_name: str
|
||||
save_path: str | None
|
||||
mark_cur_time: bool
|
||||
"""
|
||||
if isinstance(save_dir, str):
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
save_path = os.path.join(save_dir, save_name)
|
||||
else:
|
||||
home_path = os.path.expanduser("~")
|
||||
save_path = os.path.join(home_path, save_name)
|
||||
|
||||
if mark_cur_time:
|
||||
cur_time = int(time.time() * 1000)
|
||||
path, ext = os.path.splitext(save_path)
|
||||
save_path = path + f'_{cur_time}' + ext
|
||||
|
||||
cv2.imwrite(save_path, img)
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
# import logging
|
||||
#
|
||||
# import cv2
|
||||
import numpy as np
|
||||
import open3d as o3d
|
||||
|
||||
|
||||
__all__ = ["create_o3d_pcd", "create_o3d_denoised_pcd"]
|
||||
|
||||
|
||||
def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.005):
|
||||
"""点云去噪"""
|
||||
point_cloud = point_cloud.remove_non_finite_points()
|
||||
down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
|
||||
point_num = len(down_pcd.points)
|
||||
|
||||
# 半径滤波
|
||||
clean_pcd, _ = down_pcd.remove_radius_outlier(
|
||||
nb_points=max(int(round(10 * voxel_size / 0.005)), 3),
|
||||
radius=voxel_size * 10
|
||||
)
|
||||
|
||||
# 统计滤波
|
||||
clean_pcd, _ = clean_pcd.remove_statistical_outlier(
|
||||
nb_neighbors=max(int(round(10 * voxel_size / 0.005)), 3),
|
||||
std_ratio=2.0
|
||||
)
|
||||
|
||||
# 过滤过近的点
|
||||
points = np.asarray(clean_pcd.points)
|
||||
clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
|
||||
|
||||
# # 使用数量最大簇判定噪声强度
|
||||
# _, counts = np.unique(labels[labels >= 0], return_counts=True)
|
||||
# largest_cluster_ratio = counts.max() / len(labels)
|
||||
# if largest_cluster_ratio < 0.5:
|
||||
# return None
|
||||
|
||||
labels = np.array(
|
||||
clean_pcd.cluster_dbscan(
|
||||
eps=voxel_size * 10,
|
||||
min_points=max(int(round(10 * voxel_size / 0.005)), 3)
|
||||
)
|
||||
)
|
||||
if len(labels[labels >= 0]) == 0:
|
||||
return clean_pcd, True
|
||||
|
||||
# 使用距离最近簇作为物体
|
||||
points = np.asarray(clean_pcd.points)
|
||||
cluster_label = set(labels)
|
||||
point_cloud_clusters = []
|
||||
for label in cluster_label:
|
||||
if label == -1:
|
||||
continue
|
||||
idx = np.where(labels == label)[0]
|
||||
point_cloud_cluster = clean_pcd.select_by_index(idx)
|
||||
points_cluster_z = points[idx, 2]
|
||||
z_avg = np.mean(points_cluster_z)
|
||||
if z_avg < 0.2:
|
||||
continue
|
||||
point_cloud_clusters.append((point_cloud_cluster, z_avg))
|
||||
|
||||
if len(point_cloud_clusters) == 0:
|
||||
return clean_pcd
|
||||
|
||||
point_cloud_clusters.sort(key=lambda x: x[1])
|
||||
clean_pcd = point_cloud_clusters[0][0]
|
||||
|
||||
# 使用最近簇判断噪音强度
|
||||
largest_cluster_ratio = len(clean_pcd.points) / point_num
|
||||
if largest_cluster_ratio < 0.05:
|
||||
return 1100, False
|
||||
|
||||
return clean_pcd, True
|
||||
|
||||
|
||||
def create_o3d_pcd(depth_img, camera_size, k, **kwargs):
|
||||
"""
|
||||
create o3d pcd
|
||||
--------
|
||||
input:
|
||||
depth_img: np.ndarray
|
||||
camera_size: list
|
||||
k: np.ndarray | list
|
||||
**kwargs:
|
||||
"depth_scale": float
|
||||
"depth_trunc": float
|
||||
"voxel_size": float
|
||||
output:
|
||||
orign_points: o3d.geometry.PointCloud
|
||||
"""
|
||||
intrinsics = o3d.camera.PinholeCameraIntrinsic(
|
||||
int(camera_size[0]),
|
||||
int(camera_size[1]),
|
||||
k[0], k[4], k[2], k[5]
|
||||
)
|
||||
depth_o3d = o3d.geometry.Image(depth_img.astype(np.uint16))
|
||||
orign_point_clouds = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
depth_trunc=kwargs.get("depth_trunc", 3.0),
|
||||
)
|
||||
orign_point_clouds = orign_point_clouds.voxel_down_sample(
|
||||
kwargs.get("voxel_size", 0.01)
|
||||
)
|
||||
# # 半径滤波
|
||||
# orign_point_clouds, _ = orign_point_clouds.remove_radius_outlier(
|
||||
# nb_points=int(round(10 * kwargs.get("voxel_size", 0.01) / 0.005)),
|
||||
# radius=kwargs.get("voxel_size", 0.01) * 10
|
||||
# )
|
||||
#
|
||||
# # 统计滤波
|
||||
# orign_point_clouds, _ = orign_point_clouds.remove_statistical_outlier(
|
||||
# nb_neighbors=int(round(20 * kwargs.get("voxel_size", 0.01) / 0.005)),
|
||||
# std_ratio=2.0
|
||||
# )
|
||||
orign_points = np.asarray(orign_point_clouds.points)
|
||||
|
||||
return orign_points
|
||||
|
||||
|
||||
def create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs):
|
||||
"""
|
||||
create o3d denoised pcd
|
||||
--------
|
||||
input:
|
||||
depth_img_mask: np.ndarray
|
||||
intrinsics: o3d.camera.PinholeCameraIntrinsic
|
||||
**kwargs:
|
||||
"depth_scale": float
|
||||
"depth_trunc": float
|
||||
"voxel_size": float
|
||||
output:
|
||||
point_cloud: o3d.geometry.PointCloud
|
||||
"""
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=kwargs.get("depth_scale", 1000.0),
|
||||
depth_trunc=kwargs.get("depth_trunc", 3.0),
|
||||
)
|
||||
|
||||
point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
|
||||
if not sign:
|
||||
return 1100, False
|
||||
|
||||
if len(point_cloud.points) == 0:
|
||||
return 1101, False
|
||||
|
||||
return point_cloud, True
|
||||
@@ -1,260 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
import transforms3d as tfs
|
||||
import numpy as np
|
||||
import math
|
||||
import json
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
|
||||
def get_matrix_quat(x, y, z, rw, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and a unit quaternion.

    The quaternion is given in (w, x, y, z) order; zooms are fixed at 1.
    """
    rotation = tfs.quaternions.quat2mat([rw, rx, ry, rz])
    translation = np.squeeze(np.asarray((x, y, z)))
    return tfs.affines.compose(translation, rotation, [1, 1, 1])
|
||||
|
||||
|
||||
def get_matrix_eular_radu(x, y, z, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and Euler angles.

    Angles are expected in radians (the degree conversion is commented out);
    the transforms3d default axes convention applies ('sxyz').
    """
    # Rotation matrix from Euler angles.
    rmat = tfs.euler.euler2mat(
        # math.radians(rx), math.radians(ry), math.radians(rz)
        rx, ry, rz
    )

    # Compose translation + rotation into a homogeneous matrix (unit zooms).
    rmat = tfs.affines.compose(
        np.squeeze(np.asarray((x, y, z))),
        rmat,
        [1, 1, 1]
    )
    return rmat
|
||||
|
||||
|
||||
def get_matrix_rotvector(x, y, z, rx, ry, rz):
    """Build a 4x4 homogeneous transform from a translation and a rotation vector.

    (rx, ry, rz) is an axis-angle rotation vector whose Euclidean norm is the
    rotation angle in radians.
    """
    rvec = np.array([rx, ry, rz])
    theta = np.linalg.norm(rvec)
    if theta < 1e-8:
        rmat = np.eye(3)  # near-zero angle: identity avoids dividing by ~0
    else:
        axis = rvec / theta
        rmat = tfs.axangles.axangle2mat(axis, theta)

    # Compose translation + rotation into a homogeneous matrix (unit zooms).
    rmat = tfs.affines.compose(
        np.squeeze(np.asarray((x, y, z))),
        rmat,
        [1, 1, 1]
    )
    return rmat
|
||||
|
||||
|
||||
def skew(v):
    """Return the 3x3 skew-symmetric (cross-product) matrix of a 3-vector."""
    a, b, c = v[0], v[1], v[2]
    rows = [
        [0, -c, b],
        [c, 0, -a],
        [-b, a, 0],
    ]
    return np.array(rows)
|
||||
|
||||
|
||||
def R2P(T):
    """Rotation matrix -> modified Rodrigues vector.

    Extracts the rotation part of the homogeneous transform T and encodes it
    as P = 2 * sin(theta / 2) * axis — the parameterisation used by the
    Tsai-Lenz style hand-eye solver in calculate().
    """
    axis, angle= tfs.axangles.mat2axangle(T[0:3, 0:3])
    P = 2 * math.sin(angle / 2) * axis
    return P
|
||||
|
||||
|
||||
def P2R(P):
    """Modified Rodrigues vector -> 3x3 rotation matrix (inverse of R2P).

    Implements R = (1 - |P|^2 / 2) I + (P P^T + sqrt(4 - |P|^2) skew(P)) / 2.
    NOTE(review): only valid while |P|^2 <= 4 (rotation below pi); larger
    inputs make the sqrt argument negative and there is no guard here.
    """
    P2 = np.dot(P.T, P)  # squared norm |P|^2 (a 1x1 array for column vectors)
    part_1 = (1 - P2 / 2) * np.eye(3)
    part_2 = np.add(np.dot(P, P.T), np.sqrt(4- P2) * skew(P.flatten().T))
    R = np.add(part_1, np.divide(part_2, 2))
    return R
|
||||
|
||||
|
||||
def calculate(Hgs, Hcs):
    """Solve the hand-eye equation AX = XB for the calibration matrix.

    Two-step linear solution in the style of Tsai-Lenz: the rotation is
    recovered first from a least-squares system built on modified Rodrigues
    vectors (R2P/P2R), then the translation from a second linear system.

    Args:
        Hgs: list of 4x4 homogeneous gripper/hand poses.
        Hcs: list of 4x4 homogeneous camera poses, same length and order.

    Returns:
        4x4 homogeneous calibration matrix.
    """
    # Naming: upper-case H* are matrices, lower-case names are scalars.
    Hgijs = []
    Hcijs = []
    A = []
    B = []
    size = 0
    # Build the relative motion between every pose pair (i, j).
    for i in range(len(Hgs)):
        for j in range(i + 1, len(Hgs)):
            size += 1
            Hgij = np.dot(np.linalg.inv(Hgs[j]), Hgs[i])  # gripper motion i -> j
            Hgijs.append(Hgij)
            Pgij = np.dot(2, R2P(Hgij))

            Hcij = np.dot(Hcs[j], np.linalg.inv(Hcs[i]))  # camera motion i -> j
            Hcijs.append(Hcij)
            Pcij = np.dot(2, R2P(Hcij))

            # One 3x3 / 3x1 block of the linear system for the rotation.
            A.append(skew(np.add(Pgij, Pcij)))
            B.append(np.subtract(Pcij, Pgij).reshape(3, 1))

    # Least-squares rotation estimate via the pseudo-inverse, then renormalise
    # the Rodrigues vector and convert it back to a rotation matrix.
    MA = np.vstack(A)
    MB = np.vstack(B)
    Pcg_ = np.dot(np.linalg.pinv(MA), MB)
    pcg = np.sqrt(np.add(1, np.dot(Pcg_.T, Pcg_)))
    Pcg = np.dot(np.dot(2, Pcg_), np.linalg.inv(pcg))
    Rcg = P2R(Pcg).reshape(3, 3)

    # Second pass: linear system for the translation given the rotation Rcg.
    A = []
    B = []
    id = 0  # NOTE: shadows the builtin `id`; harmless in this local scope
    for i in range(len(Hgs)):
        for j in range(i + 1, len(Hgs)):
            Hgij = Hgijs[id]
            Hcij = Hcijs[id]
            A.append(np.subtract(Hgij[0:3, 0:3], np.eye(3, 3)))
            B.append(np.subtract(np.dot(Rcg, Hcij[0:3, 3:4]), Hgij[0:3, 3:4]))
            id += 1

    MA = np.asarray(A).reshape(size * 3, 3)
    MB = np.asarray(B).reshape(size * 3, 1)
    Tcg = np.dot(np.linalg.pinv(MA), MB).reshape(3, )

    # Assemble rotation + translation into a homogeneous matrix (unit zooms).
    return tfs.affines.compose(Tcg, np.squeeze(Rcg), [1, 1, 1])
|
||||
|
||||
|
||||
class Calibration(Node):
    """ROS 2 node that computes a hand-eye calibration matrix from recorded
    hand and camera pose lists (JSON files) and writes the result to
    `<matrix_name>_matrix.json`.

    Parameters (ROS): matrix_name, mode ('eye_in_hand' | 'eye_to_hand'),
    input ('quat' | 'eular' | 'rotvertor'), camera_pose_path, hand_pose_path.
    """

    def __init__(self, name):
        super(Calibration, self).__init__(name)
        self.sync_subscriber = None
        self.sub_camera_pose = None
        self.sub_hand_pose = None
        self.Hgs, self.Hcs = [], []      # homogeneous hand / camera transforms
        self.hand, self.camera = [], []  # flat pose scalar lists loaded from JSON
        self.calibration_matrix = None

        self.declare_parameter('matrix_name', 'eye_to_hand')
        self.matrix_name = self.get_parameter('matrix_name').value

        self.declare_parameter('mode', 'eye_to_hand')
        self.mode = self.get_parameter('mode').value.lower()

        if self.mode not in ['eye_in_hand', 'eye_to_hand']:
            raise ValueError("mode must be 'eye_in_hand' or 'eye_to_hand'")

        # Select the pose-to-matrix converter matching the recorded format.
        self.declare_parameter('input', 'quat')
        self.input = self.get_parameter('input').value.lower()
        if self.input == 'eular':
            self.function = get_matrix_eular_radu
        elif self.input == 'rotvertor':
            self.function = get_matrix_rotvector
        elif self.input == 'quat':
            self.function = get_matrix_quat
        else:
            raise ValueError("INPUT ERROR")

        self.declare_parameter('camera_pose_path', 'camera_pose_data.json')
        self.declare_parameter('hand_pose_path', 'hand_pose_data.json')
        self.camera_pose_path = self.get_parameter('camera_pose_path').value
        self.hand_pose_path = self.get_parameter('hand_pose_path').value

        # BUG FIX: `done` must be initialised BEFORE get_pose() runs, because
        # get_pose() sets it to True on success. Previously it was assigned
        # False *after* the call, clobbering the flag and making main()'s
        # `while ... not node.done` loop spin forever.
        self.done = False

        self.get_pose()

    def get_pose(self):
        """Load both pose lists from JSON, run the calibration, and save the
        resulting matrix as `<matrix_name>_matrix.json`."""
        with open(f'{self.camera_pose_path}', 'r', encoding='utf-8') as f:
            self.camera = json.load(f)

        with open(f'{self.hand_pose_path}', 'r', encoding='utf-8') as f:
            self.hand = json.load(f)

        self.calculate_calibration()

        print(self.hand)
        print(self.camera)

        self.get_logger().info(f"{self.calibration_matrix}")
        hand_eye_result = {
            "T": self.calibration_matrix.tolist()
        }
        with open(f"{self.matrix_name}_matrix.json", "w") as f:
            json.dump(hand_eye_result, f, indent=4)
        self.get_logger().info(f"Save as {self.matrix_name}_matrix.json")

        # Signal main() that this one-shot calibration has finished.
        self.done = True

    def calculate_data(self):
        """Convert the flat pose lists into homogeneous matrices.

        Poses are stored as consecutive scalars: 7 per pose for quaternions
        (x y z rw rx ry rz), 6 otherwise (x y z + rotation triplet). In
        eye_to_hand mode the hand pose is inverted before being stored.
        """
        if self.input == 'quat':
            for i in range(0, len(self.hand), 7):
                self.Hgs.append(
                    np.linalg.inv(
                        self.function(
                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
                        )
                    )
                    if self.mode == 'eye_to_hand' else
                    self.function(
                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5], self.hand[i + 6]
                    )
                )
                self.Hcs.append(
                    self.function(
                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5], self.camera[i + 6]
                    )
                )

        else:
            for i in range(0, len(self.hand), 6):
                self.Hgs.append(
                    np.linalg.inv(
                        self.function(
                            self.hand[i], self.hand[i + 1], self.hand[i + 2],
                            self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
                        )
                    )
                    if self.mode == 'eye_to_hand' else
                    self.function(
                        self.hand[i], self.hand[i + 1], self.hand[i + 2],
                        self.hand[i + 3], self.hand[i + 4], self.hand[i + 5]
                    )
                )
                self.Hcs.append(
                    self.function(
                        self.camera[i], self.camera[i + 1], self.camera[i + 2],
                        self.camera[i + 3], self.camera[i + 4], self.camera[i + 5]
                    )
                )

    def calculate_calibration(self):
        """Build the pose matrices and solve for the calibration matrix."""
        self.calculate_data()
        self.calibration_matrix = calculate(self.Hgs, self.Hcs)
|
||||
|
||||
|
||||
def main(args=None):
    """Entry point: run the calibration node until it finishes or is interrupted."""
    rclpy.init(args=args)
    calib_node = Calibration('calibration')
    try:
        # Spin one callback at a time so the node's `done` flag can end the loop.
        while rclpy.ok() and not calib_node.done:
            rclpy.spin_once(calib_node)
    except KeyboardInterrupt:
        pass
    finally:
        calib_node.destroy_node()
        rclpy.shutdown()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -1,323 +0,0 @@
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
import cv2
|
||||
import open3d as o3d
|
||||
import numpy as np
|
||||
import transforms3d as tfs
|
||||
from cv_bridge import CvBridge
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
from message_filters import ApproximateTimeSynchronizer, Subscriber
|
||||
|
||||
from sensor_msgs.msg import Image, CameraInfo
|
||||
from geometry_msgs.msg import Pose, Point, Quaternion
|
||||
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
|
||||
def get_map(K, D, camera_size):
    """Precompute undistortion remap tables for cv2.remap.

    Args:
        K: flat 3x3 camera intrinsic matrix (length-9 sequence).
        D: distortion coefficients.
        camera_size: [width, height] in pixels.

    Returns:
        (map1, map2, new_K_flat): the two remap lookup tables and the
        flattened optimal new intrinsic matrix.
    """
    h, w = camera_size[::-1]  # camera_size is [w, h]; reversed gives (h, w)
    K = np.array(K).reshape(3, 3)
    D = np.array(D)
    # alpha=1 keeps all source pixels (may leave black borders in the output).
    new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
    map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)

    return map1, map2, new_K.flatten()
|
||||
|
||||
|
||||
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point set.

    Returns (eigenvalues, eigenvectors) of the covariance matrix of `data`,
    ordered by descending eigenvalue when `sort` is true, or (None, None)
    if the eigendecomposition fails.
    """
    centered = data - np.mean(data, axis=0)

    try:
        evals, evecs = np.linalg.eigh(np.cov(centered.T))
    except np.linalg.LinAlgError:
        return None, None

    if sort:
        order = evals.argsort()[::-1]  # descending eigenvalue order
        evals, evecs = evals[order], evecs[:, order]

    return evals, evecs
|
||||
|
||||
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics):
|
||||
"""计算位态"""
|
||||
depth_img_mask = np.zeros_like(depth_img)
|
||||
depth_img_mask[mask > 0] = depth_img[mask > 0]
|
||||
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=1000.0,
|
||||
depth_trunc=2,
|
||||
)
|
||||
|
||||
point_cloud = point_cloud.remove_non_finite_points()
|
||||
|
||||
down_pcd = point_cloud.voxel_down_sample(voxel_size=0.02)
|
||||
clean_pcd, _ = down_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
|
||||
|
||||
if len(clean_pcd.points) == 0:
|
||||
return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
|
||||
center = clean_pcd.get_center()
|
||||
x, y, z = center
|
||||
|
||||
w, v = pca(np.asarray(clean_pcd.points))
|
||||
|
||||
if w is not None and v is not None:
|
||||
vx, vy, vz = v[:,0], v[:,1], v[:,2]
|
||||
|
||||
if vx[0] < 0:
|
||||
vx = -vx
|
||||
if vy[1] < 0:
|
||||
vy = -vy
|
||||
if not np.allclose(np.cross(vx, vy), vz):
|
||||
vz = -vz
|
||||
|
||||
R = np.column_stack((vx, vy, vz))
|
||||
rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
|
||||
|
||||
rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
|
||||
|
||||
# point = [
|
||||
# [x, y, z], [x, y, z] + vx, [x, y, z] + vy, [x, y, z] + vz,
|
||||
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]
|
||||
# ] # 画点:原点、第一主成分、第二主成分
|
||||
# lines = [
|
||||
# [0, 1], [0, 2], [0, 3], [4, 5], [4, 6], [4, 7]
|
||||
# ] # 画出三点之间两两连线
|
||||
# colors = [
|
||||
# [1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]
|
||||
# ]
|
||||
# # 构造open3d中的LineSet对象,用于主成分显示
|
||||
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(point), lines=o3d.utility.Vector2iVector(lines))
|
||||
# line_set.colors = o3d.utility.Vector3dVector(colors)
|
||||
# o3d.visualization.draw_geometries([point_cloud, line_set])
|
||||
# o3d.visualization.draw_geometries([clean_pcd, line_set])
|
||||
|
||||
return x, y, z, rw, rx, ry, rz
|
||||
|
||||
else:
|
||||
return 0.0, 0.0, 0.0, None, None, None, None
|
||||
|
||||
|
||||
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a color/depth image pair using precomputed rectification maps.

    Color uses bilinear interpolation; depth uses nearest-neighbour so depth
    values are never blended across object boundaries. Output dtypes match
    the corresponding inputs.
    """
    def _remap(img, interpolation):
        # cv2.remap can change the dtype; cast back to the source dtype.
        return cv2.remap(img, map1, map2, interpolation).astype(img.dtype)

    return (
        _remap(color_image, cv2.INTER_LINEAR),
        _remap(depth_image, cv2.INTER_NEAREST),
    )
|
||||
|
||||
|
||||
def crop_mask_bbox(depth_img, mask):
    """Crop `depth_img` and `mask` to the tight bounding box of the mask.

    Args:
        depth_img: H x W depth image.
        mask: H x W binary mask (0/1 or bool), same shape as `depth_img`.

    Returns:
        (depth_crop, mask_crop, (x_min, y_min)) where (x_min, y_min) is the
        clamped top-left corner of the crop in the original image, or
        (None, None, (0, 0)) when the mask is empty — callers test
        `depth_crop is None` for exactly this case.
    """
    high, width = depth_img.shape
    ys, xs = np.where(mask > 0)

    # BUG FIX: an empty mask has no bounding box; previously xs.min() raised
    # ValueError here. Return the sentinel the caller already checks for.
    if xs.size == 0:
        return None, None, (0, 0)

    x_min, x_max = int(round(xs.min())), int(round(xs.max()))
    y_min, y_max = int(round(ys.min())), int(round(ys.max()))

    depth_crop = depth_img[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]
    mask_crop = mask[max(0, y_min):min(y_max, high) + 1, max(0, x_min):min(x_max, width) + 1]

    return depth_crop, mask_crop, (max(0, x_min), max(0, y_min))
|
||||
|
||||
|
||||
class DetectNode(Node):
|
||||
def __init__(self, name):
|
||||
super().__init__(name)
|
||||
self.output_boxes = None
|
||||
self.output_masks = None
|
||||
self.K = self.D = None
|
||||
self.map1 = self.map2 = None
|
||||
self.intrinsics = None
|
||||
|
||||
self.function = self._test_image
|
||||
self.calculate_function = calculate_pose_cpu
|
||||
self.cv_bridge = CvBridge()
|
||||
|
||||
'''init'''
|
||||
self._init_param()
|
||||
self._init_publisher()
|
||||
self._init_subscriber()
|
||||
|
||||
def _init_param(self):
|
||||
"""init parameter"""
|
||||
self.declare_parameter('output_boxes', True)
|
||||
self.output_boxes = self.get_parameter('output_boxes').value
|
||||
|
||||
self.declare_parameter('output_masks', False)
|
||||
self.output_masks = self.get_parameter('output_masks').value
|
||||
|
||||
self.declare_parameter('color_image_topic', '/camera/color/image_raw')
|
||||
self.color_image_topic = self.get_parameter('color_image_topic').value
|
||||
|
||||
self.declare_parameter('depth_image_topic', '/camera/depth/image_raw')
|
||||
self.depth_image_topic = self.get_parameter('depth_image_topic').value
|
||||
|
||||
self.declare_parameter('camera_info_topic', '/camera/color/camera_info')
|
||||
self.camera_info_topic = self.get_parameter('camera_info_topic').value
|
||||
|
||||
def _init_publisher(self):
|
||||
"""init_publisher"""
|
||||
self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose', 10)
|
||||
|
||||
if self.output_boxes or self.output_masks:
|
||||
self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
|
||||
|
||||
def _init_subscriber(self):
|
||||
"""init_subscriber"""
|
||||
self.sub_camera_info = self.create_subscription(
|
||||
CameraInfo,
|
||||
self.camera_info_topic,
|
||||
self._camera_info_callback,
|
||||
10
|
||||
)
|
||||
|
||||
'''sync get color and depth img'''
|
||||
self.sub_color_image = Subscriber(self, Image, self.color_image_topic)
|
||||
self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic)
|
||||
|
||||
self.sync_subscriber = ApproximateTimeSynchronizer(
|
||||
[self.sub_color_image, self.sub_depth_image],
|
||||
queue_size=10,
|
||||
slop=0.1
|
||||
)
|
||||
self.sync_subscriber.registerCallback(self._sync_callback)
|
||||
|
||||
def _camera_info_callback(self, msg: CameraInfo):
|
||||
"""Get camera info"""
|
||||
self.K = msg.k
|
||||
self.D = msg.d
|
||||
|
||||
self.camera_size = [msg.width, msg.height]
|
||||
|
||||
if self.K is not None and self.D is not None:
|
||||
self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
|
||||
|
||||
if len(self.D) != 0:
|
||||
self.destroy_subscription(self.sub_camera_info)
|
||||
else:
|
||||
self.D = [0, 0, 0, 0, 0]
|
||||
self.destroy_subscription(self.sub_camera_info)
|
||||
else:
|
||||
raise "K and D are not defined"
|
||||
|
||||
|
||||
def _sync_callback(self, color_img_ros, depth_img_ros):
|
||||
"""同步回调函数"""
|
||||
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
|
||||
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
|
||||
|
||||
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
|
||||
|
||||
img, pose_dict = self.function(color_img_cv, depth_img_cv)
|
||||
|
||||
"""masks为空,结束这一帧"""
|
||||
if img is None:
|
||||
img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
|
||||
|
||||
if self.output_boxes or self.output_masks:
|
||||
self.pub_detect_image.publish(img)
|
||||
|
||||
if pose_dict:
|
||||
pose_list_all = PoseArrayClassAndID()
|
||||
for (class_id, class_name), pose_list in pose_dict.items():
|
||||
pose_list_all.objects.append(
|
||||
PoseClassAndID(
|
||||
class_name = class_name,
|
||||
class_id = class_id,
|
||||
pose_list = pose_list
|
||||
)
|
||||
)
|
||||
pose_list_all.header.stamp = self.get_clock().now().to_msg()
|
||||
pose_list_all.header.frame_id = "pose_list"
|
||||
self.pub_pose_list.publish(pose_list_all)
|
||||
|
||||
def _test_image(self, rgb_img, depth_img):
|
||||
pose_dict = defaultdict(list)
|
||||
rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
|
||||
pattern_size = (8, 5)
|
||||
# pattern_size = (15, 7)
|
||||
ret, corners = cv2.findChessboardCorners(rgb_img_gray, pattern_size, cv2.CALIB_CB_FAST_CHECK)
|
||||
if ret:
|
||||
# 角点亚像素精确化(提高标定精度)
|
||||
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
|
||||
corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
|
||||
|
||||
corners_subpix = corners_subpix.reshape(pattern_size[1], pattern_size[0], 2)
|
||||
mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
|
||||
|
||||
for i in range(0, pattern_size[1] - 1):
|
||||
for j in range(0, pattern_size[0] - 1):
|
||||
pts = np.array([
|
||||
corners_subpix[i, j],
|
||||
corners_subpix[i, j + 1],
|
||||
corners_subpix[i + 1, j + 1],
|
||||
corners_subpix[i + 1, j]
|
||||
], dtype=np.int32)
|
||||
cv2.fillConvexPoly(mask, pts, 1)
|
||||
|
||||
rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
|
||||
orig_shape = rgb_img_gray.shape
|
||||
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
|
||||
depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask)
|
||||
|
||||
if depth_crop is None:
|
||||
return None, None
|
||||
|
||||
intrinsics = o3d.camera.PinholeCameraIntrinsic(
|
||||
int(self.camera_size[0]),
|
||||
int(self.camera_size[1]),
|
||||
self.K[0],
|
||||
self.K[4],
|
||||
self.K[2] - x_min,
|
||||
self.K[5] - y_min
|
||||
)
|
||||
|
||||
x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics)
|
||||
self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
|
||||
|
||||
if (x, y, z) != (0.0, 0.0, 0.0):
|
||||
pose = Pose()
|
||||
pose.position = Point(x=x, y=y, z=z)
|
||||
pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
|
||||
pose_dict[int(99), 'crossboard'].append(pose)
|
||||
|
||||
cv2.putText(rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}', (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
|
||||
cv2.putText(rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}', (0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
|
||||
else:
|
||||
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
|
||||
|
||||
|
||||
def main(args=None):
|
||||
rclpy.init(args=args)
|
||||
node = DetectNode('detect')
|
||||
try:
|
||||
rclpy.spin(node)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
node.destroy_node()
|
||||
rclpy.shutdown()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,575 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import ast
|
||||
from collections import defaultdict
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
import cv2
|
||||
import open3d as o3d
|
||||
import numpy as np
|
||||
import transforms3d as tfs
|
||||
from cv_bridge import CvBridge
|
||||
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
from sensor_msgs.msg import Image
|
||||
from geometry_msgs.msg import Pose, Point, Quaternion
|
||||
from interfaces.msg import PoseClassAndID, ImgMsg
|
||||
from interfaces.srv import VisionObjectRecognition
|
||||
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
|
||||
def draw_pointcloud(pcd, T):
|
||||
|
||||
R = T[0:3, 0:3]
|
||||
point = T[0:3, 3:4].flatten()
|
||||
x, y, z = R[:, 0], R[:, 1], R[:, 2]
|
||||
|
||||
points = [
|
||||
[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
|
||||
point, point + x, point + y, point + z
|
||||
|
||||
] # 画点:原点、第一主成分、第二主成分
|
||||
lines = [
|
||||
[0, 1], [0, 2], [0, 3],
|
||||
[4, 5], [4, 6], [4, 7]
|
||||
] # 画出三点之间两两连线
|
||||
colors = [
|
||||
[1, 0, 0], [0, 1, 0], [0, 0, 1],
|
||||
[1, 0, 0], [0, 1, 0], [0, 0, 1]
|
||||
]
|
||||
|
||||
line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
|
||||
line_set.colors = o3d.utility.Vector3dVector(colors)
|
||||
|
||||
pcd.append(line_set)
|
||||
o3d.visualization.draw_geometries(pcd)
|
||||
|
||||
|
||||
def preprocess_point_cloud(pcd, voxel_size):
    """Downsample `pcd` and compute normals plus FPFH features for registration.

    Normal estimation uses a radius of 2 * voxel_size; FPFH features use
    5 * voxel_size. Returns (pcd_down, pcd_fpfh).
    """
    pcd_down = pcd.voxel_down_sample(voxel_size)
    radius_normal = voxel_size * 2

    pcd_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30))

    radius_feature = voxel_size * 5

    pcd_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        pcd_down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
    )

    return pcd_down, pcd_fpfh
|
||||
|
||||
def prepare_dataset(source, target, voxel_size):
    """Preprocess both clouds for global registration.

    Returns (source_down, target_down, source_fpfh, target_fpfh).
    """
    # Identity transform: a no-op, kept as a hook for supplying an initial guess.
    trans_init = np.identity(4)
    source.transform(trans_init)

    source_down, source_fpfh = preprocess_point_cloud(source, voxel_size)
    target_down, target_fpfh = preprocess_point_cloud(target, voxel_size)

    return source_down, target_down, source_fpfh, target_fpfh
|
||||
|
||||
|
||||
def execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
    """RANSAC global registration on FPFH feature correspondences.

    Returns the estimated 4x4 transformation from source to target.
    """
    distance_threshold = voxel_size * 1.5
    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        True,  # mutual_filter
        distance_threshold,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
        3,  # ransac_n: correspondences sampled per RANSAC iteration
        [
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
        ],
        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))

    return result.transformation
|
||||
|
||||
|
||||
def object_icp(
    target: o3d.geometry.PointCloud,
    source: o3d.geometry.PointCloud | str,
    ransac_voxel_size: float = 0.005,
    icp_voxel_radius: list[float] | None = None,
    icp_max_iter: list[int] | None = None,
):
    """Register `source` onto `target`: RANSAC global alignment + multi-scale ICP.

    Args:
        target: scene point cloud.
        source: model point cloud, or a path readable by o3d.io.read_point_cloud.
        ransac_voxel_size: voxel size (metres) used for downsampling / FPFH
            features and as the base of the RANSAC distance threshold.
        icp_voxel_radius: per-stage ICP downsampling radii, coarse to fine.
        icp_max_iter: per-stage ICP iteration caps; must match
            icp_voxel_radius in length.

    Returns:
        4x4 transformation matrix mapping source into the target frame.

    Raises:
        TypeError: if `source` is neither a point cloud nor a path string.
    """
    if icp_voxel_radius is None:
        icp_voxel_radius = [0.004, 0.002, 0.001]

    if icp_max_iter is None:
        icp_max_iter = [50, 30, 14]

    if isinstance(source, str):
        source = o3d.io.read_point_cloud(source)
    elif isinstance(source, o3d.geometry.PointCloud):
        pass
    else:
        raise TypeError(f"Unsupported Type {type(source)}")

    # BUG FIX: the feature-extraction voxel size was hard-coded to 0.005,
    # silently ignoring the ransac_voxel_size parameter. The default value is
    # the same 0.005, so default behaviour is unchanged.
    source_down, target_down, source_fpfh, target_fpfh = prepare_dataset(source, target, ransac_voxel_size)

    T = execute_global_registration(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        ransac_voxel_size
    )

    # Coarse-to-fine ICP refinement; iterate the configured schedules directly
    # (instead of a hard-coded range(3)) so longer or shorter schedules work,
    # and avoid shadowing the builtin `iter`.
    for radius, max_iteration in zip(icp_voxel_radius, icp_max_iter):
        source_down = source.voxel_down_sample(radius)
        target_down = target.voxel_down_sample(radius)

        source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))

        result_icp = o3d.pipelines.registration.registration_icp(
            source_down,
            target_down,
            radius * 5,
            T,
            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
            o3d.pipelines.registration.ICPConvergenceCriteria(
                relative_fitness=1e-6,
                relative_rmse=1e-6,
                max_iteration=max_iteration
            )
        )

        T = result_icp.transformation

    # NOTE(review): this opens a blocking visualization window on every call —
    # looks debug-only; confirm before using this in a headless pipeline.
    draw_pointcloud([source.transform(T), target], T)

    return T
|
||||
|
||||
|
||||
def get_map(K, D, camera_size):
|
||||
h, w = camera_size[::-1]
|
||||
K = np.array(K).reshape(3, 3)
|
||||
D = np.array(D)
|
||||
new_K, _ = cv2.getOptimalNewCameraMatrix(K, D, (w, h), 1, (w, h))
|
||||
map1, map2 = cv2.initUndistortRectifyMap(K, D, None, new_K, (w, h), cv2.CV_32FC1)
|
||||
|
||||
return map1, map2, new_K.flatten()
|
||||
|
||||
|
||||
def pca(data, sort=True):
|
||||
"""主成分分析 """
|
||||
center = np.mean(data, axis=0)
|
||||
centered_points = data - center # 去中心化
|
||||
|
||||
try:
|
||||
cov_matrix = np.cov(centered_points.T) # 转置
|
||||
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
|
||||
|
||||
except np.linalg.LinAlgError:
|
||||
return None, None
|
||||
|
||||
if sort:
|
||||
sort = eigenvalues.argsort()[::-1] # 降序排列
|
||||
eigenvalues = eigenvalues[sort] # 索引
|
||||
eigenvectors = eigenvectors[:, sort]
|
||||
|
||||
return eigenvalues, eigenvectors
|
||||
|
||||
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, model_pcd):
|
||||
"""计算位态"""
|
||||
depth_img_mask = np.zeros_like(depth_img)
|
||||
depth_img_mask[mask > 0] = depth_img[mask > 0]
|
||||
|
||||
depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
|
||||
|
||||
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
|
||||
depth=depth_o3d,
|
||||
intrinsic=intrinsics,
|
||||
depth_scale=1000.0,
|
||||
depth_trunc=2.0,
|
||||
)
|
||||
|
||||
point_cloud = point_cloud.remove_non_finite_points()
|
||||
|
||||
clean_pcd, _ = point_cloud.remove_radius_outlier(nb_points=10, radius=0.1)
|
||||
clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)
|
||||
|
||||
if len(clean_pcd.points) == 0:
|
||||
return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
|
||||
|
||||
rmat = object_icp(model_pcd, clean_pcd)
|
||||
|
||||
x, y, z = rmat[0:3, 3:4].flatten()
|
||||
|
||||
rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
|
||||
|
||||
return x, y, z, rw, rx, ry, rz
|
||||
|
||||
|
||||
def draw_box(set_confidence, rgb_img, result):
    """Draw detection bounding boxes and class labels on `rgb_img` in place.

    Args:
        set_confidence: minimum confidence for a detection to be drawn.
        rgb_img: BGR image to annotate (modified in place).
        result: detection result exposing .boxes (xywh, conf, cls) and .names.
    """
    boxes = result.boxes.xywh.cpu().numpy()
    confidences = result.boxes.conf.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names

    for i, box in enumerate(boxes):
        if confidences[i] >= set_confidence:
            x_center, y_center, width, height = box[:4]

            # Convert centre/size to the two opposite corners cv2 expects.
            p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
            p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
            cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)

            # Label: "<class>: <confidence as a percentage>".
            cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
|
||||
|
||||
|
||||
def draw_mask(set_confidence, rgb_img, result):
    """Overlay instance-segmentation masks on `rgb_img` in place.

    Masks whose detection confidence falls below `set_confidence` are
    skipped; accepted masks are resized (nearest-neighbour) to the original
    image shape and blended 50/50 with red (BGR [0, 0, 255]).
    """
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    confidences = result.boxes.conf.cpu().numpy()
    overlay_color = np.array([0, 0, 255])

    for confidence, raw_mask in zip(confidences, masks):
        if confidence < set_confidence:
            continue
        full_mask = cv2.resize(raw_mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
        selected = full_mask > 0
        rgb_img[selected] = rgb_img[selected] * 0.5 + overlay_color * 0.5
|
||||
|
||||
|
||||
def distortion_correction(color_image, depth_image, map1, map2):
|
||||
"""畸变矫正"""
|
||||
undistorted_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR)
|
||||
undistorted_color = undistorted_color.astype(color_image.dtype)
|
||||
|
||||
undistorted_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST)
|
||||
undistorted_depth = undistorted_depth.astype(depth_image.dtype)
|
||||
|
||||
return undistorted_color, undistorted_depth
|
||||
|
||||
|
||||
def crop_mask_bbox(depth_img, mask, box):
    """Crop the depth image and mask to a detection box.

    Args:
        depth_img: H x W depth image.
        mask: H x W mask aligned with `depth_img`.
        box: (x_center, y_center, w, h) in pixels.

    Returns:
        (depth_crop, mask_crop, (x_min, y_min)) — the crops plus the clamped
        top-left corner of the box in the original image.
    """
    img_h, img_w = depth_img.shape
    cx, cy, bw, bh = box[:4]

    # Box centre/size -> rounded corner coordinates.
    x_lo = int(round(cx - bw / 2))
    x_hi = int(round(cx + bw / 2))
    y_lo = int(round(cy - bh / 2))
    y_hi = int(round(cy + bh / 2))

    rows = slice(max(0, y_lo), min(y_hi, img_h) + 1)
    cols = slice(max(0, x_lo), min(x_hi, img_w) + 1)

    return depth_img[rows, cols], mask[rows, cols], (max(0, x_lo), max(0, y_lo))
|
||||
|
||||
|
||||
def calculate_grav_width(mask, K, depth):
    """Estimate the graspable width (metric units of `depth`) of a masked object.

    Fits a minimum-area rectangle to the mask's outer contour, takes the
    shorter of its two edges in pixels, and converts that extent to a metric
    width at the given depth using the focal lengths K[0] (fx) and K[4] (fy).

    Returns 0.0 when the mask produces no contour.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # NOTE(review): only contours[0] is used — with multiple blobs this is
        # not necessarily the largest one; confirm intended behaviour.
        box = cv2.boxPoints(cv2.minAreaRect(contours[0]))
        # Pick the shorter of the two rectangle edges meeting at box[1].
        if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
            point_diff = box[1] - box[0]
        else:
            point_diff = box[1] - box[2]

        # Pinhole model: (pixel extent / focal length) * depth, per axis.
        grab_width = depth * np.sqrt(point_diff[0]**2 / K[0]**2 + point_diff[1]**2 / K[4]**2)
        return grab_width
    else:
        return 0.0
|
||||
|
||||
|
||||
class DetectNode(Node):
|
||||
def __init__(self, name, mode):
|
||||
super().__init__(name)
|
||||
self.mode = mode
|
||||
self.device = None
|
||||
self.checkpoint_path = None
|
||||
self.checkpoint_name = None
|
||||
self.output_boxes = None
|
||||
self.output_masks = None
|
||||
self.function = None
|
||||
self.calculate_function = None
|
||||
|
||||
self.K = None
|
||||
|
||||
self.fx = self.fy = 0.5
|
||||
self.camera_data = {}
|
||||
|
||||
self.cv_bridge = CvBridge()
|
||||
self.lock = threading.Lock()
|
||||
|
||||
'''init'''
|
||||
self._init_param()
|
||||
|
||||
self.source_model = o3d.io.read_point_cloud(self.source_model_path)
|
||||
|
||||
if mode == 'detect':
|
||||
self._init_model()
|
||||
else:
|
||||
self.function = None
|
||||
self.get_logger().error('Error: Mode Unknown')
|
||||
|
||||
if self.device == 'cpu':
|
||||
self.calculate_function = calculate_pose_cpu
|
||||
elif self.device == 'gpu':
|
||||
raise NotImplementedError('Function: calculate_pose_gpu not implemented')
|
||||
else:
|
||||
raise ValueError(f"device must be cpu or gpu, now {self.device}")
|
||||
|
||||
self._init_publisher()
|
||||
self._init_subscriber()
|
||||
self._init_service()
|
||||
|
||||
def _init_param(self):
|
||||
"""init parameter"""
|
||||
self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
|
||||
self.checkpoint_name = self.get_parameter('checkpoint_name').value
|
||||
self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)
|
||||
|
||||
self.source_model_path = os.path.join(share_dir, 'pointclouds/bottle_model.pcd')
|
||||
|
||||
self.declare_parameter('output_boxes', True)
|
||||
self.output_boxes = self.get_parameter('output_boxes').value
|
||||
|
||||
self.declare_parameter('output_masks', False)
|
||||
self.output_masks = self.get_parameter('output_masks').value
|
||||
|
||||
self.declare_parameter('set_confidence', 0.25)
|
||||
self.set_confidence = self.get_parameter('set_confidence').value
|
||||
|
||||
self.declare_parameter('device', 'cpu')
|
||||
self.device = self.get_parameter('device').value
|
||||
|
||||
self.declare_parameter('classes', 'None')
|
||||
self.classes = ast.literal_eval(self.get_parameter('classes').value)
|
||||
|
||||
def _init_model(self):
    """Load the YOLO checkpoint and select the per-frame handler.

    Moves the model to CUDA when available. Only '-seg.pt' checkpoints
    are wired up; any other name leaves self.function as None and the
    service will fail when invoked.
    """
    # prefer the GPU for inference when one is present
    device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    try:
        self.model = YOLO(self.checkpoint_path).to(device_model)
    except Exception as e:
        self.get_logger().error(f'Failed to load YOLO model: {e}')
        raise
    self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')

    # dispatch on checkpoint flavour: segmentation is the only supported mode
    if self.checkpoint_name.endswith('-seg.pt'):
        self.function = self._seg_image
    else:
        self.function = None
        self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
def _init_publisher(self):
    """Create the annotated-image publisher (only when an overlay is enabled)."""
    if self.output_boxes or self.output_masks:
        self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
def _init_service(self):
    """Expose the object-recognition service endpoint."""
    self.server = self.create_service(
        VisionObjectRecognition,
        "/vision_object_recognition",
        self._service_callback
    )
def _init_subscriber(self):
    """Subscribe to the combined image/intrinsics message stream."""
    self.sub_img = self.create_subscription(
        ImgMsg,
        "/img_msg",
        self._sub_callback,
        10
    )
def _sub_callback(self, msg):
    """Cache the latest frame bundle per camera position (thread-safe)."""
    with self.lock:
        # keyed by camera position; each entry: [color, depth, K, D]
        self.camera_data[msg.position] = [
            msg.image_color,
            msg.image_depth,
            msg.karr,
            msg.darr
        ]
def _service_callback(self, request, response):
    """Handle /vision_object_recognition: detect objects, return their poses."""
    response.header.stamp = self.get_clock().now().to_msg()
    response.header.frame_id = "camera_detect"

    # snapshot the latest frame for the requested camera under the lock
    with self.lock:
        if request.camera_position in self.camera_data:
            color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
        else:
            response.success = False
            response.info = f"{request.camera_position} Camera data is empty or name is wrong"
            response.objects = []

            return response

    self.camera_size = [color_img_ros.width, color_img_ros.height]

    color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
    depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')

    # undistort both images; self.K becomes the rectified intrinsics
    # NOTE(review): self.K / self.camera_size are mutated outside the lock;
    # concurrent service calls could race — confirm single-threaded executor.
    map1, map2, self.K = get_map(self.K, D, self.camera_size)
    color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)

    img, pose_dict = self.function(color_img_cv, depth_img_cv)

    """masks为空,结束这一帧"""
    # publish the annotated image, or the raw frame when nothing was segmented
    if self.output_boxes or self.output_masks:
        if img is None:
            img = color_img_ros
        self.pub_detect_image.publish(img)

    if pose_dict:
        response.info = "Success get pose"
        response.success = True
        for (class_id, class_name), pose_list in pose_dict.items():
            response.objects.append(
                PoseClassAndID(
                    class_name = class_name,
                    class_id = class_id,
                    pose_list = pose_list
                )
            )

    else:
        response.info = "pose dict is empty"
        response.success = False
        response.objects = []

    return response
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
    """Run YOLO segmentation and estimate one grasp pose per instance.

    Args:
        rgb_img: BGR frame (boxes/masks may be drawn on it in place).
        depth_img: 16-bit depth image in millimetres, same size.

    Returns:
        (annotated ROS Image or None, {(class_id, name): [Pose, ...]}).
        (None, None) when the model produced no masks.
    """
    pose_dict = defaultdict(list)

    # run the model; class 39 is 'bottle' in the COCO label map
    results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[39])
    result = results[0]

    if result.masks is None or len(result.masks) == 0:
        return None, None
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape

    # BUG FIX: boxes were read via result.boxes.data, whose layout is
    # (x1, y1, x2, y2, conf, cls); crop_mask_bbox expects xywh centers.
    boxes = result.boxes.xywh.cpu().numpy()

    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names

    for i, (mask, box) in enumerate(zip(masks, boxes)):
        # masks come back at model resolution; restore original size
        mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
        depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)

        if depth_crop is None:
            continue

        # BUG FIX: the old test compared mask.shape, which always equals
        # orig_shape after the resize above, so the branch was always taken;
        # the intent is to halve only *large* crops to keep the cloud small.
        if mask_crop.shape[0] >= (orig_shape[0] * 0.5) and mask_crop.shape[1] >= (orig_shape[1] * 0.5):
            mask_crop = cv2.resize(mask_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
            depth_crop = cv2.resize(depth_crop, None, fx=self.fx, fy=self.fy, interpolation=cv2.INTER_NEAREST)
            # intrinsics scaled to the downsampled crop; principal point is
            # shifted into the crop frame before scaling
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0] * self.fx),
                int(self.camera_size[1] * self.fy),
                self.K[0] * self.fx,
                self.K[4] * self.fy,
                (self.K[2] - x_min) * self.fx,
                (self.K[5] - y_min) * self.fy
            )
        else:
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0]),
                int(self.camera_size[1]),
                self.K[0],
                self.K[4],
                self.K[2] - x_min,
                self.K[5] - y_min
            )

        x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, self.source_model)

        if (x, y, z) == (None, None, None):
            self.get_logger().error("have wrong pose")
            continue
        if (x, y, z) == (0.0, 0.0, 0.0):
            continue  # solver sentinel for an empty point cloud

        # BUG FIX: the grab-width offset used to be applied *before* the
        # sentinel checks, so an empty-cloud (0,0,0) pose with a non-zero
        # grab width slipped past the != (0,0,0) filter.
        grab_width = calculate_grav_width(mask, self.K, z)
        z = z + grab_width * 0.45

        pose = Pose()
        pose.position = Point(x=x, y=y, z=z)
        pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
        pose_dict[int(class_ids[i]), labels[int(class_ids[i])]].append(pose)

    # draw whichever overlays are enabled and convert back to a ROS Image
    if self.output_boxes:
        draw_box(self.set_confidence, rgb_img, result)
    if self.output_masks:
        draw_mask(self.set_confidence, rgb_img, result)
    if self.output_boxes or self.output_masks:
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    return None, pose_dict
def main(args=None):
    """Entry point: create the detect node and spin until Ctrl-C."""
    rclpy.init(args=args)
    node = DetectNode('detect', 'detect')
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal shutdown path
    finally:
        # always release the node and the ROS context, even on errors
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
@@ -1,446 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import ast
|
||||
from collections import defaultdict
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
import cv2
|
||||
import open3d as o3d
|
||||
import numpy as np
|
||||
import transforms3d as tfs
|
||||
from cv_bridge import CvBridge
|
||||
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
from sensor_msgs.msg import Image
|
||||
from geometry_msgs.msg import Pose, Point, Quaternion
|
||||
from interfaces.msg import PoseClassAndID, ImgMsg
|
||||
from interfaces.srv import VisionObjectRecognition
|
||||
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
|
||||
def get_map(K, D, camera_size):
    """Build undistortion remap tables for a camera.

    Args:
        K: flat camera intrinsics (9 values, row-major 3x3).
        D: distortion coefficients.
        camera_size: [width, height] in pixels.

    Returns:
        (map1, map2, new_K_flat) for cv2.remap and the rectified intrinsics.
    """
    width, height = camera_size
    k_mat = np.asarray(K).reshape(3, 3)
    dist = np.asarray(D)
    # alpha=1 keeps every source pixel inside the corrected frame
    new_k, _ = cv2.getOptimalNewCameraMatrix(k_mat, dist, (width, height), 1, (width, height))
    xmap, ymap = cv2.initUndistortRectifyMap(k_mat, dist, None, new_k, (width, height), cv2.CV_32FC1)
    return xmap, ymap, new_k.flatten()
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point array.

    Args:
        data: (N, D) array of observations (rows are points).
        sort: if True, order components by descending eigenvalue.

    Returns:
        (eigenvalues, eigenvectors) of the covariance matrix with the
        eigenvectors in columns, or (None, None) when the decomposition
        fails.
    """
    center = np.mean(data, axis=0)
    centered_points = data - center  # remove the mean so cov is about the centroid

    try:
        cov_matrix = np.cov(centered_points.T)  # rows must be variables, hence .T
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    except np.linalg.LinAlgError:
        return None, None

    if sort:
        # BUG FIX: the index array used to shadow the `sort` parameter name
        order = eigenvalues.argsort()[::-1]  # descending by explained variance
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]

    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
    """Estimate the 6-DoF pose of a masked object from a depth crop.

    Back-projects the masked depth pixels into a point cloud, cleans it,
    takes the centroid as position and the PCA axes as orientation, then
    maps the camera-frame pose through the hand-eye matrix.

    Returns:
        (x, y, z, rw, rx, ry, rz) in the hand/base frame;
        all zeros when the cleaned cloud is empty;
        (0.0, 0.0, 0.0, None, None, None, None) when PCA fails.
    """
    # keep only depth pixels that belong to the object mask
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]

    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))

    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,   # depth is in millimetres
        depth_trunc=8.0,      # discard returns beyond 8 m
    )

    point_cloud = point_cloud.remove_non_finite_points()

    # decimate, then strip sparse and statistical outliers
    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.022)
    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)

    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    x, y, z = clean_pcd.get_center()

    w, v = pca(np.asarray(clean_pcd.points))

    if w is None or v is None:
        # BUG FIX: previously fell through to the final return with
        # rw/rx/ry/rz never assigned, raising NameError when PCA failed.
        return 0.0, 0.0, 0.0, None, None, None, None

    vx, vy, vz = v[:, 0], v[:, 1], v[:, 2]

    # make the axes' signs deterministic and the frame right-handed
    if vx[0] < 0:
        vx = -vx
    if vy[1] < 0:
        vy = -vy
    if not np.allclose(np.cross(vx, vy), vz):
        vz = -vz

    R = np.column_stack((vx, vy, vz))
    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
    rmat = hand_eye_mat @ rmat  # camera frame -> hand/base frame

    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
    x, y, z = rmat[0:3, 3].flatten()

    return x, y, z, rw, rx, ry, rz
def draw_box(set_confidence, rgb_img, result):
    """Draw detection boxes and labels on rgb_img in place.

    Boxes are taken in xywh (center) format; only detections at or above
    set_confidence are drawn, in (255, 255, 0).
    """
    centers_wh = result.boxes.xywh.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    ids = result.boxes.cls.cpu().numpy()
    names = result.names

    for idx, (cx, cy, bw, bh) in enumerate(centers_wh):
        if scores[idx] < set_confidence:
            continue  # guard clause instead of nested if
        top_left = [int(cx - bw / 2), int(cy - bh / 2)]
        bottom_right = [int(cx + bw / 2), int(cy + bh / 2)]
        cv2.rectangle(rgb_img, top_left, bottom_right, (255, 255, 0), 2)

        caption = f'{names[ids[idx]]}: {scores[idx]*100:.2f}'
        cv2.putText(rgb_img, caption, (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
def draw_mask(set_confidence, rgb_img, result):
    """Overlay instance-segmentation masks on rgb_img in place.

    Each mask whose confidence reaches set_confidence is blended 50/50
    with [0, 0, 255] (red in BGR).
    """
    mask_stack = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    scores = result.boxes.conf.cpu().numpy()

    for score, raw_mask in zip(scores, mask_stack):
        if score < set_confidence:
            continue
        # bring the low-resolution model mask back to image resolution
        resized = cv2.resize(raw_mask.astype(np.uint8), orig_shape[::-1],
                             interpolation=cv2.INTER_NEAREST)
        selected = resized > 0
        rgb_img[selected] = rgb_img[selected] * 0.5 + np.array([0, 0, 255]) * 0.5
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a colour/depth pair using precomputed remap tables.

    Colour is resampled bilinearly; depth uses nearest-neighbour so no
    fake intermediate depth values are invented. Returns the corrected
    (color, depth) images with their original dtypes.
    """
    fixed_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR).astype(color_image.dtype)
    fixed_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST).astype(depth_image.dtype)
    return fixed_color, fixed_depth
def crop_mask_bbox(depth_img, mask, box):
    """Crop the depth image and mask to a detection's bounding box.

    Args:
        depth_img: (H, W) depth image.
        mask: (H, W) binary mask (0/1 or bool).
        box: (x_center, y_center, w, h) in pixels (xywh format).

    Returns:
        (depth_crop, mask_crop, (x_min, y_min)) where the origin is the
        crop's top-left corner clamped into the image; (None, None,
        origin) when the box does not overlap the image. Callers already
        test depth_crop against None — previously that never happened
        and empty crops propagated downstream.
    """
    high, width = depth_img.shape
    x_center, y_center, w, h = box[:4]

    x_min, x_max = int(round(x_center - w / 2)), int(round(x_center + w / 2))
    y_min, y_max = int(round(y_center - h / 2)), int(round(y_center + h / 2))

    # clamp the window into the image (same arithmetic as before)
    y0, y1 = max(0, y_min), min(y_max, high) + 1
    x0, x1 = max(0, x_min), min(x_max, width) + 1

    depth_crop = depth_img[y0:y1, x0:x1]
    mask_crop = mask[y0:y1, x0:x1]

    # a box fully outside the frame yields an empty slice — report None
    if depth_crop.size == 0:
        return None, None, (x0, y0)

    return depth_crop, mask_crop, (x0, y0)
class DetectNode(Node):
    """YOLO segmentation + depth-based pose estimation ROS2 service node."""

    def __init__(self, name, mode):
        """Set up state, parameters, model, and ROS endpoints.

        Args:
            name: ROS node name.
            mode: only 'detect' wires up a handler; any other value logs
                an error and leaves self.function as None.
        """
        super().__init__(name)
        self.mode = mode
        self.device = None             # 'cpu' or 'gpu', from parameter
        self.checkpoint_path = None
        self.checkpoint_name = None
        self.output_boxes = None
        self.output_masks = None
        self.function = None           # per-frame handler, set by _init_model
        self.calculate_function = None # pose solver, chosen by device below
        self.eye_in_hand_mat = None    # 4x4 hand-eye transform (wrist cameras)
        self.eye_to_hand_mat = None    # 4x4 hand-eye transform (static camera)

        self.K = None                  # flattened rectified 3x3 intrinsics

        self.fx = self.fy = 0.5        # NOTE(review): not referenced in this revision — confirm
        self.camera_data = {}          # position -> [color, depth, K, D]

        self.cv_bridge = CvBridge()
        self.lock = threading.Lock()   # guards camera_data

        '''init'''
        self._init_param()


        if mode == 'detect':
            self._init_model()
        else:
            self.function = None
            self.get_logger().error('Error: Mode Unknown')

        # choose the pose solver; only the CPU path is implemented
        if self.device == 'cpu':
            self.calculate_function = calculate_pose_cpu
        elif self.device == 'gpu':
            raise NotImplementedError('Function: calculate_pose_gpu not implemented')
        else:
            raise ValueError(f"device must be cpu or gpu, now {self.device}")

        self._init_publisher()
        self._init_subscriber()
        self._init_service()
def _init_param(self):
    """Declare and read all ROS parameters (thresholds, calibration)."""
    self.declare_parameter('checkpoint_name', 'hivecorebox-seg.pt')
    self.checkpoint_name = self.get_parameter('checkpoint_name').value
    self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)

    self.declare_parameter('output_boxes', True)
    self.output_boxes = self.get_parameter('output_boxes').value

    self.declare_parameter('output_masks', True)
    self.output_masks = self.get_parameter('output_masks').value

    # minimum detection confidence forwarded to the YOLO model
    self.declare_parameter('set_confidence', 0.60)
    self.set_confidence = self.get_parameter('set_confidence').value

    self.declare_parameter('device', 'cpu')
    self.device = self.get_parameter('device').value

    # class filter arrives as a Python literal string, e.g. '[0]' or 'None'
    self.declare_parameter('classes', 'None')
    self.classes = ast.literal_eval(self.get_parameter('classes').value)

    # 4x4 row-major hand-eye calibration matrices, identity by default
    self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
                                           0.0, 1.0, 0.0, 0.0,
                                           0.0, 0.0, 1.0, 0.0,
                                           0.0, 0.0, 0.0, 1.0])
    self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)

    self.declare_parameter('eye_to_hand', [1.0, 0.0, 0.0, 0.0,
                                           0.0, 1.0, 0.0, 0.0,
                                           0.0, 0.0, 1.0, 0.0,
                                           0.0, 0.0, 0.0, 1.0])
    self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
def _init_model(self):
    """Load the YOLO checkpoint and select the per-frame handler.

    Moves the model to CUDA when available; only '-seg.pt' checkpoints
    are wired up.
    """
    # prefer the GPU for inference when one is present
    device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    try:
        self.model = YOLO(self.checkpoint_path).to(device_model)
    except Exception as e:
        self.get_logger().error(f'Failed to load YOLO model: {e}')
        raise
    self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')

    # dispatch on checkpoint flavour: segmentation is the only supported mode
    if self.checkpoint_name.endswith('-seg.pt'):
        self.function = self._seg_image
    else:
        self.function = None
        self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
def _init_publisher(self):
    """Create the annotated-image publisher (only when an overlay is enabled)."""
    if self.output_boxes or self.output_masks:
        self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
def _init_service(self):
    """Expose the object-recognition service endpoint."""
    self.server = self.create_service(
        VisionObjectRecognition,
        "/vision_object_recognition",
        self._service_callback
    )
def _init_subscriber(self):
    """Subscribe to the combined image/intrinsics message stream."""
    self.sub_img = self.create_subscription(
        ImgMsg,
        "/img_msg",
        self._sub_callback,
        10
    )
def _sub_callback(self, msg):
    """Cache the latest frame bundle per camera position (thread-safe)."""
    with self.lock:
        # keyed by camera position; each entry: [color, depth, K, D]
        self.camera_data[msg.position] = [
            msg.image_color,
            msg.image_depth,
            msg.karr,
            msg.darr
        ]
def _service_callback(self, request, response):
    """Handle /vision_object_recognition: detect objects, return their poses."""
    response.header.stamp = self.get_clock().now().to_msg()
    response.header.frame_id = "camera_detect"

    # snapshot the latest frame for the requested camera under the lock
    with self.lock:
        if request.camera_position in self.camera_data:
            color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
        else:
            response.success = False
            response.info = f"{request.camera_position} Camera data is empty or name is wrong"
            response.objects = []

            return response

    # wrist cameras use the eye-in-hand calibration; any other position
    # (e.g. a head/static camera) uses eye-to-hand
    if request.camera_position == 'left' or request.camera_position == 'right':
        hand_eye_mat = self.eye_in_hand_mat
    else:
        hand_eye_mat = self.eye_to_hand_mat

    self.camera_size = [color_img_ros.width, color_img_ros.height]

    color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
    depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')

    # undistort both images; self.K becomes the rectified intrinsics
    # NOTE(review): self.K / self.camera_size are mutated outside the lock;
    # concurrent service calls could race — confirm single-threaded executor.
    map1, map2, self.K = get_map(self.K, D, self.camera_size)
    color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)

    img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)

    """masks为空,结束这一帧"""
    # publish the annotated image, or the raw frame when nothing was segmented
    if self.output_boxes or self.output_masks:
        if img is None:
            img = color_img_ros
        self.pub_detect_image.publish(img)

    if pose_dict:
        response.info = "Success get pose"
        response.success = True
        for (class_id, class_name), pose_list in pose_dict.items():
            response.objects.append(
                PoseClassAndID(
                    class_name = class_name,
                    class_id = class_id,
                    pose_list = pose_list
                )
            )

    else:
        response.info = "pose dict is empty"
        response.success = False
        response.objects = []

    return response
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
    """Segment objects, estimate a pose per instance, and annotate.

    Args:
        rgb_img: BGR frame, modified in place (background blackout,
            then optional box/mask overlays).
        depth_img: 16-bit depth image in millimetres.
        hand_eye_mat: 4x4 transform applied to each camera-frame pose.

    Returns:
        (annotated ROS Image or None, {(class_id, name): [Pose, ...]}).
        (None, None) when the model produced no masks.
    """
    pose_dict = defaultdict(list)

    # suppress background before inference: keep only pixels with a
    # valid depth closer than 2 m
    depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
    depth_filter_mask[(depth_img > 0) & (depth_img < 2000)] = 1

    rgb_img[depth_filter_mask == 0] = 0

    '''Get Predict Results'''
    results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=[0])
    result = results[0]

    '''Get masks'''
    if result.masks is None or len(result.masks) == 0:
        return None, None
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape

    '''Get boxes'''
    boxes = result.boxes.xywh.cpu().numpy()

    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names

    for i, (mask, box) in enumerate(zip(masks, boxes)):
        # masks come back at model resolution; restore original size
        mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
        depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)

        if depth_crop is None:
            continue

        # principal point shifted into the crop's frame
        intrinsics = o3d.camera.PinholeCameraIntrinsic(
            int(self.camera_size[0]),
            int(self.camera_size[1]),
            self.K[0],
            self.K[4],
            self.K[2] - x_min,
            self.K[5] - y_min
        )

        x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)

        if (x, y, z) == (None, None, None):
            self.get_logger().error("have wrong pose")
            continue

        # (0, 0, 0) is the solver's empty-cloud sentinel — do not report it
        if (x, y, z) != (0.0, 0.0, 0.0):
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)

    '''mask_img and box_img is or not output'''
    if self.output_boxes and not self.output_masks:
        draw_box(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    elif self.output_boxes and self.output_masks:
        draw_box(self.set_confidence, rgb_img, result)
        draw_mask(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    elif not self.output_boxes and self.output_masks:
        draw_mask(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    else:
        return None, pose_dict
def main(args=None):
    """Entry point: create the detect node and spin until Ctrl-C."""
    rclpy.init(args=args)
    node = DetectNode('detect', 'detect')
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass  # Ctrl-C is the normal shutdown path
    finally:
        # always release the node and the ROS context, even on errors
        node.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
@@ -1,415 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
import ast
|
||||
from collections import defaultdict
|
||||
from ament_index_python.packages import get_package_share_directory
|
||||
|
||||
import cv2
|
||||
import open3d as o3d
|
||||
import numpy as np
|
||||
import transforms3d as tfs
|
||||
from cv_bridge import CvBridge
|
||||
|
||||
import torch
|
||||
from ultralytics import YOLO
|
||||
|
||||
import rclpy
|
||||
from rclpy.node import Node
|
||||
|
||||
from sensor_msgs.msg import Image
|
||||
from geometry_msgs.msg import Pose, Point, Quaternion
|
||||
from interfaces.msg import PoseClassAndID, ImgMsg
|
||||
from interfaces.srv import VisionObjectRecognition
|
||||
|
||||
|
||||
share_dir = get_package_share_directory('vision_detect')
|
||||
|
||||
|
||||
def get_map(K, D, camera_size):
    """Build undistortion remap tables for a camera.

    Args:
        K: flat camera intrinsics (9 values, row-major 3x3).
        D: distortion coefficients.
        camera_size: [width, height] in pixels.

    Returns:
        (map1, map2, new_K_flat) for cv2.remap and the rectified intrinsics.
    """
    width, height = camera_size
    k_mat = np.asarray(K).reshape(3, 3)
    dist = np.asarray(D)
    # alpha=1 keeps every source pixel inside the corrected frame
    new_k, _ = cv2.getOptimalNewCameraMatrix(k_mat, dist, (width, height), 1, (width, height))
    xmap, ymap = cv2.initUndistortRectifyMap(k_mat, dist, None, new_k, (width, height), cv2.CV_32FC1)
    return xmap, ymap, new_k.flatten()
def pca(data, sort=True):
    """Principal component analysis of an (N, D) point array.

    Args:
        data: (N, D) array of observations (rows are points).
        sort: if True, order components by descending eigenvalue.

    Returns:
        (eigenvalues, eigenvectors) of the covariance matrix with the
        eigenvectors in columns, or (None, None) when the decomposition
        fails.
    """
    center = np.mean(data, axis=0)
    centered_points = data - center  # remove the mean so cov is about the centroid

    try:
        cov_matrix = np.cov(centered_points.T)  # rows must be variables, hence .T
        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    except np.linalg.LinAlgError:
        return None, None

    if sort:
        # BUG FIX: the index array used to shadow the `sort` parameter name
        order = eigenvalues.argsort()[::-1]  # descending by explained variance
        eigenvalues = eigenvalues[order]
        eigenvectors = eigenvectors[:, order]

    return eigenvalues, eigenvectors
def calculate_pose_cpu(mask, depth_img: np.ndarray, intrinsics, hand_eye_mat):
    """Estimate object pose (centroid position + PCA orientation) from masked depth.

    Returns:
        (x, y, z, rw, rx, ry, rz) in the hand/base frame; all zeros when
        the cleaned cloud is empty; (0.0, 0.0, 0.0, None, None, None,
        None) when PCA fails.
    """
    # keep only depth pixels that belong to the object mask
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]

    depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))

    point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
        depth=depth_o3d,
        intrinsic=intrinsics,
        depth_scale=1000.0,  # depth is in millimetres
        depth_trunc=3.0,     # discard returns beyond 3 m
    )

    point_cloud = point_cloud.remove_non_finite_points()

    # decimate, then strip sparse and statistical outliers
    down_pcd = point_cloud.voxel_down_sample(voxel_size=0.020)
    clean_pcd, _ = down_pcd.remove_radius_outlier(nb_points=10, radius=0.1)
    clean_pcd, _ = clean_pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=3.0)

    if len(clean_pcd.points) == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    center = clean_pcd.get_center()
    x, y, z = center

    w, v = pca(np.asarray(clean_pcd.points))

    if w is not None and v is not None:
        vx, vy, vz = v[:,0], v[:,1], v[:,2]

        # make the axes' signs deterministic and the frame right-handed
        if vx[0] < 0:
            vx = -vx
        if vy[1] < 0:
            vy = -vy
        if not np.allclose(np.cross(vx, vy), vz):
            vz = -vz

        R = np.column_stack((vx, vy, vz))
        rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
        rmat = hand_eye_mat @ rmat  # camera frame -> hand/base frame
        rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
        x, y, z = rmat[0:3, 3].flatten()

        return x, y, z, rw, rx, ry, rz

    else:
        # PCA failed: position is meaningless without orientation here,
        # so report the zero/None sentinel that callers filter out
        return 0.0, 0.0, 0.0, None, None, None, None
def draw_box(set_confidence, rgb_img, result):
    """Draw detection boxes and labels on rgb_img in place.

    Boxes are taken in xywh (center) format; only detections at or above
    set_confidence are drawn, in (255, 255, 0).
    """
    centers_wh = result.boxes.xywh.cpu().numpy()
    scores = result.boxes.conf.cpu().numpy()
    ids = result.boxes.cls.cpu().numpy()
    names = result.names

    for idx, (cx, cy, bw, bh) in enumerate(centers_wh):
        if scores[idx] < set_confidence:
            continue  # guard clause instead of nested if
        top_left = [int(cx - bw / 2), int(cy - bh / 2)]
        bottom_right = [int(cx + bw / 2), int(cy + bh / 2)]
        cv2.rectangle(rgb_img, top_left, bottom_right, (255, 255, 0), 2)

        caption = f'{names[ids[idx]]}: {scores[idx]*100:.2f}'
        cv2.putText(rgb_img, caption, (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
def draw_mask(set_confidence, rgb_img, result):
    """Overlay instance-segmentation masks on rgb_img in place.

    Each mask whose confidence reaches set_confidence is blended 50/50
    with [0, 0, 255] (red in BGR).
    """
    mask_stack = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape
    scores = result.boxes.conf.cpu().numpy()

    for score, raw_mask in zip(scores, mask_stack):
        if score < set_confidence:
            continue
        # bring the low-resolution model mask back to image resolution
        resized = cv2.resize(raw_mask.astype(np.uint8), orig_shape[::-1],
                             interpolation=cv2.INTER_NEAREST)
        selected = resized > 0
        rgb_img[selected] = rgb_img[selected] * 0.5 + np.array([0, 0, 255]) * 0.5
def distortion_correction(color_image, depth_image, map1, map2):
    """Undistort a colour/depth pair using precomputed remap tables.

    Colour is resampled bilinearly; depth uses nearest-neighbour so no
    fake intermediate depth values are invented. Returns the corrected
    (color, depth) images with their original dtypes.
    """
    fixed_color = cv2.remap(color_image, map1, map2, cv2.INTER_LINEAR).astype(color_image.dtype)
    fixed_depth = cv2.remap(depth_image, map1, map2, cv2.INTER_NEAREST).astype(depth_image.dtype)
    return fixed_color, fixed_depth
def crop_mask_bbox(depth_img, mask, box):
    """Crop the depth image and mask to a detection's bounding box.

    Args:
        depth_img: (H, W) depth image.
        mask: (H, W) binary mask (0/1 or bool).
        box: (x_center, y_center, w, h) in pixels (xywh format).

    Returns:
        (depth_crop, mask_crop, (x_min, y_min)) where the origin is the
        crop's top-left corner clamped into the image; (None, None,
        origin) when the box does not overlap the image. Callers already
        test depth_crop against None — previously that never happened
        and empty crops propagated downstream.
    """
    high, width = depth_img.shape
    x_center, y_center, w, h = box[:4]

    x_min, x_max = int(round(x_center - w / 2)), int(round(x_center + w / 2))
    y_min, y_max = int(round(y_center - h / 2)), int(round(y_center + h / 2))

    # clamp the window into the image (same arithmetic as before)
    y0, y1 = max(0, y_min), min(y_max, high) + 1
    x0, x1 = max(0, x_min), min(x_max, width) + 1

    depth_crop = depth_img[y0:y1, x0:x1]
    mask_crop = mask[y0:y1, x0:x1]

    # a box fully outside the frame yields an empty slice — report None
    if depth_crop.size == 0:
        return None, None, (x0, y0)

    return depth_crop, mask_crop, (x0, y0)
class DetectNode(Node):
    """YOLO segmentation + depth-based pose estimation ROS2 service node."""

    def __init__(self, name):
        """Set up state, parameters, model, and ROS endpoints.

        Args:
            name: ROS node name.
        """
        super().__init__(name)
        self.device = None             # declared for symmetry; no device parameter here
        self.checkpoint_path = None
        self.checkpoint_name = None
        self.output_boxes = None
        self.output_masks = None
        self.function = None           # per-frame handler, set by _init_model
        self.K = None                  # flattened rectified 3x3 intrinsics
        self.eye_in_hand_mat = None    # 4x4 hand-eye transform (wrist cameras)
        self.eye_to_hand_mat = None    # 4x4 hand-eye transform (static camera)

        self.calculate_function = calculate_pose_cpu  # CPU pose solver
        self.fx = self.fy = 0.5        # NOTE(review): not referenced in this revision — confirm
        self.camera_data = {}          # position -> [color, depth, K, D]
        self.cv_bridge = CvBridge()
        self.lock = threading.Lock()   # guards camera_data

        '''init'''
        self._init_param()
        self._init_model()
        self._init_publisher()
        self._init_subscriber()
        self._init_service()
def _init_param(self):
    """Declare and read all ROS parameters (thresholds, calibration).

    BUG FIX: the 'eye_to_hand' default previously used integer literals,
    so ROS 2 typed the parameter as an integer array; overriding it with
    doubles from YAML/launch would then be rejected, and its type was
    inconsistent with 'eye_in_hand' (a double array). Both defaults are
    now double arrays.
    """
    self.declare_parameter('checkpoint_name', 'yolo11s-seg.pt')
    self.checkpoint_name = self.get_parameter('checkpoint_name').value
    self.checkpoint_path = os.path.join(share_dir, 'checkpoints', self.checkpoint_name)

    self.declare_parameter('output_boxes', True)
    self.output_boxes = self.get_parameter('output_boxes').value

    self.declare_parameter('output_masks', False)
    self.output_masks = self.get_parameter('output_masks').value

    # minimum detection confidence forwarded to the YOLO model
    self.declare_parameter('set_confidence', 0.25)
    self.set_confidence = self.get_parameter('set_confidence').value

    # class filter arrives as a Python literal string, e.g. '[0]' or 'None'
    self.declare_parameter('classes', 'None')
    self.classes = ast.literal_eval(self.get_parameter('classes').value)

    # 4x4 row-major hand-eye calibration matrices, identity by default
    self.declare_parameter('eye_in_hand', [1.0, 0.0, 0.0, 0.0,
                                           0.0, 1.0, 0.0, 0.0,
                                           0.0, 0.0, 1.0, 0.0,
                                           0.0, 0.0, 0.0, 1.0])
    self.eye_in_hand_mat = np.array(self.get_parameter('eye_in_hand').value).reshape(4, 4)

    self.declare_parameter('eye_to_hand', [1.0, 0.0, 0.0, 0.0,
                                           0.0, 1.0, 0.0, 0.0,
                                           0.0, 0.0, 1.0, 0.0,
                                           0.0, 0.0, 0.0, 1.0])
    self.eye_to_hand_mat = np.array(self.get_parameter('eye_to_hand').value).reshape(4, 4)
def _init_model(self):
    """Load the YOLO checkpoint and select the per-frame handler.

    Moves the model to CUDA when available; only '-seg.pt' checkpoints
    are wired up.
    """
    # prefer the GPU for inference when one is present
    device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    try:
        self.model = YOLO(self.checkpoint_path).to(device_model)
    except Exception as e:
        self.get_logger().error(f'Failed to load YOLO model: {e}')
        raise
    self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')

    # dispatch on checkpoint flavour: segmentation is the only supported mode
    if self.checkpoint_name.endswith('-seg.pt'):
        self.function = self._seg_image
    else:
        self.function = None
        self.get_logger().error(f'Unknown checkpoint: {self.checkpoint_name}')
def _init_publisher(self):
    """Create the annotated-image publisher (only when an overlay is enabled)."""
    if self.output_boxes or self.output_masks:
        self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
def _init_service(self):
    """Expose the object-recognition service endpoint."""
    self.server = self.create_service(
        VisionObjectRecognition,
        "/vision_object_recognition",
        self._service_callback
    )
def _init_subscriber(self):
    """Subscribe to the combined image/intrinsics message stream."""
    self.sub_img = self.create_subscription(
        ImgMsg,
        "/img_msg",
        self._sub_callback,
        10
    )
def _sub_callback(self, msg):
    """Cache the latest frame bundle per camera position (thread-safe)."""
    with self.lock:
        # keyed by camera position; each entry: [color, depth, K, D]
        self.camera_data[msg.position] = [
            msg.image_color,
            msg.image_depth,
            msg.karr,
            msg.darr
        ]
def _service_callback(self, request, response):
    """Handle one VisionObjectRecognition request.

    Looks up the most recent cached frame for the requested camera,
    undistorts the color/depth pair, runs the configured detection
    function, optionally publishes an annotated image, and fills
    `response.objects` with per-class pose lists.
    """
    response.header.stamp = self.get_clock().now().to_msg()
    response.header.frame_id = "camera_detect"

    # Snapshot the cached frame under the lock — _sub_callback writes
    # self.camera_data from the subscription thread.
    with self.lock:
        if request.camera_position in self.camera_data:
            color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
        else:
            # No frame cached for that camera (or the name is misspelled).
            response.success = False
            response.info = f"{request.camera_position} Camera data is empty or name is wrong"
            response.objects = []

            return response

    # Wrist cameras ('left'/'right') use the eye-in-hand calibration;
    # any other position falls back to the fixed eye-to-hand calibration.
    if request.camera_position == 'left' or request.camera_position == 'right':
        hand_eye_mat = self.eye_in_hand_mat
    else:
        hand_eye_mat = self.eye_to_hand_mat

    self.camera_size = [color_img_ros.width, color_img_ros.height]

    color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
    depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')

    # Undistort both images; get_map also returns the rectified intrinsics,
    # which replace self.K for the rest of this request.
    map1, map2, self.K = get_map(self.K, D, self.camera_size)
    color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)

    # self.function is selected in _init_model (e.g. _seg_image); it returns
    # (annotated image msg or None, dict of poses keyed by (class_id, name)).
    img, pose_dict = self.function(color_img_cv, depth_img_cv, hand_eye_mat)

    """masks为空,结束这一帧"""
    # (Above: "masks are empty — finish this frame".) When overlay output is
    # enabled, publish the raw color image if detection produced no overlay.
    if self.output_boxes or self.output_masks:
        if img is None:
            img = color_img_ros
        self.pub_detect_image.publish(img)

    if pose_dict:
        response.info = "Success get pose"
        response.success = True
        for (class_id, class_name), pose_list in pose_dict.items():
            response.objects.append(
                PoseClassAndID(
                    class_name = class_name,
                    class_id = class_id,
                    pose_list = pose_list
                )
            )

        return response

    else:
        # Detection ran but found nothing usable.
        response.info = "pose dict is empty"
        response.success = False
        response.objects = []

        return response
||||
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray, hand_eye_mat):
    """Run the YOLO segmentation model on one frame and compute object poses.

    Returns a tuple ``(annotated_image_msg_or_None, pose_dict)`` where
    ``pose_dict`` maps ``(class_id, class_name)`` to a list of Pose messages,
    or ``(None, None)`` when the model produced no masks at all.
    """
    pose_dict = defaultdict(list)

    '''Get Predict Results'''
    time1 = time.time()
    results = self.model(rgb_img, retina_masks=True, conf=self.set_confidence, classes=self.classes)
    time2 = time.time()
    result = results[0]

    '''Get masks'''
    # No detections: signal the caller with the (None, None) sentinel.
    if result.masks is None or len(result.masks) == 0:
        return None, None
    masks = result.masks.data.cpu().numpy()
    orig_shape = result.masks.orig_shape

    '''Get boxes'''
    boxes = result.boxes.xywh.cpu().numpy()
    class_ids = result.boxes.cls.cpu().numpy()
    labels = result.names

    time3 = time.time()

    for i, (mask, box) in enumerate(zip(masks, boxes)):
        # Resize mask back to the original image resolution
        # (nearest-neighbor keeps it binary).
        mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
        depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(depth_img, mask, box)

        # crop_mask_bbox returns None when the crop is unusable — skip it.
        if depth_crop is None:
            continue

        # Intrinsics with the principal point shifted so it matches the
        # cropped ROI. NOTE(review): this indexes self.K as a flat 9-element
        # row-major matrix (fx=K[0], fy=K[4], cx=K[2], cy=K[5]) — confirm
        # get_map returns it in that layout.
        intrinsics = o3d.camera.PinholeCameraIntrinsic(
            int(self.camera_size[0]),
            int(self.camera_size[1]),
            self.K[0],
            self.K[4],
            self.K[2] - x_min,
            self.K[5] - y_min
        )

        x, y, z, rw, rx, ry, rz = self.calculate_function(mask_crop, depth_crop, intrinsics, hand_eye_mat)

        # (0, 0, 0) is treated as "pose could not be computed" — drop it.
        if (x, y, z) != (0.0, 0.0, 0.0):
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            pose_dict[int(class_ids[i]), labels[class_ids[i]]].append(pose)

    time4 = time.time()

    # Per-frame timing breakdown for profiling the pipeline.
    self.get_logger().info(f'start')
    self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
    self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
    self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
    self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
    self.get_logger().info(f'end')

    '''mask_img and box_img is or not output'''
    # Draw the requested overlays in place on rgb_img and return it as a ROS
    # image message; return None for the image when no overlay is enabled.
    if self.output_boxes and not self.output_masks:
        draw_box(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    elif self.output_boxes and self.output_masks:
        draw_box(self.set_confidence, rgb_img, result)
        draw_mask(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    elif not self.output_boxes and self.output_masks:
        draw_mask(self.set_confidence, rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_dict
    else:
        return None, pose_dict
||||
|
||||
def main(args=None):
    """Spin the detection node until interrupted, then shut down cleanly."""
    rclpy.init(args=args)
    detect_node = DetectNode('detect')
    try:
        rclpy.spin(detect_node)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the node; exit quietly.
        pass
    finally:
        # Always release node resources and the rclpy context.
        detect_node.destroy_node()
        rclpy.shutdown()
||||
|
||||
|
||||
# Script entry point: start the ROS2 detection node.
if __name__ == '__main__':
    main()
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user