diff --git a/test/D.json b/test/D.json
new file mode 100644
index 0000000..a0e9a3e
--- /dev/null
+++ b/test/D.json
@@ -0,0 +1 @@
+[0.0, 0.0, 0.0, 0.0, 0.0]
\ No newline at end of file
diff --git a/test/K.json b/test/K.json
new file mode 100644
index 0000000..a26375e
--- /dev/null
+++ b/test/K.json
@@ -0,0 +1 @@
+[915.5315551757812, 0.0, 640.0259399414062, 0.0, 915.300537109375, 362.9521789550781, 0.0, 0.0, 1.0]
\ No newline at end of file
diff --git a/test/color_image.png b/test/color_image.png
new file mode 100644
index 0000000..273351a
Binary files /dev/null and b/test/color_image.png differ
diff --git a/test/depth_image.png b/test/depth_image.png
new file mode 100644
index 0000000..af2f72a
Binary files /dev/null and b/test/depth_image.png differ
diff --git a/test/eye_in_right_hand.json b/test/eye_in_right_hand.json
new file mode 100644
index 0000000..1837548
--- /dev/null
+++ b/test/eye_in_right_hand.json
@@ -0,0 +1,28 @@
+{
+ "T": [
+ [
+ 0.013635791720580838,
+ 0.9992765703636767,
+ -0.03550212819481636,
+ -0.08645650535173793
+ ],
+ [
+ -0.9998865276218899,
+ 0.013399556063222661,
+ -0.006883587549256321,
+ 0.036196137264974
+ ],
+ [
+ -0.006402895000908794,
+ 0.03559196285001416,
+ 0.999345893630474,
+ 0.014407883180676354
+ ],
+ [
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0
+ ]
+ ]
+}
\ No newline at end of file
diff --git a/test/yolo11s-seg.pt b/test/yolo11s-seg.pt
new file mode 100644
index 0000000..d7be62d
Binary files /dev/null and b/test/yolo11s-seg.pt differ
diff --git a/tools/calculate_pose.py b/tools/calculate_pose.py
new file mode 100644
index 0000000..51fd978
--- /dev/null
+++ b/tools/calculate_pose.py
@@ -0,0 +1,285 @@
+import json
+import time
+
+import cv2
+import open3d as o3d
+import numpy as np
+import torch
+import transforms3d as tfs
+
+from ultralytics import YOLO
+
+import rclpy
+from rclpy.node import Node
+
+
+# x=0.08685893262849026, y=0.03733333467952511, z=0.5707736129272428
+# x=-0.04385456738166652, y=-0.6302417335103124, z=-0.07781808527160532, w=0.7712434634188767
+
+
+def main():
+ color_image = cv2.imread("test/color_image.png")
+ depth_image = cv2.imread("test/depth_image.png", cv2.IMREAD_UNCHANGED)
+ with open("test/K.json", 'r') as f:
+ k = json.load(f)
+ with open("test/D.json", 'r') as f:
+ d = json.load(f)
+ with open("test/eye_in_right_hand.json", 'r') as f:
+ hand_eye_mat = np.array(json.load(f)["T"])
+
+ camera_size = [color_image.shape[1], color_image.shape[0]]
+
+ # cv2.imshow("color_image", color_image)
+ # cv2.waitKey(0)
+ # cv2.destroyAllWindows()
+ # cv2.imshow("depth_image", depth_image)
+ # cv2.waitKey(0)
+ # cv2.destroyAllWindows()
+ # print(k)
+ # print(d)
+ # print(eye_in_right_hand)
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ model = YOLO("test/yolo11s-seg.pt").to(device)
+
+ results = model(color_image, retina_masks=True, conf=0.5, classes=[39])
+ result = results[0]
+
+ # Get masks
+ if result.masks is None or len(result.masks) == 0:
+ return None, None
+ masks = result.masks.data.cpu().numpy()
+
+ # Get boxes
+ boxes = result.boxes.xywh.cpu().numpy()
+ class_ids = result.boxes.cls.cpu().numpy()
+ labels = result.names
+ print(f"Detect object num: {len(masks)}")
+
+
+
+ for i, (mask, box) in enumerate(zip(masks, boxes)):
+ imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_image, mask], box, True)
+ depth_crop, mask_crop = imgs_crop
+
+ if depth_crop is None:
+ continue
+
+ intrinsics = o3d.camera.PinholeCameraIntrinsic(
+ int(camera_size[0]),
+ int(camera_size[1]),
+ k[0],
+ k[4],
+ k[2] - x_min,
+ k[5] - y_min
+ )
+ time1 = time.time()
+ rmat = calculate_pose_pca(
+ mask_crop,
+ depth_crop,
+ intrinsics,
+ )
+ time2 = time.time()
+ print(f"PCA: {(time2 - time1) * 1000} ms")
+
+ grab_width = calculate_grav_width(mask.astype(np.uint8) * 255, k, rmat[2, 3])
+ rmat[2, 3] = rmat[2, 3] + grab_width * 0.22
+
+ rmat = hand_eye_mat @ rmat
+
+ x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
+
+ pose_list = []
+ if (x, y, z) != (0.0, 0.0, 0.0):
+ pose = [x, y, z, rx, ry, rz, rw]
+ pose_list.append(
+ {
+ "class_id": int(class_ids[i]),
+ "class_name": labels[class_ids[i]],
+ "pose": pose,
+ "grab_width": grab_width * 1.05
+ }
+ )
+
+
+def crop_imgs_box_xywh(imgs: list, box, same_sign: bool = False):
+ """
+ Crop imgs
+
+ input:
+ imgs: list, Each img in imgs has the same Width and High.
+ box: The YOLO model outputs bounding box data in the format [x, y, w, h, confidence, class_id].
+ same_sign: bool, Set True to skip size check if all img in imgs have the same Width and High.
+ output:
+ crop_imgs: list;
+ (x_min, y_min);
+ """
+ if not imgs:
+ print("imgs is empty")
+ return [], (0, 0)
+
+ if not same_sign and len(imgs) != 1:
+ for img in imgs:
+ if imgs[0].shape != img.shape:
+ raise ValueError(f"Img shape are different: {imgs[0].shape} - {img.shape}")
+
+ high, width = imgs[0].shape[:2]
+ x_center, y_center, w, h = box[:4]
+ x_min, x_max = max(0, int(round(x_center - w/2))), min(int(round(x_center + w/2)), width-1)
+ y_min, y_max = max(0, int(round(y_center - h/2))), min(int(round(y_center + h/2)), high-1)
+
+ crop_imgs = [img[y_min:y_max + 1, x_min:x_max + 1] for img in imgs]
+
+ return crop_imgs, (x_min, y_min)
+
+
+def pca(data: np.ndarray, sort=True):
+ """主成分分析 """
+ center = np.mean(data, axis=0)
+ centered_points = data - center # 去中心化
+
+ try:
+ cov_matrix = np.cov(centered_points.T) # 转置
+ eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
+
+ except np.linalg.LinAlgError:
+ return None, None
+
+ if sort:
+ sort = eigenvalues.argsort()[::-1] # 降序排列
+ eigenvalues = eigenvalues[sort] # 索引
+ eigenvectors = eigenvectors[:, sort]
+
+ return eigenvalues, eigenvectors
+
+
+def calculate_pose_pca(
+ mask,
+ depth_img: np.ndarray,
+ intrinsics,
+ source=None,
+ configs=None,
+ **kwargs
+):
+ """计算位态"""
+ if configs is None:
+ configs = {
+ "depth_scale": 1000.0,
+ "depth_trunc": 3.0,
+ "voxel_size": 0.010,
+ "nb_points": 16,
+ "radius": 0.03,
+ "nb_neighbors": 10,
+ "std_ratio": 3.0
+ }
+ depth_img_mask = np.zeros_like(depth_img)
+ depth_img_mask[mask > 0] = depth_img[mask > 0]
+
+ depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
+
+ point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
+ depth=depth_o3d,
+ intrinsic=intrinsics,
+ depth_scale=configs["depth_scale"],
+ depth_trunc=configs["depth_trunc"],
+ )
+
+ point_cloud = point_cloud.remove_non_finite_points()
+
+ down_pcd = point_cloud.voxel_down_sample(voxel_size=configs["voxel_size"])
+ clean_pcd, _ = down_pcd.remove_statistical_outlier(
+ nb_neighbors=configs["nb_neighbors"],
+ std_ratio=configs["std_ratio"]
+ )
+ clean_pcd, _ = clean_pcd.remove_radius_outlier(
+ nb_points=configs["nb_points"],
+ radius=configs["radius"]
+ )
+ labels = np.array(clean_pcd.cluster_dbscan(eps=0.03, min_points=16))
+ largest_cluster_idx = np.argmax(np.bincount(labels[labels >= 0]))
+ clean_pcd = clean_pcd.select_by_index(np.where(labels == largest_cluster_idx)[0])
+
+
+ if len(clean_pcd.points) == 0:
+ print("clean_pcd is empty")
+ return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+ center = clean_pcd.get_center()
+ x, y, z = center
+
+ w, v = pca(np.asarray(clean_pcd.points))
+
+ if w is None or v is None:
+ print("PCA output w or v is None")
+ return np.eye(4)
+
+ vx, vy, vz = v[:, 0], v[:, 1], v[:, 2]
+
+ if vx[0] < 0:
+ vx = -vx
+ if vy[1] < 0:
+ vy = -vy
+ if not np.allclose(np.cross(vx, vy), vz):
+ vz = -vz
+
+ R = np.column_stack((vx, vy, vz))
+ rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z+0.06*0.25))), R, [1, 1, 1])
+
+ draw(point_cloud, rmat)
+ # draw(down_pcd, rmat)
+ draw(clean_pcd, rmat)
+
+ return rmat
+
+
+def rmat2quat(rmat):
+ """Convert rotation matrix to quaternion."""
+ x, y, z = rmat[0:3, 3:4].flatten()
+ rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
+ quat = [x, y, z, rw, rx, ry, rz]
+ return quat
+
+
+def calculate_grav_width(mask, k, depth):
+ """计算重心宽度"""
+ contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ if not contours:
+ return 0.0
+
+ c = max(contours, key=cv2.contourArea)
+ box = cv2.boxPoints(cv2.minAreaRect(c))
+ if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]):
+ point_diff = box[1] - box[0]
+ else:
+ point_diff = box[1] - box[2]
+
+ grab_width = depth * np.sqrt(
+ point_diff[0] ** 2 / k[0] ** 2 + point_diff[1] ** 2 / k[4] ** 2
+ )
+ return grab_width
+
+def draw(pcd, mat):
+ R = mat[0:3, 0:3]
+ point = mat[0:3, 3:4].flatten()
+ x, y, z = R[:, 0], R[:, 1], R[:, 2]
+
+ points = [
+ [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
+ point, point + x, point + y, point + z
+
+ ] # 画点:原点、第一主成分、第二主成分
+ lines = [
+ [0, 1], [0, 2], [0, 3],
+ [4, 5], [4, 6], [4, 7]
+ ] # 画出三点之间两两连线
+ colors = [
+ [1, 0, 0], [0, 1, 0], [0, 0, 1],
+ [1, 0, 0], [0, 1, 0], [0, 0, 1]
+ ]
+ line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
+ line_set.colors = o3d.utility.Vector3dVector(colors)
+
+ o3d.visualization.draw_geometries([pcd, line_set])
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/get_image.py b/tools/get_image.py
new file mode 100644
index 0000000..c5a9bc2
--- /dev/null
+++ b/tools/get_image.py
@@ -0,0 +1,50 @@
+import threading
+import json
+
+import cv2
+
+import rclpy
+from rclpy.node import Node
+from cv_bridge import CvBridge
+
+from interfaces.msg import ImgMsg
+
+
+lock = threading.Lock()
+cv_bridge = CvBridge()
+sign = True
+
+
+def main():
+ rclpy.init()
+ node = Node("get_image")
+ sub = node.create_subscription(ImgMsg, "/img_msg", sub_callback, 10)
+ node.get_logger().info("Waiting for image...")
+ while sign:
+ rclpy.spin_once(node)
+ node.get_logger().info("Image received")
+ node.destroy_node()
+ rclpy.shutdown()
+
+
+def sub_callback(msg):
+ global sign
+ camera_data = [
+ msg.image_color,
+ msg.image_depth,
+ msg.karr,
+ msg.darr
+ ]
+
+ cv2.imwrite("test/color_image.png", cv_bridge.imgmsg_to_cv2(camera_data[0]))
+ cv2.imwrite("test/depth_image.png", cv_bridge.imgmsg_to_cv2(camera_data[1]))
+ with open("test/K.json", "w") as f:
+ f.write(json.dumps(camera_data[2].tolist()))
+ with open("test/D.json", "w") as f:
+ f.write(json.dumps(camera_data[3].tolist()))
+
+ sign = False
+
+
+if __name__ == '__main__':
+ main()
diff --git a/vision_detect/.idea/.gitignore b/vision_detect/.idea/.gitignore
deleted file mode 100644
index 35410ca..0000000
--- a/vision_detect/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# 默认忽略的文件
-/shelf/
-/workspace.xml
-# 基于编辑器的 HTTP 客户端请求
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/vision_detect/.idea/cv_part.iml b/vision_detect/.idea/cv_part.iml
deleted file mode 100644
index 5b05384..0000000
--- a/vision_detect/.idea/cv_part.iml
+++ /dev/null
@@ -1,11 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vision_detect/.idea/dictionaries/project.xml b/vision_detect/.idea/dictionaries/project.xml
deleted file mode 100644
index 7f00eca..0000000
--- a/vision_detect/.idea/dictionaries/project.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
- rclpy
-
-
-
\ No newline at end of file
diff --git a/vision_detect/.idea/inspectionProfiles/profiles_settings.xml b/vision_detect/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/vision_detect/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vision_detect/.idea/misc.xml b/vision_detect/.idea/misc.xml
deleted file mode 100644
index 9de2865..0000000
--- a/vision_detect/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vision_detect/.idea/modules.xml b/vision_detect/.idea/modules.xml
deleted file mode 100644
index 9bcae4c..0000000
--- a/vision_detect/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/vision_detect/vision_detect/VisionDetect/node/detect_node.py b/vision_detect/vision_detect/VisionDetect/node/detect_node.py
index 6db2c80..93c6e2d 100644
--- a/vision_detect/vision_detect/VisionDetect/node/detect_node.py
+++ b/vision_detect/vision_detect/VisionDetect/node/detect_node.py
@@ -158,10 +158,12 @@ class DetectNode(Node):
self.calculate_function = calculate_pose_pca
elif self.calculate_mode == "ICP":
self.configs = json.loads(self.get_parameter('ICA_configs').value)
- self.source = o3d.io.read_point_cloud(
+ source = o3d.io.read_point_cloud(
os.path.join(share_dir, self.configs['complete_model_path'])
)
+ self.configs["source"] = source
self.calculate_function = calculate_pose_icp
+
else:
self.get_logger().warning("Unknown calculate_mode, use PCA")
self.configs = json.loads(self.get_parameter('PCA_configs').value)
@@ -297,6 +299,7 @@ class DetectNode(Node):
def _service_sub_callback(self, msgs):
"""同步回调函数"""
with self.lock:
+ # self.get_logger().info("get msgs")
self.camera_data[msgs.position] = [
msgs.image_color,
msgs.image_depth,
@@ -336,6 +339,7 @@ class DetectNode(Node):
self.pub_pose_list.publish(pose_list_all)
def _service_callback(self, request, response):
+ # self.get_logger().info("service callback start")
response.header.stamp = self.get_clock().now().to_msg()
response.header.frame_id = "camera_detect"
@@ -343,10 +347,19 @@ class DetectNode(Node):
if request.camera_position in self.camera_data:
color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position]
else:
+ if len(self.camera_data) == 0:
+ response.success = False
+ response.info = "Camera data have not objects"
+ response.objects = []
+
+ # self.get_logger().info("service callback done")
+ return response
+
response.success = False
- response.info = f"{request.camera_position} Camera data is empty or name is wrong"
+ response.info = f"Name is wrong: {request.camera_position}"
response.objects = []
+ # self.get_logger().info("service callback done")
return response
if request.camera_position == 'left':
@@ -389,6 +402,7 @@ class DetectNode(Node):
response.success = False
response.objects = []
+ # self.get_logger().info("service callback done")
return response
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
@@ -403,6 +417,7 @@ class DetectNode(Node):
# Get masks
if result.masks is None or len(result.masks) == 0:
+ self.get_logger().info(f"Detect object num: 0")
return None, None
masks = result.masks.data.cpu().numpy()
@@ -435,12 +450,14 @@ class DetectNode(Node):
mask_crop,
depth_crop,
intrinsics,
- self.source,
- self.configs
+ **self.configs
)
+ if rmat is None:
+ self.get_logger().warning("Object point cloud have too many noise")
+ continue
grab_width = calculate_grav_width(mask, self.k, rmat[2, 3])
- rmat[2, 3] = rmat[2, 3] + grab_width * 0.22
+ rmat[2, 3] = rmat[2, 3] + grab_width * 0.38
rmat = self.hand_eye_mat @ rmat
@@ -463,14 +480,16 @@ class DetectNode(Node):
self.get_logger().info('start')
self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
- self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
+ # self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
self.get_logger().info('end')
# mask_img and box_img is or not output
if self.output_boxes and not self.output_masks:
- draw_box(rgb_img, result)
+ home = os.path.expanduser("~")
+ save_path = os.path.join(home, "detect_image.png")
+ draw_box(rgb_img, result, save_path=save_path)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
elif self.output_boxes and self.output_masks:
draw_box(rgb_img, result)
@@ -519,8 +538,7 @@ class DetectNode(Node):
mask_crop,
depth_crop,
intrinsics,
- self.source,
- self.configs
+ **self.configs
)
rmat = self.hand_eye_mat @ rmat
@@ -585,8 +603,7 @@ class DetectNode(Node):
mask_crop,
depth_crop,
intrinsics,
- self.source,
- self.configs
+ **self.configs
)
x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
diff --git a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
index 683c667..1f47ba5 100644
--- a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py
@@ -37,20 +37,9 @@ def calculate_pose_pca(
mask,
depth_img: np.ndarray,
intrinsics,
- source=None,
- configs=None
+ **kwargs
):
- """计算位态"""
- if configs is None:
- configs = {
- "depth_scale": 1000.0,
- "depth_trunc": 3.0,
- "voxel_size": 0.020,
- "nb_points": 10,
- "radius": 0.1,
- "nb_neighbors": 20,
- "std_ratio": 3.0
- }
+ """点云主成分分析法计算位态"""
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -59,29 +48,21 @@ def calculate_pose_pca(
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
- depth_scale=configs["depth_scale"],
- depth_trunc=configs["depth_trunc"],
+ depth_scale=kwargs.get("depth_scale", 1000.0),
+ depth_trunc=kwargs.get("depth_trunc", 3.0),
)
- point_cloud = point_cloud.remove_non_finite_points()
+ point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
+ if point_cloud is None:
+ return None
- down_pcd = point_cloud.voxel_down_sample(voxel_size=configs["voxel_size"])
- clean_pcd, _ = down_pcd.remove_radius_outlier(
- nb_points=configs["nb_points"],
- radius=configs["radius"]
- )
- clean_pcd, _ = clean_pcd.remove_statistical_outlier(
- nb_neighbors=configs["nb_neighbors"],
- std_ratio=configs["std_ratio"]
- )
-
- if len(clean_pcd.points) == 0:
+ if len(point_cloud.points) == 0:
logging.warning("clean_pcd is empty")
- return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
- center = clean_pcd.get_center()
+ return np.eye(4)
+ center = point_cloud.get_center()
x, y, z = center
- w, v = pca(np.asarray(clean_pcd.points))
+ w, v = pca(np.asarray(point_cloud.points))
if w is None or v is None:
logging.warning("PCA output w or v is None")
@@ -99,6 +80,9 @@ def calculate_pose_pca(
R = np.column_stack((vx, vy, vz))
rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
+ # draw(point_cloud, rmat)
+ # draw(point_cloud_1, rmat)
+
return rmat
@@ -106,22 +90,9 @@ def calculate_pose_icp(
mask,
depth_img: np.ndarray,
intrinsics,
- source,
- configs = None,
+ **kwargs
):
- if configs is None:
- configs = {
- "depth_scale": 1000.0,
- "depth_trunc": 2.0,
- "nb_points": 10,
- "radius": 0.1,
- "nb_neighbors": 20,
- "std_ratio": 3.0,
- "ransac_voxel_size": 0.005,
- "icp_voxel_radius": [0.004, 0.002, 0.001],
- "icp_max_iter": [50, 30, 14]
- }
- """计算位态"""
+ """点云配准法计算位姿"""
depth_img_mask = np.zeros_like(depth_img)
depth_img_mask[mask > 0] = depth_img[mask > 0]
@@ -130,38 +101,92 @@ def calculate_pose_icp(
point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
depth=depth_o3d,
intrinsic=intrinsics,
- depth_scale=configs["depth_scale"],
- depth_trunc=configs["depth_trunc"],
+ depth_scale=kwargs.get("depth_scale", 1000.0),
+ depth_trunc=kwargs.get("depth_trunc", 3.0)
)
- point_cloud = point_cloud.remove_non_finite_points()
+ point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010))
+ if point_cloud is None:
+ return None
- clean_pcd, _ = point_cloud.remove_radius_outlier(
- nb_points=configs["nb_points"],
- radius=configs["radius"]
- )
- clean_pcd, _ = clean_pcd.remove_statistical_outlier(
- nb_neighbors=configs["nb_neighbors"],
- std_ratio=configs["std_ratio"]
- )
-
- if len(clean_pcd.points) == 0:
+ if len(point_cloud.points) == 0:
logging.warning("clean_pcd is empty")
return np.eye(4)
rmat = object_icp(
- source,
- clean_pcd,
- ransac_voxel_size=configs["ransac_voxel_size"],
- icp_voxel_radius=configs["icp_voxel_radius"],
- icp_max_iter=configs["icp_max_iter"]
+ kwargs.get("source"),
+ point_cloud,
+ ransac_voxel_size=kwargs.get("ransac_voxel_size", 0.005),
+ icp_voxel_radius=kwargs.get("icp_voxel_radius", [0.004, 0.002, 0.001]),
+ icp_max_iter=kwargs.get("icp_max_iter", [50, 30, 14])
)
return rmat
+def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.010):
+ """点云去噪"""
+ point_cloud = point_cloud.remove_non_finite_points()
+ down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
+
+ # 半径滤波
+ clean_pcd, _ = down_pcd.remove_radius_outlier(
+ nb_points=10,
+ radius=voxel_size * 5
+ )
+
+ # 统计滤波
+ clean_pcd, _ = clean_pcd.remove_statistical_outlier(
+ nb_neighbors=10,
+ std_ratio=2.0
+ )
+
+ # 过滤过近的点
+ points = np.asarray(clean_pcd.points)
+ clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
+
+ # # 使用数量最大簇判定噪声强度
+ # _, counts = np.unique(labels[labels >= 0], return_counts=True)
+ # largest_cluster_ratio = counts.max() / len(labels)
+ # if largest_cluster_ratio < 0.5:
+ # return None
+
+ labels = np.array(clean_pcd.cluster_dbscan(eps=voxel_size * 5, min_points=10))
+ if len(labels[labels >= 0]) == 0:
+ return clean_pcd
+
+ # 使用距离最近簇作为物体
+ points = np.asarray(clean_pcd.points)
+ cluster_label = set(labels)
+ point_cloud_clusters = []
+ for label in cluster_label:
+ if label == -1:
+ continue
+ idx = np.where(labels == label)[0]
+ point_cloud_cluster = clean_pcd.select_by_index(idx)
+ points_cluster_z = points[idx, 2]
+ z_avg = np.mean(points_cluster_z)
+ if z_avg < 0.2:
+ continue
+ point_cloud_clusters.append((point_cloud_cluster, z_avg))
+
+ if len(point_cloud_clusters) == 0:
+ return clean_pcd
+
+ point_cloud_clusters.sort(key=lambda x: x[1])
+ clean_pcd = point_cloud_clusters[0][0]
+
+ # 使用最近簇判断噪音强度
+ largest_cluster_ratio = len(clean_pcd.points) / len(points)
+ if largest_cluster_ratio < 0.2:
+ return None
+
+ return clean_pcd
+
+
def calculate_grav_width(mask, k, depth):
"""计算重心宽度"""
+ mask = mask.astype(np.uint8) * 255
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if not contours:
@@ -194,3 +219,26 @@ def quat2rmat(quat):
r = tfs.quaternions.quat2mat([rw, rx, ry, rz])
rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), r, [1, 1, 1])
return rmat
+
+# def draw(pcd, mat):
+# R = mat[0:3, 0:3]
+# point = mat[0:3, 3:4].flatten()
+# x, y, z = R[:, 0], R[:, 1], R[:, 2]
+#
+# points = [
+# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
+# point, point + x, point + y, point + z
+#
+# ] # 画点:原点、第一主成分、第二主成分
+# lines = [
+# [0, 1], [0, 2], [0, 3],
+# [4, 5], [4, 6], [4, 7]
+# ] # 画出三点之间两两连线
+# colors = [
+# [1, 0, 0], [0, 1, 0], [0, 0, 1],
+# [1, 0, 0], [0, 1, 0], [0, 0, 1]
+# ]
+# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
+# line_set.colors = o3d.utility.Vector3dVector(colors)
+#
+# o3d.visualization.draw_geometries([pcd, line_set])
diff --git a/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py b/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py
index 4a4918c..ee62fb5 100644
--- a/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py
+++ b/vision_detect/vision_detect/VisionDetect/utils/draw_tools.py
@@ -1,15 +1,23 @@
+import os
import logging
+from typing import Union
import cv2
import numpy as np
import open3d as o3d
+
__all__ = [
"draw_box", "draw_mask", "draw_pointcloud",
]
-def draw_box(rgb_img: np.ndarray, segmentation_result, put_text: bool = True):
+def draw_box(
+ rgb_img: np.ndarray,
+ segmentation_result,
+ put_text: bool = True,
+ save_path: Union[bool, str] = False
+):
"""
绘制目标检测边界框
"""
@@ -26,9 +34,23 @@ def draw_box(rgb_img: np.ndarray, segmentation_result, put_text: bool = True):
cv2.rectangle(rgb_img, p1, p2, (255, 255, 0), 2)
if put_text:
- cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}', (p1[0], p1[1] - 10),
+ cv2.putText(rgb_img, f'{labels[class_ids[i]]}: {confidences[i]*100:.2f}',
+ (p1[0], p1[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
+ (255, 255, 0), 2)
+
+ cv2.putText(rgb_img, f"{i}", (p1[0] + 5, p1[1] + 15),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)
+ if save_path:
+ if isinstance(save_path, str):
+ dir_path = os.path.dirname(save_path)
+ if dir_path:
+ os.makedirs(dir_path, exist_ok=True)
+ else:
+ home_path = os.path.expanduser("~")
+ save_path = os.path.join(home_path, "detect_color_img.png")
+ cv2.imwrite(save_path, rgb_img)
+
def draw_mask(rgb_img: np.ndarray, segmentation_result):
"""
@@ -57,7 +79,11 @@ def draw_pointcloud(pcd, axis: bool = True):
[1, 0, 0], [0, 1, 0], [0, 0, 1]
]
# 构造open3d中的LineSet对象,用于主成分显示
- axis_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(axis_point), lines=o3d.utility.Vector2iVector(axis))
+ axis_set = o3d.geometry.LineSet(
+ points=o3d.utility.Vector3dVector(axis_point),
+ lines=o3d.utility.Vector2iVector(axis)
+ )
+
axis_set.colors = o3d.utility.Vector3dVector(axis_colors)
pcd.append(axis_set)
diff --git a/vision_detect/vision_detect/flexivaidk_detect_service.py b/vision_detect/vision_detect/flexivaidk_detect_service.py
index a03fb97..4fb957c 100644
--- a/vision_detect/vision_detect/flexivaidk_detect_service.py
+++ b/vision_detect/vision_detect/flexivaidk_detect_service.py
@@ -322,45 +322,42 @@ class DetectNode(Node):
time3 = time.time()
- rgb_bytes = cv2.imencode('.png', rgb_img)[1]
- depth_bytes = cv2.imencode('.png', depth_img)[1]
+ # rgb_bytes = cv2.imencode('.png', rgb_img)[1]
+ # depth_bytes = cv2.imencode('.png', depth_img)[1]
for i, (mask, box) in enumerate(zip(masks, boxes)):
mask = cv2.resize(mask.astype(np.uint8), orig_shape[::-1], interpolation=cv2.INTER_NEAREST)
+ rgb_crop, depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(rgb_img, depth_img, mask, box)
+
+ if depth_crop is None:
+ self.get_logger().error("depth_crop is None")
+ continue
+
+ depth_img_crop_mask = np.zeros_like(depth_crop)
+ depth_img_crop_mask[mask_crop > 0] = depth_crop[mask_crop > 0]
+
+ rgb_bytes = cv2.imencode('.png', rgb_crop)[1]
+ depth_bytes = cv2.imencode('.png', depth_img_crop_mask)[1]
x_center, y_center, width, height = box[:4]
p1 = [int((x_center - width / 2)), int((y_center - height / 2))]
p2 = [int((x_center + width / 2)), int((y_center + height / 2))]
cv2.rectangle(img, p1, p2, (255, 255, 0), 2)
-
- # rgb_crop, depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(rgb_img, depth_img, mask, box)
+ cv2.putText(img, f"{i}", (p1[0], p1[1] - 10), cv2.FONT_HERSHEY_SIMPLEX,
+ 1, (255, 255, 0), 2)
- # if depth_crop is None:
- # self.get_logger().error("depth_crop is None")
- # continue
-
- # depth_img_crop_mask = np.zeros_like(depth_crop)
- # depth_img_crop_mask[mask_crop > 0] = depth_crop[mask_crop > 0]
-
- # print(rgb_crop.shape)
- # print(rgb_crop.dtype)
-
-
- # rgb_bytes = cv2.imencode('.png', rgb_crop)[1]
- # depth_bytes = cv2.imencode('.png', depth_img_crop_mask)[1]
-
- contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
- mask_contours = contours[0].reshape(1, -1, 2)
- res = self.aidk_client.set_direct_setting_variables({"one_mask": f"{mask_contours}"})
+ # contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ # mask_contours = contours[0].reshape(1, -1, 2)
+ # res = self.aidk_client.set_direct_setting_variables({"one_mask": f"{mask_contours}"})
intrinsics = [
int(self.camera_size[0]),
int(self.camera_size[1]),
- # self.K[2] - x_min,
- # self.K[5] - y_min,
- self.K[2],
- self.K[5],
+ self.K[2] - x_min,
+ self.K[5] - y_min,
+ # self.K[2],
+ # self.K[5],
self.K[0],
self.K[4]
]
@@ -378,7 +375,8 @@ class DetectNode(Node):
self.get_logger().info(f"state: {state}")
self.get_logger().info(
- f"current detected object names: {self.aidk_client.get_detected_obj_names()}, current detected object nums: {self.aidk_client.get_detected_obj_nums()}"
+ f"current detected object names: {self.aidk_client.get_detected_obj_names()}, "
+ f"current detected object nums: {self.aidk_client.get_detected_obj_nums()}"
)
self.get_logger().info(
@@ -415,42 +413,6 @@ class DetectNode(Node):
}
)
- # for key in self.config["keys"]:
- # parse_state, result_list = self.aidk_client.parse_result(self.config["command"]["obj_name"], key, -1)
- # self.get_logger().info(
- # "detected time stamp: {}".format(
- # datetime.fromtimestamp(self.aidk_client.get_detected_time())
- # )
- # )
- # if not parse_state:
- # self.get_logger().error("Parse result error!!!")
- # continue
- # else:
- # if key in ["bbox", "keypoints", "positions", "obj_pose"]:
- # for result in result_list:
- # self.get_logger().info(f'"double_value in result": {getattr(result, "double_value")}')
- # for vec in result.vect:
- # self.get_logger().info(f"vec: {vec}")
- # x, y, z, rw, rx, ry, rz = vec
- #
- # pose = Pose()
- # pose.position = Point(x=x, y=y, z=z)
- # pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
- # pose_list.append(
- # {
- # "class_id": int(class_ids[i]),
- # "class_name": labels[class_ids[i]],
- # "pose": pose,
- # "grab_width": getattr(result, "double_value")
- # }
- # )
- # self.get_logger().info(f"pose: position({x}, {y}, {z}), orientation({rw}, {rx}, {ry}, {rz})")
- # self.get_logger().info(f"class_id: {int(class_ids[i])}, class_name: {labels[class_ids[i]]}, grab_width: {getattr(result, 'double_value')}")
- #
- # elif key in ["valid", "double_value", "int_value", "name"]:
- # for result in result_list:
- # self.get_logger().info(f"{key}: {getattr(result, key)}")
-
time4 = time.time()
cv2.imwrite("/home/nvidia/detect_result.png", img)