diff --git a/.gitattributes b/.gitattributes index 429a00a..99ba89d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ -vision_test/ merge=ours \ No newline at end of file +vision_test/ merge=ours +test/ merge=ours \ No newline at end of file diff --git a/.gitignore b/.gitignore index f2d4f92..6ac09ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ .idea/ .vscode/ -build/ -install/ -log/ \ No newline at end of file +**/build/ +**/install/ +**/log/ \ No newline at end of file diff --git a/test/D.json b/test/D.json new file mode 100644 index 0000000..a0e9a3e --- /dev/null +++ b/test/D.json @@ -0,0 +1 @@ +[0.0, 0.0, 0.0, 0.0, 0.0] \ No newline at end of file diff --git a/test/K.json b/test/K.json new file mode 100644 index 0000000..a26375e --- /dev/null +++ b/test/K.json @@ -0,0 +1 @@ +[915.5315551757812, 0.0, 640.0259399414062, 0.0, 915.300537109375, 362.9521789550781, 0.0, 0.0, 1.0] \ No newline at end of file diff --git a/test/color_image.png b/test/color_image.png new file mode 100644 index 0000000..273351a Binary files /dev/null and b/test/color_image.png differ diff --git a/test/depth_image.png b/test/depth_image.png new file mode 100644 index 0000000..af2f72a Binary files /dev/null and b/test/depth_image.png differ diff --git a/test/eye_in_right_hand.json b/test/eye_in_right_hand.json new file mode 100644 index 0000000..1837548 --- /dev/null +++ b/test/eye_in_right_hand.json @@ -0,0 +1,28 @@ +{ + "T": [ + [ + 0.013635791720580838, + 0.9992765703636767, + -0.03550212819481636, + -0.08645650535173793 + ], + [ + -0.9998865276218899, + 0.013399556063222661, + -0.006883587549256321, + 0.036196137264974 + ], + [ + -0.006402895000908794, + 0.03559196285001416, + 0.999345893630474, + 0.014407883180676354 + ], + [ + 0.0, + 0.0, + 0.0, + 1.0 + ] + ] +} \ No newline at end of file diff --git a/test/yolo11s-seg.pt b/test/yolo11s-seg.pt new file mode 100644 index 0000000..d7be62d Binary files /dev/null and b/test/yolo11s-seg.pt differ diff --git a/tools/calculate_pose.py b/tools/calculate_pose.py new file mode 100644 index 0000000..51fd978 --- /dev/null +++ b/tools/calculate_pose.py @@ -0,0 +1,285 @@ +import json +import time + +import cv2 +import open3d as o3d +import numpy as np +import torch +import transforms3d as tfs + +from ultralytics import YOLO + +import rclpy +from rclpy.node import Node + + +# x=0.08685893262849026, y=0.03733333467952511, z=0.5707736129272428 +# x=-0.04385456738166652, y=-0.6302417335103124, z=-0.07781808527160532, w=0.7712434634188767 + + +def main(): + color_image = cv2.imread("test/color_image.png") + depth_image = cv2.imread("test/depth_image.png", cv2.IMREAD_UNCHANGED) + with open("test/K.json", 'r') as f: + k = json.load(f) + with open("test/D.json", 'r') as f: + d = json.load(f) + with open("test/eye_in_right_hand.json", 'r') as f: + hand_eye_mat = np.array(json.load(f)["T"]) + + camera_size = [color_image.shape[1], color_image.shape[0]] + + # cv2.imshow("color_image", color_image) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + # cv2.imshow("depth_image", depth_image) + # cv2.waitKey(0) + # cv2.destroyAllWindows() + # print(k) + # print(d) + # print(eye_in_right_hand) + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model = YOLO("test/yolo11s-seg.pt").to(device) + + results = model(color_image, retina_masks=True, conf=0.5, classes=[39]) + result = results[0] + + # Get masks + if result.masks is None or len(result.masks) == 0: + return None, None + masks = result.masks.data.cpu().numpy() + + # Get boxes + boxes = result.boxes.xywh.cpu().numpy() + class_ids = result.boxes.cls.cpu().numpy() + labels = result.names + print(f"Detect object num: {len(masks)}") + + + + for i, (mask, box) in enumerate(zip(masks, boxes)): + imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_image, mask], box, True) + depth_crop, mask_crop = imgs_crop + + if depth_crop is None: + continue + + intrinsics = o3d.camera.PinholeCameraIntrinsic( + int(camera_size[0]), + int(camera_size[1]), + k[0], + k[4], + k[2] - x_min, + k[5] - y_min + ) + time1 = time.time() + rmat = calculate_pose_pca( + mask_crop, + depth_crop, + intrinsics, + ) + time2 = time.time() + print(f"PCA: {(time2 - time1) * 1000} ms") + + grab_width = calculate_grav_width(mask.astype(np.uint8) * 255, k, rmat[2, 3]) + rmat[2, 3] = rmat[2, 3] + grab_width * 0.22 + + rmat = hand_eye_mat @ rmat + + x, y, z, rw, rx, ry, rz = rmat2quat(rmat) + + pose_list = [] + if (x, y, z) != (0.0, 0.0, 0.0): + pose = [x, y, z, rx, ry, rz, rw] + pose_list.append( + { + "class_id": int(class_ids[i]), + "class_name": labels[class_ids[i]], + "pose": pose, + "grab_width": grab_width * 1.05 + } + ) + + +def crop_imgs_box_xywh(imgs: list, box, same_sign: bool = False): + """ + Crop imgs + + input: + imgs: list, Each img in imgs has the same Width and High. + box: The YOLO model outputs bounding box data in the format [x, y, w, h, confidence, class_id]. + same_sign: bool, Set True to skip size check if all img in imgs have the same Width and High. + output: + crop_imgs: list; + (x_min, y_min); + """ + if not imgs: + print("imgs is empty") + return [], (0, 0) + + if not same_sign and len(imgs) != 1: + for img in imgs: + if imgs[0].shape != img.shape: + raise ValueError(f"Img shape are different: {imgs[0].shape} - {img.shape}") + + high, width = imgs[0].shape[:2] + x_center, y_center, w, h = box[:4] + x_min, x_max = max(0, int(round(x_center - w/2))), min(int(round(x_center + w/2)), width-1) + y_min, y_max = max(0, int(round(y_center - h/2))), min(int(round(y_center + h/2)), high-1) + + crop_imgs = [img[y_min:y_max + 1, x_min:x_max + 1] for img in imgs] + + return crop_imgs, (x_min, y_min) + + +def pca(data: np.ndarray, sort=True): + """主成分分析 """ + center = np.mean(data, axis=0) + centered_points = data - center # 去中心化 + + try: + cov_matrix = np.cov(centered_points.T) # 转置 + eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix) + + except np.linalg.LinAlgError: + return None, None + + if sort: + sort = eigenvalues.argsort()[::-1] # 降序排列 + eigenvalues = eigenvalues[sort] # 索引 + eigenvectors = eigenvectors[:, sort] + + return eigenvalues, eigenvectors + + +def calculate_pose_pca( + mask, + depth_img: np.ndarray, + intrinsics, + source=None, + configs=None, + **kwargs +): + """计算位态""" + if configs is None: + configs = { + "depth_scale": 1000.0, + "depth_trunc": 3.0, + "voxel_size": 0.010, + "nb_points": 16, + "radius": 0.03, + "nb_neighbors": 10, + "std_ratio": 3.0 + } + depth_img_mask = np.zeros_like(depth_img) + depth_img_mask[mask > 0] = depth_img[mask > 0] + + depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16)) + + point_cloud = o3d.geometry.PointCloud.create_from_depth_image( + depth=depth_o3d, + intrinsic=intrinsics, + depth_scale=configs["depth_scale"], + depth_trunc=configs["depth_trunc"], + ) + + point_cloud = point_cloud.remove_non_finite_points() + + down_pcd = point_cloud.voxel_down_sample(voxel_size=configs["voxel_size"]) + clean_pcd, _ = down_pcd.remove_statistical_outlier( + nb_neighbors=configs["nb_neighbors"], + std_ratio=configs["std_ratio"] + ) + clean_pcd, _ = clean_pcd.remove_radius_outlier( + nb_points=configs["nb_points"], + radius=configs["radius"] + ) + labels = np.array(clean_pcd.cluster_dbscan(eps=0.03, min_points=16)) + largest_cluster_idx = np.argmax(np.bincount(labels[labels >= 0])) + clean_pcd = clean_pcd.select_by_index(np.where(labels == largest_cluster_idx)[0]) + + + if len(clean_pcd.points) == 0: + print("clean_pcd is empty") + return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + center = clean_pcd.get_center() + x, y, z = center + + w, v = pca(np.asarray(clean_pcd.points)) + + if w is None or v is None: + print("PCA output w or v is None") + return np.eye(4) + + vx, vy, vz = v[:, 0], v[:, 1], v[:, 2] + + if vx[0] < 0: + vx = -vx + if vy[1] < 0: + vy = -vy + if not np.allclose(np.cross(vx, vy), vz): + vz = -vz + + R = np.column_stack((vx, vy, vz)) + rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z+0.06*0.25))), R, [1, 1, 1]) + + draw(point_cloud, rmat) + # draw(down_pcd, rmat) + draw(clean_pcd, rmat) + + return rmat + + +def rmat2quat(rmat): + """Convert rotation matrix to quaternion.""" + x, y, z = rmat[0:3, 3:4].flatten() + rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3]) + quat = [x, y, z, rw, rx, ry, rz] + return quat + + +def calculate_grav_width(mask, k, depth): + """计算重心宽度""" + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + if not contours: + return 0.0 + + c = max(contours, key=cv2.contourArea) + box = cv2.boxPoints(cv2.minAreaRect(c)) + if np.linalg.norm(box[1] - box[0]) < np.linalg.norm(box[1] - box[2]): + point_diff = box[1] - box[0] + else: + point_diff = box[1] - box[2] + + grab_width = depth * np.sqrt( + point_diff[0] ** 2 / k[0] ** 2 + point_diff[1] ** 2 / k[4] ** 2 + ) + return grab_width + +def draw(pcd, mat): + R = mat[0:3, 0:3] + point = mat[0:3, 3:4].flatten() + x, y, z = R[:, 0], R[:, 1], R[:, 2] + + points = [ + [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], + point, point + x, point + y, point + z + + ] # 画点:原点、第一主成分、第二主成分 + lines = [ + [0, 1], [0, 2], [0, 3], + [4, 5], [4, 6], [4, 7] + ] # 画出三点之间两两连线 + colors = [ + [1, 0, 0], [0, 1, 0], [0, 0, 1], + [1, 0, 0], [0, 1, 0], [0, 0, 1] + ] + line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines)) + line_set.colors = o3d.utility.Vector3dVector(colors) + + o3d.visualization.draw_geometries([pcd, line_set]) + +if __name__ == '__main__': + main() diff --git a/vision_detect/vision_detect/VisionDetect/node/detect_node.py b/vision_detect/vision_detect/VisionDetect/node/detect_node.py index 6db2c80..5cc51d4 100644 --- a/vision_detect/vision_detect/VisionDetect/node/detect_node.py +++ b/vision_detect/vision_detect/VisionDetect/node/detect_node.py @@ -158,10 +158,12 @@ class DetectNode(Node): self.calculate_function = calculate_pose_pca elif self.calculate_mode == "ICP": self.configs = json.loads(self.get_parameter('ICA_configs').value) - self.source = o3d.io.read_point_cloud( + source = o3d.io.read_point_cloud( os.path.join(share_dir, self.configs['complete_model_path']) ) + self.configs["source"] = source self.calculate_function = calculate_pose_icp + else: self.get_logger().warning("Unknown calculate_mode, use PCA") self.configs = json.loads(self.get_parameter('PCA_configs').value) @@ -297,6 +299,7 @@ class DetectNode(Node): def _service_sub_callback(self, msgs): """同步回调函数""" with self.lock: + # self.get_logger().info("get msgs") self.camera_data[msgs.position] = [ msgs.image_color, msgs.image_depth, @@ -336,6 +339,7 @@ class DetectNode(Node): self.pub_pose_list.publish(pose_list_all) def _service_callback(self, request, response): + # self.get_logger().info("service callback start") response.header.stamp = self.get_clock().now().to_msg() response.header.frame_id = "camera_detect" @@ -343,10 +347,19 @@ class DetectNode(Node): if request.camera_position in self.camera_data: color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position] else: + if len(self.camera_data) == 0: + response.success = False + response.info = "Camera data have not objects" + response.objects = [] + + # self.get_logger().info("service callback done") + return response + response.success = False - response.info = f"{request.camera_position} Camera data is empty or name is wrong" + response.info = f"Name is wrong: {request.camera_position}" response.objects = [] + # self.get_logger().info("service callback done") return response if request.camera_position == 'left': @@ -389,6 +402,7 @@ class DetectNode(Node): response.success = False response.objects = [] + # self.get_logger().info("service callback done") return response def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray): @@ -403,6 +417,7 @@ class DetectNode(Node): # Get masks if result.masks is None or len(result.masks) == 0: + self.get_logger().info(f"Detect object num: 0") return None, None masks = result.masks.data.cpu().numpy() @@ -435,12 +450,14 @@ class DetectNode(Node): mask_crop, depth_crop, intrinsics, - self.source, - self.configs + **self.configs ) + if rmat is None: + self.get_logger().warning("Object point cloud have too many noise") + continue grab_width = calculate_grav_width(mask, self.k, rmat[2, 3]) - rmat[2, 3] = rmat[2, 3] + grab_width * 0.22 + rmat[2, 3] = rmat[2, 3] + grab_width * 0.38 rmat = self.hand_eye_mat @ rmat @@ -463,7 +480,7 @@ class DetectNode(Node): self.get_logger().info('start') self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict') - self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param') + # self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param') self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA') self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process') self.get_logger().info('end') @@ -519,8 +536,7 @@ class DetectNode(Node): mask_crop, depth_crop, intrinsics, - self.source, - self.configs + **self.configs ) rmat = self.hand_eye_mat @ rmat @@ -585,8 +601,7 @@ class DetectNode(Node): mask_crop, depth_crop, intrinsics, - self.source, - self.configs + **self.configs ) x, y, z, rw, rx, ry, rz = rmat2quat(rmat) diff --git a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py index 683c667..1f47ba5 100644 --- a/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py +++ b/vision_detect/vision_detect/VisionDetect/utils/calculate_tools.py @@ -37,20 +37,9 @@ def calculate_pose_pca( mask, depth_img: np.ndarray, intrinsics, - source=None, - configs=None + **kwargs ): - """计算位态""" - if configs is None: - configs = { - "depth_scale": 1000.0, - "depth_trunc": 3.0, - "voxel_size": 0.020, - "nb_points": 10, - "radius": 0.1, - "nb_neighbors": 20, - "std_ratio": 3.0 - } + """点云主成分分析法计算位态""" depth_img_mask = np.zeros_like(depth_img) depth_img_mask[mask > 0] = depth_img[mask > 0] @@ -59,29 +48,21 @@ def calculate_pose_pca( point_cloud = o3d.geometry.PointCloud.create_from_depth_image( depth=depth_o3d, intrinsic=intrinsics, - depth_scale=configs["depth_scale"], - depth_trunc=configs["depth_trunc"], + depth_scale=kwargs.get("depth_scale", 1000.0), + depth_trunc=kwargs.get("depth_trunc", 3.0), ) - point_cloud = point_cloud.remove_non_finite_points() + point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010)) + if point_cloud is None: + return None - down_pcd = point_cloud.voxel_down_sample(voxel_size=configs["voxel_size"]) - clean_pcd, _ = down_pcd.remove_radius_outlier( - nb_points=configs["nb_points"], - radius=configs["radius"] - ) - clean_pcd, _ = clean_pcd.remove_statistical_outlier( - nb_neighbors=configs["nb_neighbors"], - std_ratio=configs["std_ratio"] - ) - - if len(clean_pcd.points) == 0: + if len(point_cloud.points) == 0: logging.warning("clean_pcd is empty") - return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 - center = clean_pcd.get_center() + return np.eye(4) + center = point_cloud.get_center() x, y, z = center - w, v = pca(np.asarray(clean_pcd.points)) + w, v = pca(np.asarray(point_cloud.points)) if w is None or v is None: logging.warning("PCA output w or v is None") @@ -99,6 +80,9 @@ def calculate_pose_pca( R = np.column_stack((vx, vy, vz)) rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1]) + # draw(point_cloud, rmat) + # draw(point_cloud_1, rmat) + return rmat @@ -106,22 +90,9 @@ def calculate_pose_icp( mask, depth_img: np.ndarray, intrinsics, - source, - configs = None, + **kwargs ): - if configs is None: - configs = { - "depth_scale": 1000.0, - "depth_trunc": 2.0, - "nb_points": 10, - "radius": 0.1, - "nb_neighbors": 20, - "std_ratio": 3.0, - "ransac_voxel_size": 0.005, - "icp_voxel_radius": [0.004, 0.002, 0.001], - "icp_max_iter": [50, 30, 14] - } - """计算位态""" + """点云配准法计算位姿""" depth_img_mask = np.zeros_like(depth_img) depth_img_mask[mask > 0] = depth_img[mask > 0] @@ -130,38 +101,92 @@ def calculate_pose_icp( point_cloud = o3d.geometry.PointCloud.create_from_depth_image( depth=depth_o3d, intrinsic=intrinsics, - depth_scale=configs["depth_scale"], - depth_trunc=configs["depth_trunc"], + depth_scale=kwargs.get("depth_scale", 1000.0), + depth_trunc=kwargs.get("depth_trunc", 3.0) ) - point_cloud = point_cloud.remove_non_finite_points() + point_cloud = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.010)) + if point_cloud is None: + return None - clean_pcd, _ = point_cloud.remove_radius_outlier( - nb_points=configs["nb_points"], - radius=configs["radius"] - ) - clean_pcd, _ = clean_pcd.remove_statistical_outlier( - nb_neighbors=configs["nb_neighbors"], - std_ratio=configs["std_ratio"] - ) - - if len(clean_pcd.points) == 0: + if len(point_cloud.points) == 0: logging.warning("clean_pcd is empty") return np.eye(4) rmat = object_icp( - source, - clean_pcd, - ransac_voxel_size=configs["ransac_voxel_size"], - icp_voxel_radius=configs["icp_voxel_radius"], - icp_max_iter=configs["icp_max_iter"] + kwargs.get("source"), + point_cloud, + ransac_voxel_size=kwargs.get("ransac_voxel_size", 0.005), + icp_voxel_radius=kwargs.get("icp_voxel_radius", [0.004, 0.002, 0.001]), + icp_max_iter=kwargs.get("icp_max_iter", [50, 30, 14]) ) return rmat +def point_cloud_denoising(point_cloud: o3d.geometry.PointCloud, voxel_size: float = 0.010): + """点云去噪""" + point_cloud = point_cloud.remove_non_finite_points() + down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size) + + # 半径滤波 + clean_pcd, _ = down_pcd.remove_radius_outlier( + nb_points=10, + radius=voxel_size * 5 + ) + + # 统计滤波 + clean_pcd, _ = clean_pcd.remove_statistical_outlier( + nb_neighbors=10, + std_ratio=2.0 + ) + + # 过滤过近的点 + points = np.asarray(clean_pcd.points) + clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2]) + + # # 使用数量最大簇判定噪声强度 + # _, counts = np.unique(labels[labels >= 0], return_counts=True) + # largest_cluster_ratio = counts.max() / len(labels) + # if largest_cluster_ratio < 0.5: + # return None + + labels = np.array(clean_pcd.cluster_dbscan(eps=voxel_size * 5, min_points=10)) + if len(labels[labels >= 0]) == 0: + return clean_pcd + + # 使用距离最近簇作为物体 + points = np.asarray(clean_pcd.points) + cluster_label = set(labels) + point_cloud_clusters = [] + for label in cluster_label: + if label == -1: + continue + idx = np.where(labels == label)[0] + point_cloud_cluster = clean_pcd.select_by_index(idx) + points_cluster_z = points[idx, 2] + z_avg = np.mean(points_cluster_z) + if z_avg < 0.2: + continue + point_cloud_clusters.append((point_cloud_cluster, z_avg)) + + if len(point_cloud_clusters) == 0: + return clean_pcd + + point_cloud_clusters.sort(key=lambda x: x[1]) + clean_pcd = point_cloud_clusters[0][0] + + # 使用最近簇判断噪音强度 + largest_cluster_ratio = len(clean_pcd.points) / len(points) + if largest_cluster_ratio < 0.2: + return None + + return clean_pcd + + def calculate_grav_width(mask, k, depth): """计算重心宽度""" + mask = mask.astype(np.uint8) * 255 contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if not contours: @@ -194,3 +219,26 @@ def quat2rmat(quat): r = tfs.quaternions.quat2mat([rw, rx, ry, rz]) rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), r, [1, 1, 1]) return rmat + +# def draw(pcd, mat): +# R = mat[0:3, 0:3] +# point = mat[0:3, 3:4].flatten() +# x, y, z = R[:, 0], R[:, 1], R[:, 2] +# +# points = [ +# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], +# point, point + x, point + y, point + z +# +# ] # 画点:原点、第一主成分、第二主成分 +# lines = [ +# [0, 1], [0, 2], [0, 3], +# [4, 5], [4, 6], [4, 7] +# ] # 画出三点之间两两连线 +# colors = [ +# [1, 0, 0], [0, 1, 0], [0, 0, 1], +# [1, 0, 0], [0, 1, 0], [0, 0, 1] +# ] +# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines)) +# line_set.colors = o3d.utility.Vector3dVector(colors) +# +# o3d.visualization.draw_geometries([pcd, line_set]) diff --git a/vision_detect/vision_detect/flexivaidk_detect_service.py b/vision_detect/vision_detect/flexivaidk_detect_service.py index a03fb97..7b8ef93 100644 --- a/vision_detect/vision_detect/flexivaidk_detect_service.py +++ b/vision_detect/vision_detect/flexivaidk_detect_service.py @@ -333,22 +333,6 @@ class DetectNode(Node): p1 = [int((x_center - width / 2)), int((y_center - height / 2))] p2 = [int((x_center + width / 2)), int((y_center + height / 2))] cv2.rectangle(img, p1, p2, (255, 255, 0), 2) - - # rgb_crop, depth_crop, mask_crop, (x_min, y_min) = crop_mask_bbox(rgb_img, depth_img, mask, box) - - # if depth_crop is None: - # self.get_logger().error("depth_crop is None") - # continue - - # depth_img_crop_mask = np.zeros_like(depth_crop) - # depth_img_crop_mask[mask_crop > 0] = depth_crop[mask_crop > 0] - - # print(rgb_crop.shape) - # print(rgb_crop.dtype) - - - # rgb_bytes = cv2.imencode('.png', rgb_crop)[1] - # depth_bytes = cv2.imencode('.png', depth_img_crop_mask)[1] contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) mask_contours = contours[0].reshape(1, -1, 2) @@ -378,7 +362,8 @@ class DetectNode(Node): self.get_logger().info(f"state: {state}") self.get_logger().info( - f"current detected object names: {self.aidk_client.get_detected_obj_names()}, current detected object nums: {self.aidk_client.get_detected_obj_nums()}" + f"current detected object names: {self.aidk_client.get_detected_obj_names()}, " + f"current detected object nums: {self.aidk_client.get_detected_obj_nums()}" ) self.get_logger().info( @@ -415,42 +400,6 @@ class DetectNode(Node): } ) - # for key in self.config["keys"]: - # parse_state, result_list = self.aidk_client.parse_result(self.config["command"]["obj_name"], key, -1) - # self.get_logger().info( - # "detected time stamp: {}".format( - # datetime.fromtimestamp(self.aidk_client.get_detected_time()) - # ) - # ) - # if not parse_state: - # self.get_logger().error("Parse result error!!!") - # continue - # else: - # if key in ["bbox", "keypoints", "positions", "obj_pose"]: - # for result in result_list: - # self.get_logger().info(f'"double_value in result": {getattr(result, "double_value")}') - # for vec in result.vect: - # self.get_logger().info(f"vec: {vec}") - # x, y, z, rw, rx, ry, rz = vec - # - # pose = Pose() - # pose.position = Point(x=x, y=y, z=z) - # pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz) - # pose_list.append( - # { - # "class_id": int(class_ids[i]), - # "class_name": labels[class_ids[i]], - # "pose": pose, - # "grab_width": getattr(result, "double_value") - # } - # ) - # self.get_logger().info(f"pose: position({x}, {y}, {z}), orientation({rw}, {rx}, {ry}, {rz})") - # self.get_logger().info(f"class_id: {int(class_ids[i])}, class_name: {labels[class_ids[i]]}, grab_width: {getattr(result, 'double_value')}") - # - # elif key in ["valid", "double_value", "int_value", "name"]: - # for result in result_list: - # self.get_logger().info(f"{key}: {getattr(result, key)}") - time4 = time.time() cv2.imwrite("/home/nvidia/detect_result.png", img)