liangyuxuan
2025-11-27 16:27:11 +08:00
parent 101857b8c4
commit 694272080a
3 changed files with 91 additions and 77 deletions

View File

@@ -1,3 +1,4 @@
"""Vision Detect Node"""
import os
import time
import threading
@@ -24,28 +25,46 @@ from interfaces.msg import PoseClassAndID, PoseArrayClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
from ..utils import calculate_pose_pca, calculate_pose_icp, get_map, distortion_correction, crop_imgs_box_xywh, \
from ..utils import calculate_pose_pca, calculate_pose_icp, get_map, \
distortion_correction, crop_imgs_box_xywh, \
draw_box, draw_mask, crop_imgs_mask, calculate_grav_width
share_dir = get_package_share_directory('vision_detect')
class DetectNode(Node):
"""Detect Node"""
def __init__(self, name):
super().__init__(name)
self.device = None
self.checkpoint_path = None
self.output_boxes = None
self.output_masks = None
self.function = None
self.source = None
self.K = None
self.eye_in_hand_mat = None
self.server = None
self.model = None
self.k = self.d = None
self.eye_in_left_hand_mat = None
self.eye_in_right_hand_mat = None
self.eye_to_hand_mat = None
self.hand_eye_mat = None
self.eye_in_left_hand_path = None
self.eye_in_right_hand_path = None
self.eye_to_hand_path = None
self.get_camera_mode = None
self.detect_mode = None
self.calculate_mode = None
self.camera_size = None
self.map1 = self.map2 = None
self.calculate_function = calculate_pose_pca
self.fx = self.fy = 0.5
self.camera_data = {}
@@ -158,7 +177,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_in_left_hand_path} not found, use E(4, 4)")
self.eye_in_left_hand_mat = np.eye(4)
else:
with open(self.eye_in_left_hand_path, "r") as f:
with open(self.eye_in_left_hand_path, "r", encoding="utf-8") as f:
self.eye_in_left_hand_mat = np.array(json.load(f)["T"])
if self.eye_in_left_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_in_right_hand_mat is wrong, use E(4, 4)")
@@ -168,7 +187,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_in_right_hand_path} not found, use E(4, 4)")
self.eye_in_right_hand_mat = np.eye(4)
else:
with open(self.eye_in_right_hand_path, "r") as f:
with open(self.eye_in_right_hand_path, "r", encoding="utf-8") as f:
self.eye_in_right_hand_mat = np.array(json.load(f)["T"])
if self.eye_in_right_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_in_right_hand_mat is wrong, use E(4, 4)")
@@ -178,7 +197,7 @@ class DetectNode(Node):
self.get_logger().warning(f"{self.eye_to_hand_path} not found, use E(4, 4)")
self.eye_to_hand_mat = np.eye(4)
else:
with open(self.eye_to_hand_path, "r") as f:
with open(self.eye_to_hand_path, "r", encoding="utf-8") as f:
self.eye_to_hand_mat = np.array(json.load(f)["T"])
if self.eye_to_hand_mat.shape != (4, 4):
self.get_logger().warning("The shape of eye_to_hand_mat is wrong, use E(4, 4)")
@@ -193,7 +212,7 @@ class DetectNode(Node):
self.model = YOLO(self.checkpoint_path).to(device_model)
except Exception as e:
self.get_logger().error(f'Failed to load YOLO model: {e}')
raise
raise
self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
if self.checkpoint_path.endswith('-seg.pt'):
@@ -201,7 +220,7 @@ class DetectNode(Node):
else:
self.function = None
self.get_logger().fatal(f'Unknown checkpoint: {self.checkpoint_path}')
raise
raise ValueError(f'Unknown checkpoint: {self.checkpoint_path}')
self.get_logger().info("Initialize model done")
def _init_publisher(self):
@@ -244,7 +263,7 @@ class DetectNode(Node):
rclpy.spin_until_future_complete(self, self.future)
self.get_logger().info("Camera info received.")
'''sync get color and depth img'''
# sync get color and depth img
self.sub_color_image = Subscriber(self, Image, self.color_image_topic_name)
self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic_name)
@@ -267,17 +286,18 @@ class DetectNode(Node):
def _camera_info_callback(self, msg: CameraInfo):
"""Get camera info"""
self.K = msg.k
self.D = msg.d
self.k = msg.k
self.d = msg.d
self.camera_size = [msg.width, msg.height]
if self.K is not None and len(self.K) > 0 and self.D is not None and len(self.D) > 0:
self.map1, self.map2, self.K = get_map(msg.k, msg.d, self.camera_size)
if not self.future.done(): self.future.set_result(True)
if self.k is not None and len(self.k) > 0 and self.d is not None and len(self.d) > 0:
self.map1, self.map2, self.k = get_map(msg.k, msg.d, self.camera_size)
if not self.future.done():
self.future.set_result(True)
self.destroy_subscription(self.sub_camera_info)
else:
self.get_logger().warning("K and D are not defined")
self.get_logger().warning("K and d are not defined")
def _service_sub_callback(self, msgs):
"""同步回调函数"""
@@ -298,7 +318,7 @@ class DetectNode(Node):
img, pose_list = self.function(color_img_cv, depth_img_cv)
"""masks为空结束这一帧"""
# masks为空结束这一帧
if img is None:
img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
@@ -326,7 +346,7 @@ class DetectNode(Node):
with self.lock:
if request.camera_position in self.camera_data:
color_img_ros, depth_img_ros, self.K, D = self.camera_data[request.camera_position]
color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position]
else:
response.success = False
response.info = f"{request.camera_position} Camera data is empty or name is wrong"
@@ -346,12 +366,12 @@ class DetectNode(Node):
color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
map1, map2, self.K = get_map(self.K, D, self.camera_size)
map1, map2, self.k = get_map(self.k, d, self.camera_size)
color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
img, pose_list = self.function(color_img_cv, depth_img_cv)
"""masks为空结束这一帧"""
# masks为空结束这一帧
if self.output_boxes or self.output_masks:
if img is None:
img = color_img_ros
@@ -369,32 +389,29 @@ class DetectNode(Node):
grab_width = item["grab_width"]
)
)
return response
else:
response.info = "pose dict is empty"
response.success = False
response.objects = []
return response
return response
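A minimal sketch of calling this service from a client node; it assumes only the fields visible in this diff (request.camera_position, response.success, response.info, response.objects), and the service and camera names are hypothetical placeholders for whatever the node was launched with:

import rclpy
from rclpy.node import Node
from interfaces.srv import VisionObjectRecognition

rclpy.init()
node = Node("vision_detect_client")
client = node.create_client(VisionObjectRecognition, "vision_object_recognition")  # hypothetical service name
client.wait_for_service()

request = VisionObjectRecognition.Request()
request.camera_position = "head"  # hypothetical camera name registered in camera_data
future = client.call_async(request)
rclpy.spin_until_future_complete(node, future)

response = future.result()
if response.success:
    for obj in response.objects:
        node.get_logger().info(f"detected: {obj}")
else:
    node.get_logger().warning(response.info)

node.destroy_node()
rclpy.shutdown()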
def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
"""Use segmentation model"""
pose_list = []
'''Get Predict Results'''
# Get Predict Results
time1 = time.time()
results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
time2 = time.time()
result = results[0]
'''Get masks'''
# Get masks
if result.masks is None or len(result.masks) == 0:
return None, None
masks = result.masks.data.cpu().numpy()
'''Get boxes'''
# Get boxes
boxes = result.boxes.xywh.cpu().numpy()
class_ids = result.boxes.cls.cpu().numpy()
labels = result.names
@@ -411,10 +428,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
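The flat self.k used above is the row-major 3x3 intrinsic matrix from CameraInfo, so k[0] is fx, k[4] is fy, k[2] is cx and k[5] is cy; subtracting x_min and y_min shifts the principal point into the cropped box's pixel coordinates. A small sketch of that mapping (names are illustrative only):

def cropped_intrinsics(k, x_min, y_min):
    # k: row-major 3x3 intrinsics flattened to 9 values:
    #   [fx, 0, cx,
    #    0, fy, cy,
    #    0,  0,  1]
    fx, fy = k[0], k[4]
    cx, cy = k[2], k[5]
    # Cropping the image at (x_min, y_min) shifts the principal point by the same offset.
    return fx, fy, cx - x_min, cy - y_min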
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -426,7 +443,7 @@ class DetectNode(Node):
self.configs
)
grab_width = calculate_grav_width(mask, self.K, z)
grab_width = calculate_grav_width(mask, self.k, z)
z = z + grab_width * 0.45
if (x, y, z) != (0.0, 0.0, 0.0):
@@ -444,14 +461,14 @@ class DetectNode(Node):
time4 = time.time()
self.get_logger().info(f'start')
self.get_logger().info('start')
self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
self.get_logger().info(f'{(time3 - time2) * 1000} ms, get mask and some param')
self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
self.get_logger().info(f'end')
self.get_logger().info('end')
'''whether to output mask_img and box_img'''
# whether to output mask_img and box_img
if self.output_boxes and not self.output_masks:
draw_box(rgb_img, result)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
@@ -492,10 +509,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -554,10 +571,10 @@ class DetectNode(Node):
intrinsics = o3d.camera.PinholeCameraIntrinsic(
int(self.camera_size[0]),
int(self.camera_size[1]),
self.K[0],
self.K[4],
self.K[2] - x_min,
self.K[5] - y_min
self.k[0],
self.k[4],
self.k[2] - x_min,
self.k[5] - y_min
)
x, y, z, rw, rx, ry, rz = self.calculate_function(
@@ -600,8 +617,8 @@ class DetectNode(Node):
2
)
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list
else:
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None
if __name__ == '__main__':
@@ -613,4 +630,4 @@ if __name__ == '__main__':
pass
finally:
node.destroy_node()
rclpy.shutdown()
rclpy.shutdown()

View File

@@ -12,19 +12,16 @@
#include "interfaces/srv/vision_object_recognition.hpp"
#include "interfaces/msg/pose_array_class_and_id.hpp"
using json = nlohmann::json;
using SyncPolicy = message_filters::sync_policies::ApproximateTime<sensor_msgs::msg::Image, sensor_msgs::msg::Image>;
class VisionDetectNode: public rclcpp::Node {
public:
explicit VisionDetectNode(const std::string& name);
private:
bool output_box, output_mask;
std::string GetCameraMode, DetectMode, CalculateMode;
std::string get_camera_mode, detect_mode, calculate_mode;
std::string cfg_str;
json calibrition_path;
json GetCameraConfigs, DetectConfigs, CalculateConfigs;
nlohmann::json calibrition_path;
nlohmann::json get_camera_configs, segmentation_configs, calculate_configs;
Eigen::Matrix4d left_hand_mat, right_hand_mat, head_mat;
@@ -36,7 +33,7 @@ class VisionDetectNode: public rclcpp::Node {
message_filters::Subscriber<sensor_msgs::msg::Image> color_img_sub;
message_filters::Subscriber<sensor_msgs::msg::Image> depth_img_sub;
std::shared_ptr<message_filters::Synchronizer<SyncPolicy>> sync_sub;
std::shared_ptr<message_filters::Synchronizer<message_filters::sync_policies::ApproximateTime<sensor_msgs::msg::Image, sensor_msgs::msg::Image>>> sync_sub;
rclcpp::Subscription<sensor_msgs::msg::CameraInfo>::SharedPtr camera_info_sub;

View File

@@ -13,38 +13,38 @@ VisionDetectNode::VisionDetectNode(const std::string& name): Node(name) {
this->init_publisher();
}
if (this->GetCameraMode == "Service") {
this->GetCameraConfigs = json::parse(this->get_parameter("ServiceConfigs").as_string());
if (this->get_camera_mode == "Service") {
this->get_camera_configs = json::parse(this->get_parameter("ServiceConfigs").as_string());
this->init_service();
this->init_subscriber();
}
else if (this->GetCameraMode == "Topic") {
this->GetCameraConfigs = json::parse(this->get_parameter("TopicConfigs").as_string());
else if (this->get_camera_mode == "Topic") {
this->get_camera_configs = json::parse(this->get_parameter("TopicConfigs").as_string());
this->init_topic();
}
else {
RCLCPP_WARN(this->get_logger(), "Unkonw GetCameraMode, Use Service");
this->GetCameraConfigs = json::parse(this->get_parameter("ServiceConfigs").as_string());
RCLCPP_WARN(this->get_logger(), "Unkonw get_camera_mode, Use Service");
this->get_camera_configs = json::parse(this->get_parameter("ServiceConfigs").as_string());
this->init_service();
this->init_subscriber();
}
if (this->DetectMode == "Detect") {
if (this->detect_mode == "Detect") {
this->init_model();
this->DetectConfigs = json::parse(this->get_parameter("DetectConfigs").as_string());
this->segmentation_configs = json::parse(this->get_parameter("DetectConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_img;
}
else if (this->DetectMode == "Color") {
this->DetectConfigs = json::parse(this->get_parameter("ColorConfigs").as_string());
else if (this->detect_mode == "Color") {
this->segmentation_configs = json::parse(this->get_parameter("ColorConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_color;
}
else if (this->DetectMode == "Crossboard") {
this->DetectConfigs = json::parse(this->get_parameter("CrossboardConfigs").as_string());
else if (this->detect_mode == "Crossboard") {
this->segmentation_configs = json::parse(this->get_parameter("CrossboardConfigs").as_string());
this->detect_function = &VisionDetectNode::seg_crossboard;
}
else {
RCLCPP_WARN(this->get_logger(), "Unkonw DetectMode, Use Service");
this->DetectConfigs = json::parse(this->get_parameter("DetectConfigs").as_string());
RCLCPP_WARN(this->get_logger(), "Unkonw detect_mode, Use Service");
this->segmentation_configs = json::parse(this->get_parameter("DetectConfigs").as_string());
}
RCLCPP_INFO(this->get_logger(), "Init process done");
}
@@ -58,13 +58,13 @@ void VisionDetectNode::init_param() {
this->output_box = this->get_parameter("output_box").as_bool();
this->output_mask = this->get_parameter("output_mask").as_bool();
this->declare_parameter<std::string>("GetCameraMode", "Service");
this->declare_parameter<std::string>("DetectMode", "Detect");
this->declare_parameter<std::string>("CalculateMode", "PCA");
this->declare_parameter<std::string>("get_camera_mode", "Service");
this->declare_parameter<std::string>("detect_mode", "Detect");
this->declare_parameter<std::string>("calculate_mode", "PCA");
this->GetCameraMode = this->get_parameter("GetCameraMode").as_string();
this->DetectMode = this->get_parameter("DetectMode").as_string();
this->CalculateMode = this->get_parameter("CalculateMode").as_string();
this->get_camera_mode = this->get_parameter("get_camera_mode").as_string();
this->detect_mode = this->get_parameter("detect_mode").as_string();
this->calculate_mode = this->get_parameter("calculate_mode").as_string();
this->declare_parameter<std::string>("calibrition_path", "{}");
@@ -116,7 +116,7 @@ void VisionDetectNode::init_calibration_mat() {
void VisionDetectNode::init_service() {
/*Init_service*/
detect_server = this->create_service<interfaces::srv::VisionObjectRecognition>(
this->GetCameraConfigs["service_name"].get<std::string>(),
this->get_camera_configs["service_name"].get<std::string>(),
std::bind(
&VisionDetectNode::service_callback,
this,
@@ -132,8 +132,8 @@ void VisionDetectNode::init_service() {
void VisionDetectNode::init_topic() {
/*Init Topic*/
this->color_img_sub.subscribe(this, this->GetCameraConfigs["color_image_topic_name"].get<std::string>());
this->depth_img_sub.subscribe(this, this->GetCameraConfigs["depth_image_topic_name"].get<std::string>());
this->color_img_sub.subscribe(this, this->get_camera_configs["color_image_topic_name"].get<std::string>());
this->depth_img_sub.subscribe(this, this->get_camera_configs["depth_image_topic_name"].get<std::string>());
this->sync_sub = std::make_shared<message_filters::Synchronizer<SyncPolicy>>(
SyncPolicy(10),
color_img_sub, depth_img_sub
@@ -142,7 +142,7 @@ void VisionDetectNode::init_topic() {
this->sync_sub->registerCallback(&VisionDetectNode::topic_callback, this);
this->camera_info_sub = this->create_subscription<sensor_msgs::msg::CameraInfo>(
this->GetCameraConfigs["camera_info_topic_name"].get<std::string>(),
this->get_camera_configs["camera_info_topic_name"].get<std::string>(),
10,
std::bind(&VisionDetectNode::camera_info_callback, this, std::placeholders::_1)
);
@@ -156,7 +156,7 @@ void VisionDetectNode::init_publisher() {
/*Init Publisher*/
pub_detect_img = this->create_publisher<sensor_msgs::msg::Image>("/image/detect_image", 10);
if (GetCameraMode == "Topic") {
if (get_camera_mode == "Topic") {
pub_pose_list = this->create_publisher<interfaces::msg::PoseArrayClassAndID>("/pose/cv_detect_pose", 10);
}