8 Commits

Author SHA1 Message Date
liangyuxuan
867606dd68 add 预处理模块测试节点、图像质量判断功能 2026-03-06 15:07:34 +08:00
liangyuxuan
0bcf24e0ca 增加相机控制接口,图像预处理部分功能 2026-03-06 10:07:31 +08:00
liangyuxuan
d960da1192 框架调整(部分),暂无crossboard-color-icp-e2e,添加了动作服务节点及动作客户端节点 2026-02-06 18:09:37 +08:00
liangyuxuan
6ebf159234 1 2026-01-23 11:47:45 +08:00
liangyuxuan
86e5ad76fc 1 2026-01-23 11:39:30 +08:00
liangyuxuan
54981b3892 1 2026-01-23 11:36:09 +08:00
liangyuxuan
1602b1423d 1 2026-01-21 16:06:22 +08:00
liangyuxuan
b04196de57 1 2026-01-21 09:13:06 +08:00
196 changed files with 4355 additions and 3170 deletions

3
.gitignore vendored
View File

@@ -3,4 +3,5 @@
**/build/
**/install/
**/log/
**/VisionDetect/**/**.so
**/vision_core/**/**.so
**/__pycache__

View File

@@ -0,0 +1,17 @@
import os
import json
from ultralytics import YOLO

# Export a YOLO model's class-name -> class-id mapping as a JSON label map.
checkpoint_path = "vision_detect/checkpoints/medical_sense-seg.pt"
save_path = "vision_detect/manager_map/label/medical_sense.json"

model = YOLO(os.path.expanduser(checkpoint_path))
# model.names maps id -> name; invert it to name -> id.
name_to_id = {v: k for k, v in model.names.items()}
print(name_to_id)

# Bug fix: make sure the destination directory exists before writing,
# otherwise open(..., "w") raises FileNotFoundError on a fresh checkout.
save_file = os.path.expanduser(save_path)
os.makedirs(os.path.dirname(save_file), exist_ok=True)
with open(save_file, "w", encoding="utf-8") as f:
    json.dump(name_to_id, f, ensure_ascii=False, indent=2)

View File

@@ -0,0 +1,56 @@
cmake_minimum_required(VERSION 3.8)
project(vision_control)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# --- dependencies ------------------------------------------------------------
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED)
find_package(std_srvs REQUIRED)
find_package(interfaces REQUIRED)
find_package(orbbec_camera_msgs REQUIRED)

# --- targets -----------------------------------------------------------------
add_executable(camera_control_node src/CameraRawControlNode.cpp)

# Consolidated from two separate target_include_directories calls.
# NOTE(review): the hardcoded /usr/include/nlohmann path is fragile; prefer
#   find_package(nlohmann_json REQUIRED)
#   target_link_libraries(camera_control_node PRIVATE nlohmann_json::nlohmann_json)
# once the json dev package's CMake config is guaranteed on all build machines.
target_include_directories(camera_control_node
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    /usr/include/nlohmann # json
)

ament_target_dependencies(
  camera_control_node
  rclcpp std_srvs interfaces orbbec_camera_msgs
)

# --- install -----------------------------------------------------------------
install(
  DIRECTORY
    launch
    configs
  DESTINATION share/${PROJECT_NAME}
)
install(
  TARGETS camera_control_node
  DESTINATION lib/${PROJECT_NAME}
)

# --- testing -----------------------------------------------------------------
if(BUILD_TESTING)
  find_package(ament_lint_auto REQUIRED)
  # the following line skips the linter which checks for copyrights
  # comment the line when a copyright and license is added to all source files
  set(ament_cmake_copyright_FOUND TRUE)
  # the following line skips cpplint (only works in a git repo)
  # comment the line when this package is in a git repo and when
  # a copyright and license is added to all source files
  set(ament_cmake_cpplint_FOUND TRUE)
  ament_lint_auto_find_test_dependencies()
endif()

ament_package()

View File

@@ -0,0 +1,8 @@
{
"left_name": "/camera1",
"left_namespace": "/camera1",
"right_name": "/camera2",
"right_namespace": "/camera2",
"head_name": "/camera",
"head_namespace": "/camera"
}

View File

@@ -0,0 +1,50 @@
#pragma once
#include <string>
#include <rclcpp/rclcpp.hpp>
#include <rclcpp/parameter_client.hpp>
#include <std_msgs/msg/string.hpp>
#include "interfaces/srv/set_camera_raw_params.hpp"
#include "interfaces/srv/set_camera_raw_status.hpp"
#include "orbbec_camera_msgs/srv/set_int32.hpp"
#include "std_srvs/srv/set_bool.hpp"
// ROS 2 node exposing two services for controlling the cameras:
//   - "set_camera_raw_params": exposure/gain for a camera's color/depth stream
//   - "set_camera_raw_status": enabling/disabling color/depth/ir streams
// Left/right cameras are driven through rclcpp parameter clients; the head
// camera uses the driver's dedicated services (see the .cpp for details).
class CameraRawControlNode : public rclcpp::Node {
public:
    // `name` is the ROS node name.
    CameraRawControlNode(std::string name);

private:
    // True when the corresponding camera's parameter service was reachable
    // during construction; checked before every forwarded request.
    bool left_sign, right_sign, head_sign;
    // Camera node names and namespaces, loaded from the configs JSON file
    // (with hardcoded fallbacks when the file cannot be read).
    std::string left_name, right_name, head_name;
    std::string left_namespace, right_namespace, head_namespace;
    // Callback group shared by all service clients.
    rclcpp::CallbackGroup::SharedPtr client_cb_group_;
    // Services offered by this node.
    rclcpp::Service<interfaces::srv::SetCameraRawParams>::SharedPtr raw_params_service;
    rclcpp::Service<interfaces::srv::SetCameraRawStatus>::SharedPtr raw_status_service;
    // rclcpp::Client<orbbec_camera_msgs::srv::SetInt32> ::SharedPtr
    // Synchronous parameter clients, one per camera.
    std::shared_ptr<rclcpp::SyncParametersClient> camera_left;
    std::shared_ptr<rclcpp::SyncParametersClient> camera_right;
    std::shared_ptr<rclcpp::SyncParametersClient> camera_head;
    // Head-camera service clients: exposure/gain per stream.
    rclcpp::Client<orbbec_camera_msgs::srv::SetInt32>::SharedPtr color_exposure, depth_exposure;
    rclcpp::Client<orbbec_camera_msgs::srv::SetInt32>::SharedPtr color_gain, depth_gain;
    // Head-camera stream on/off toggle clients.
    rclcpp::Client<std_srvs::srv::SetBool>::SharedPtr color_raw_control, depth_raw_control;
    // Handler for "set_camera_raw_params".
    void raw_params_callback(
        const std::shared_ptr<interfaces::srv::SetCameraRawParams::Request> &request,
        const std::shared_ptr<interfaces::srv::SetCameraRawParams::Response> &response
    );
    // Handler for "set_camera_raw_status".
    void raw_status_callback(
        const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request,
        const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> &response
    );
};

View File

@@ -1,21 +1,20 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>vision_pose_msgs</name>
<name>vision_control</name>
<version>0.0.0</version>
<description>TODO: Package description</description>
<maintainer email="lyx@todo.todo">lyx</maintainer>
<license>Apache-2.0</license>
<maintainer email="16126883+liangyuxuan123@user.noreply.gitee.com">lyx</maintainer>
<license>TODO: License declaration</license>
<buildtool_depend>ament_cmake</buildtool_depend>
<buildtool_depend>geometry_msgs</buildtool_depend>
<build_depend>rosidl_default_generators</build_depend>
<exec_depend>rosidl_default_runtime</exec_depend>
<member_of_group>rosidl_interface_packages</member_of_group>
<depend>rclpy</depend>
<depend>rclcpp</depend>
<depend>std_srvs</depend>
<depend>orbbec_camera_msgs</depend>
<depend>interfaces</depend>
<test_depend>ament_lint_auto</test_depend>
<test_depend>ament_lint_common</test_depend>

View File

@@ -0,0 +1,379 @@
#include <iostream>
#include <fstream>
#include <nlohmann/json.hpp>
#include <rclcpp/parameter.hpp>
#include <rclcpp/executors/multi_threaded_executor.hpp>
#include "ament_index_cpp/get_package_share_directory.hpp"
#include "vision_control/CameraRawControlNode.hpp"
using json = nlohmann::json;
using string = std::string;
using raw_params_type = orbbec_camera_msgs::srv::SetInt32;
const string SHARED_DIR = ament_index_cpp::get_package_share_directory("vision_control");
const string CONFIGS_PATH = "configs/raw_control_configs.json";
// Build a synchronous parameter client for one camera and wait for its
// parameter service, with up to 3 one-second waits. On exit, `sign` reports
// whether the service became available.
void create_params_client(
    std::shared_ptr<rclcpp::SyncParametersClient> &camera_client,
    rclcpp::Node *node,
    const string &params_service_name,
    const string position,
    bool &sign
) {
    camera_client = std::make_shared<rclcpp::SyncParametersClient>(node, params_service_name);
    int failed_waits = 0;
    // Bug fix: check the attempt bound BEFORE waiting again. The original
    // evaluated wait_for_service() first, so a 4th wait could succeed and yet
    // the service would still be reported unavailable.
    while (failed_waits < 3 && !camera_client->wait_for_service(std::chrono::seconds(1))) {
        // Bug fix: use an explicit "%s" format; passing a dynamic string as
        // the format string misbehaves if it ever contains '%'.
        RCLCPP_INFO(node->get_logger(), "Waiting for %s camera parameter service...", position.c_str());
        failed_waits++;
    }
    sign = (failed_waits < 3);
    if (sign) {
        // (also fixes the "alreadly" typo in the original log message)
        RCLCPP_INFO(node->get_logger(), "%s camera params service is already available", position.c_str());
    } else {
        RCLCPP_WARN(node->get_logger(), "%s camera params service is unavailable", position.c_str());
    }
}
// Loads camera names/namespaces from the installed configs JSON (falling back
// to hardcoded defaults), connects to each camera's parameter service, and
// registers the two control services this node offers.
CameraRawControlNode::CameraRawControlNode(std::string name) : Node(name) {
    // All service clients share one mutually-exclusive callback group so
    // their responses are processed serially.
    client_cb_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
    // Load per-camera names and namespaces from the configs file.
    try {
        const string configs_file = SHARED_DIR + "/" + CONFIGS_PATH;
        std::ifstream file(configs_file);
        // Bug fix: log via "%s" instead of passing a dynamic format string.
        RCLCPP_INFO(this->get_logger(), "%s", configs_file.c_str());
        if (!file.is_open()) {
            throw std::runtime_error("Can't open json file");
        }
        json configs;
        file >> configs;
        left_name = configs["left_name"];
        right_name = configs["right_name"];
        head_name = configs["head_name"];
        left_namespace = configs["left_namespace"];
        right_namespace = configs["right_namespace"];
        head_namespace = configs["head_namespace"];
    } catch (const std::exception &e) {
        // Bug fix: catch std::exception, not just std::runtime_error —
        // nlohmann::json parse/type errors derive from std::exception and
        // previously escaped this handler, aborting the node.
        RCLCPP_ERROR(this->get_logger(), "%s", e.what());
        left_name = "/camera1";
        right_name = "/camera2";
        head_name = "/camera";
        left_namespace = "/camera1";
        right_namespace = "/camera2";
        head_namespace = "/camera";
    }
    // Create one parameter client per camera (sets the *_sign flags).
    create_params_client(camera_left, this, left_namespace + left_name, "left", left_sign);
    create_params_client(camera_right, this, right_namespace + right_name, "right", right_sign);
    create_params_client(camera_head, this, head_namespace + head_name, "head", head_sign);
    if (head_sign) {
        // The head camera is controlled through dedicated driver services
        // rather than the parameter client, so release the unused client.
        camera_head.reset();
        color_exposure = this->create_client<raw_params_type>(
            head_name + "/set_color_exposure", rmw_qos_profile_services_default, client_cb_group_);
        depth_exposure = this->create_client<raw_params_type>(
            head_name + "/set_depth_exposure", rmw_qos_profile_services_default, client_cb_group_);
        color_gain = this->create_client<raw_params_type>(
            head_name + "/set_color_gain", rmw_qos_profile_services_default, client_cb_group_);
        depth_gain = this->create_client<raw_params_type>(
            head_name + "/set_depth_gain", rmw_qos_profile_services_default, client_cb_group_);
        color_raw_control = this->create_client<std_srvs::srv::SetBool>(
            head_name + "/toggle_color", rmw_qos_profile_services_default, client_cb_group_);
        depth_raw_control = this->create_client<std_srvs::srv::SetBool>(
            head_name + "/toggle_depth", rmw_qos_profile_services_default, client_cb_group_);
    }
    // Service: exposure/gain control.
    raw_params_service = this->create_service<interfaces::srv::SetCameraRawParams>(
        "set_camera_raw_params",
        std::bind(
            &CameraRawControlNode::raw_params_callback,
            this,
            std::placeholders::_1,
            std::placeholders::_2
        )
    );
    // Service: stream enable/disable control.
    raw_status_service = this->create_service<interfaces::srv::SetCameraRawStatus>(
        "set_camera_raw_status",
        [this](
            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request,
            const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> &response
        ) {
            raw_status_callback(request, response);
        }
    );
}
void CameraRawControlNode::raw_params_callback(
const std::shared_ptr<interfaces::srv::SetCameraRawParams::Request> &request,
const std::shared_ptr<interfaces::srv::SetCameraRawParams::Response> &response
) {
if (request->camera_position == "left") {
string prefix;
if (!left_sign) {
response->success = false;
response->info = "Left camera params service is unavailable";
return;
} else if (request->raw == "color") {
prefix = "rgb_camera";
} else if (request->raw == "depth") {
prefix = "depth_module";
} else {
response->success = false;
response->info = "raw is wrong: " + request->raw;
return;
}
rclcpp::Parameter exposure(prefix + ".exposure", request->exposure);
rclcpp::Parameter gain(prefix + ".gain", request->gain);
auto results = camera_left->set_parameters({exposure, gain});
response->success = true;
for (auto &result : results) {
if (!result.successful) {
response->success = false;
response->info += result.reason + " | ";
}
}
} else if (request->camera_position == "right") {
string prefix;
if (!right_sign) {
response->success = false;
response->info = "Right camera params service is unavailable";
return;
}
if (request->raw == "color") {
prefix = "rgb_camera";
} else if (request->raw == "depth") {
prefix = "depth_module";
} else {
response->success = false;
response->info = "raw is wrong: " + request->raw;
return;
}
rclcpp::Parameter exposure(prefix + ".exposure", request->exposure);
rclcpp::Parameter gain(prefix + ".gain", request->gain);
auto results = camera_right->set_parameters({exposure, gain});
response->success = true;
for (auto &result : results) {
if (!result.successful) {
response->success = false;
response->info += result.reason + " | ";
}
}
} else if (request->camera_position == "head") {
string prefix;
if (!head_sign) {
response->success = false;
response->info = "Head camera params service is unavailable";
return;
}
if (request->raw == "color") {
// /camera/set_color_exposure
// /camera/set_color_gain
auto exposure_request = std::make_shared<raw_params_type::Request>();
auto gain_request = std::make_shared<raw_params_type::Request>();
exposure_request->data = request->exposure;
gain_request->data = request->gain;
auto future_exposure = color_exposure->async_send_request(exposure_request);
auto future_gain = color_gain->async_send_request(gain_request);
response->success = true;
auto result_exposure = future_exposure.get();
auto result_gain = future_gain.get();
response->success = response->success && result_exposure->success;
if (!response->success) {
response->info += result_exposure->message;
}
response->success = response->success && result_gain->success;
if (!response->success) {
response->info += result_gain->message;
}
return;
} else if (request->raw == "depth") {
// /camera/set_depth_exposure
// /camera/set_depth_gain
auto exposure_request = std::make_shared<raw_params_type::Request>();
auto gain_request = std::make_shared<raw_params_type::Request>();
exposure_request->data = request->exposure;
gain_request->data = request->gain;
auto future_exposure = depth_exposure->async_send_request(exposure_request);
auto future_gain = depth_gain->async_send_request(gain_request);
response->success = true;
auto result_exposure = future_exposure.get();
auto result_gain = future_gain.get();
response->success = response->success && result_exposure->success;
if (!response->success) {
response->info += result_exposure->message;
}
response->success = response->success && result_gain->success;
if (!response->success) {
response->info += result_gain->message;
}
return;
} else {
response->success = false;
response->info = "raw is wrong: " + request->raw;
return;
}
} else {
response->success = false;
response->info = ("camera position is wrong: " + request->camera_position);
}
}
// Enable/disable color/depth/ir streams for the requested camera.
//   left/right: via enable_* parameters on the camera's parameter service
//   head:       via the driver's toggle_color/toggle_depth services
void CameraRawControlNode::raw_status_callback(
    const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Request> &request,
    const std::shared_ptr<interfaces::srv::SetCameraRawStatus::Response> &response
) {
    std::shared_ptr<rclcpp::SyncParametersClient> camera = nullptr;
    if (request->camera_position == "left") {
        if (!left_sign) {
            response->success = false;
            response->info = "Left camera params service is unavailable.";
            return;
        }
        camera = camera_left;
    } else if (request->camera_position == "right") {
        if (!right_sign) {
            response->success = false;
            response->info = "Right camera params service is unavailable.";
            return;
        }
        camera = camera_right;
    } else if (request->camera_position == "head") {
        if (!head_sign) {
            response->success = false;
            response->info = "Head camera params service is unavailable.";
            return;
        }
        auto request_c = std::make_shared<std_srvs::srv::SetBool::Request>();
        auto request_d = std::make_shared<std_srvs::srv::SetBool::Request>();
        request_c->data = request->color_raw;
        request_d->data = request->depth_raw;
        auto future_c = color_raw_control->async_send_request(request_c);
        auto future_d = depth_raw_control->async_send_request(request_d);
        auto result_c = future_c.get();
        auto result_d = future_d.get();
        // Bug fix: append only the message(s) of the toggle(s) that failed.
        // Previously, a color failure also dragged in the depth message even
        // when the depth toggle succeeded.
        response->success = result_c->success && result_d->success;
        if (!result_c->success) {
            response->info += result_c->message;
        }
        if (!result_d->success) {
            if (!response->info.empty()) {
                response->info += " | ";
            }
            response->info += result_d->message;
        }
        return;
    } else {
        response->success = false;
        response->info = ("camera position is wrong: " + request->camera_position);
        return;
    }
    rclcpp::Parameter color_raw("enable_color", request->color_raw);
    rclcpp::Parameter depth_raw("enable_depth", request->depth_raw);
    rclcpp::Parameter ir_raw("enable_ir", request->ir_raw);
    auto results = camera->set_parameters({color_raw, depth_raw, ir_raw});
    // Partial-success semantics (unchanged): success if ANY parameter was
    // set; redundant "|| false"/"|| true" expressions removed.
    // NOTE(review): this differs from raw_params_callback, where success
    // requires ALL parameters to succeed — confirm which is intended.
    response->success = false;
    for (auto &result : results) {
        if (result.successful) {
            response->success = true;
        } else {
            response->info += result.reason + " | ";
        }
    }
}
int main(int argc, char *argv[]) {
rclcpp::init(argc, argv);
auto node = std::make_shared<CameraRawControlNode>("camera_raw_control_node");
auto executor = rclcpp::executors::MultiThreadedExecutor();
try {
executor.add_node(node);
executor.spin();
executor.remove_node(node);
} catch (...) {
}
node.reset();
if (rclcpp::ok()) {
rclcpp::shutdown();
}
return 0;
}

Binary file not shown.

View File

@@ -1,29 +0,0 @@
{
"info": {},
"warring": {
"0000": "Success",
"1000": "Detected object count is 0",
"1001": "Depth crop is None",
"1003": "Failed to detect a valid pose",
"1100": "Object point cloud contains excessive noise",
"1101": "The point cloud is empty",
"1200": "The number of points is insufficient to compute an OBB",
"1201": "PCA output vector is None",
"1202": "This pose cannot be grab, and position refine fail",
"1300": "E2E model input data 'coors' are fewer than 128",
"1301": "E2E model input data 'point_clouds' are fewer than 128",
"1302": "The 'true num' of points is 0; No graspable points are available",
"1303": "The model returned no predictions",
"1304": "All rotation vector processing failed; no valid rotation matrix was generated"
},
"error": {},
"fatal": {}
}

View File

@@ -0,0 +1,101 @@
{
"node_name": "default_config_detect_service",
"output_boxes": false,
"output_masks": false,
"save_image": true,
"image_save_dir": "~/images",
"calibration": {
"left_hand": "calibration/eye_in_left_hand.json",
"right_hand": "calibration/eye_in_right_hand.json",
"head": "calibration/eye_to_hand.json"
},
"node_mode": "ACTION",
"service_node_configs": {
"service_name": "/vision_object_recognition"
},
"publisher_node_configs": {
"publish_time": 0.1,
"position": "right",
"publisher_name": "/detect/pose"
},
"action_node_configs": {
"action_name": "/vision_object_recognition"
},
"image_source": "DRIVER",
"preprocess_configs": {
"distortion": false,
"denoising": false,
"enhancement": false,
"quality": false,
"quality_threshold": 100.0
},
"driver_source_configs": {
"subscription_name": "/img_msg"
},
"direct_source_configs": {
"position": "right",
"color_image_topic_name": "/camera/color/image_raw",
"depth_image_topic_name": "/camera/depth/image_raw",
"camera_info_topic_name": "/camera/color/camera_info"
},
"topic_source_configs": {
"left": [
"/camera1/camera1/color/image_raw",
"/camera1/camera1/aligned_depth_to_color/image_raw",
"/camera1/camera1/color/camera_info"
],
"right": [
"/camera2/camera2/color/image_raw",
"/camera2/camera2/aligned_depth_to_color/image_raw",
"/camera2/camera2/color/camera_info"
],
"head": [
"/camera/color/image_raw",
"/camera/depth/image_raw",
"/camera/color/camera_info"
]
},
"detect_mode": "OBJECT",
"object_detector_configs": {
"checkpoint_path": "checkpoints/yolo11s-seg.pt",
"confidence": 0.70,
"label_map_path": "map/label/medical_sense.json",
"classes": []
},
"color_detector_configs": {
"distance": 1500,
"color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
},
"crossboard_detector_configs": {
"pattern_size": [8, 5]
},
"estimate_mode": "PCA",
"pca_estimator_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.004
},
"icp_estimator_configs": {
"complete_model_path": "pointclouds/bottle_model.pcd",
"depth_scale": 1000.0,
"depth_trunc": 2.0,
"voxel_size": 0.002,
"ransac_voxel_size": 0.005,
"icp_voxel_radius": [0.004, 0.002, 0.001],
"icp_max_iter": [50, 30, 14]
},
"gsnet_estimator_configs": {
"checkpoint_path": "checkpoints/posenet.pt",
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010,
"collision_thresh": 0.00
},
"refine_mode": "NO"
}

View File

@@ -0,0 +1,84 @@
{
"node_name": "default_config_detect_service",
"output_boxes": false,
"output_masks": false,
"save_image": true,
"image_save_dir": "~/images",
"calibration": {
"left_hand": "calibration/eye_in_left_hand.json",
"right_hand": "calibration/eye_in_right_hand.json",
"head": "calibration/eye_to_hand.json"
},
"node_mode": "SERVICE",
"service_node_configs": {
"service_name": "/vision_object_recognition"
},
"publisher_node_configs": {
"publish_time": 0.1,
"position": "right",
"publisher_name": "/detect/pose"
},
"action_node_configs": {
"action_name": "/vision_object_recognition"
},
"image_source": "DRIVER",
"preprocess_configs": {
"distortion": false,
"denoising": false,
"enhancement": false,
"quality": false,
"quality_threshold": 100.0
},
"driver_source_configs": {
"subscription_name": "/img_msg"
},
"direct_source_configs": {
"position": "right",
"color_image_topic_name": "/camera/color/image_raw",
"depth_image_topic_name": "/camera/depth/image_raw",
"camera_info_topic_name": "/camera/color/camera_info"
},
"detect_mode": "OBJECT",
"object_detector_configs": {
"checkpoint_path": "checkpoints/yolo11s-seg.onnx",
"confidence": 0.70,
"label_map_path": "map/label/medical_sense.json",
"classes": []
},
"color_detector_configs": {
"distance": 1500,
"color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
},
"crossboard_detector_configs": {
"pattern_size": [8, 5]
},
"estimate_mode": "PCA",
"pca_estimator_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.004
},
"icp_estimator_configs": {
"complete_model_path": "pointclouds/bottle_model.pcd",
"depth_scale": 1000.0,
"depth_trunc": 2.0,
"voxel_size": 0.002,
"ransac_voxel_size": 0.005,
"icp_voxel_radius": [0.004, 0.002, 0.001],
"icp_max_iter": [50, 30, 14]
},
"gsnet_estimator_configs": {
"checkpoint_path": "checkpoints/posenet.pt",
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010,
"collision_thresh": 0.00
},
"refine_mode": "FIXED"
}

View File

@@ -25,7 +25,7 @@
"PCA_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.005
"voxel_size": 0.004
},
"E2E_configs": {
"checkpoint_path": "checkpoints/posenet.pt",

View File

@@ -0,0 +1,47 @@
{
"node_name": "source_test_node",
"output_boxes": false,
"output_masks": false,
"save_image": false,
"image_save_dir": "~/images",
"calibration": {
"left_hand": "calibration/eye_in_left_hand.json",
"right_hand": "calibration/eye_in_right_hand.json",
"head": "calibration/eye_to_hand.json"
},
"node_mode": "SERVICE",
"service_node_configs": {
"service_name": "/vision_object_recognition"
},
"publisher_node_configs": {
"publish_time": 0.1,
"position": "right",
"publisher_name": "/source_test/processed_image"
},
"action_node_configs": {
"action_name": "/vision_object_recognition"
},
"image_source": "DIRECT",
"direct_source_configs": {
"position": "right",
"color_image_topic_name": "/camera/color/image_raw",
"depth_image_topic_name": "/camera/depth/image_raw",
"camera_info_topic_name": "/camera/color/camera_info"
},
"preprocess_configs": {},
"detect_mode": "OBJECT",
"object_detector_configs": {},
"color_detector_configs": {},
"crossboard_detector_configs": {},
"estimate_mode": "PCA",
"pca_estimator_configs": {},
"icp_estimator_configs": {},
"gsnet_estimator_configs": {},
"refine_mode": "FIXED"
}

View File

@@ -1,59 +0,0 @@
{
"node_name": "default_config_detect_service",
"output_boxes": "True",
"output_masks": "False",
"calibration": {
"left_hand": "configs/hand_eye_mat/eye_in_left_hand.json",
"right_hand": "configs/hand_eye_mat/eye_in_right_hand.json",
"head": "configs/hand_eye_mat/eye_to_hand.json"
},
"get_camera_mode": "Service",
"Service_configs": {
"service_name": "/vision_object_recognition"
},
"Topic_configs": {
"position": "right",
"color_image_topic_name": "/camera/color/image_raw",
"depth_image_topic_name": "/camera/depth/image_raw",
"camera_info_topic_name": "/camera/color/camera_info"
},
"detect_mode": "Detect",
"Detect_configs": {
"checkpoint_path": "checkpoints/yolo11s-seg.pt",
"confidence": 0.50,
"classes": []
},
"Color_configs": {
"distance": 1500,
"color_range": [[[0, 120, 70], [10, 255, 255]], [[170, 120, 70], [180, 255, 255]]]
},
"Crossboard_configs": {
"pattern_size": [8, 5]
},
"calculate_mode": "PCA",
"PCA_configs": {
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010
},
"ICP_configs": {
"complete_model_path": "pointclouds/bottle_model.pcd",
"depth_scale": 1000.0,
"depth_trunc": 2.0,
"voxel_size": 0.010,
"ransac_voxel_size": 0.005,
"icp_voxel_radius": [0.004, 0.002, 0.001],
"icp_max_iter": [50, 30, 14]
},
"E2E_configs": {
"checkpoint_path": "checkpoints/posenet.pt",
"depth_scale": 1000.0,
"depth_trunc": 3.0,
"voxel_size": 0.010,
"collision_thresh": 0.01
}
}

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/bottle_detect_service_icp.json')
config_dir = os.path.join(share_dir, 'configs/launch/bottle_detect_service_icp.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/bottle_detect_service_pca.json')
config_dir = os.path.join(share_dir, 'configs/launch/bottle_detect_service_pca.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/crossboard_topic_pca.json')
config_dir = os.path.join(share_dir, 'configs/launch/crossboard_topic_pca.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/detect_service_pca.json')
config_dir = os.path.join(share_dir, 'configs/launch/detect_service_pca.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/default_config.json')
config_dir = os.path.join(share_dir, 'configs/launch/default_service_config.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -9,7 +9,7 @@ from ament_index_python.packages import get_package_share_directory
share_dir = get_package_share_directory('vision_detect')
config_dir = os.path.join(share_dir, 'configs/launch_configs/medical_sense.json')
config_dir = os.path.join(share_dir, 'configs/launch/medical_sense.json')
with open(config_dir, "r") as f:
configs = json.load(f)

View File

@@ -0,0 +1,18 @@
from launch import LaunchDescription
from launch.actions import ExecuteProcess, TimerAction
def cmd(_cmd, period=None, output="screen"):
    """Wrap a command list in an ExecuteProcess.

    When `period` is given, the process start is delayed by that many
    seconds via a TimerAction; otherwise the action is returned directly.
    """
    action = ExecuteProcess(cmd=_cmd, output=output, sigterm_timeout='5', sigkill_timeout='5')
    if period is None:
        return action
    return TimerAction(period=float(period), actions=[action])
def generate_launch_description():
    # Bring up the image driver first, then (2 s later) the detect action
    # node and a test action client.
    return LaunchDescription([
        cmd(['ros2', 'launch', 'img_dev', 'img_dev.launch.py']),
        cmd(['ros2', 'launch', 'vision_detect', 'test_action_medical_sense.launch.py'], period=2.0),
        cmd(['ros2', 'run', 'vision_detect', 'test_action_client'], period=2.0)
    ])

View File

@@ -0,0 +1,47 @@
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.actions import DeclareLaunchArgument, OpaqueFunction
from launch.substitutions import LaunchConfiguration
import os
import json
from ament_index_python.packages import get_package_share_directory
SHARED_DIR = get_package_share_directory('vision_detect')
CONFIGS_PATH = os.path.join(SHARED_DIR, 'configs/launch/default_action_config.json')
LOGGING_MAP_PATH = os.path.join(SHARED_DIR, 'map/logging/report_logging_define.json')
def get_name(path):
    """Read the JSON config at `path` and return its 'node_name' entry."""
    with open(path, "r") as config_file:
        return json.load(config_file)["node_name"]
def generate_launch_description():
    """Launch the detect test node; the config and logging-map paths can be
    overridden via the 'configs_path' / 'logging_map_path' launch arguments."""
    args_detect = [
        DeclareLaunchArgument('logging_map_path', default_value=LOGGING_MAP_PATH),
        DeclareLaunchArgument('configs_path', default_value=CONFIGS_PATH)
    ]
    def create_detect_node(context):
        # Resolve launch arguments at launch time (hence OpaqueFunction).
        logging_map_path = LaunchConfiguration('logging_map_path').perform(context)
        configs_path = LaunchConfiguration('configs_path').perform(context)
        return [
            Node(
                package='vision_detect',
                executable='detect_node_test',
                # Bug fix: read the node name from the resolved configs_path.
                # Previously get_name(CONFIGS_PATH) always used the default
                # config, silently ignoring a user-supplied configs_path.
                name=get_name(configs_path),
                output="screen",
                parameters=[{
                    'logging_map_path': logging_map_path,
                    'configs_path': configs_path
                }]
            )
        ]
    return LaunchDescription(args_detect + [
        OpaqueFunction(function=create_detect_node),
    ])

View File

@@ -0,0 +1,18 @@
from launch import LaunchDescription
from launch.actions import ExecuteProcess, TimerAction
def cmd(_cmd, period=None, output="screen"):
    # Wrap the command list in an ExecuteProcess; when `period` is given,
    # delay its start by that many seconds via a TimerAction.
    command = ExecuteProcess(cmd=_cmd, output=output, sigterm_timeout='5', sigkill_timeout='5')
    if period is None:
        return command
    else:
        return TimerAction(period=float(period), actions=[command])
def generate_launch_description():
    # Bring up the image driver first, then (2 s later) the detect service
    # node and a test service client.
    return LaunchDescription([
        cmd(['ros2', 'launch', 'img_dev', 'img_dev.launch.py']),
        cmd(['ros2', 'launch', 'vision_detect', 'test_service_medical_sense.launch.py'], period=2.0),
        cmd(['ros2', 'run', 'vision_detect', 'service_client_node'], period=2.0)
    ])

View File

@@ -0,0 +1,47 @@
from launch import LaunchDescription
from launch_ros.actions import Node
from launch.actions import DeclareLaunchArgument, OpaqueFunction
from launch.substitutions import LaunchConfiguration
import os
import json
from ament_index_python.packages import get_package_share_directory
SHARED_DIR = get_package_share_directory('vision_detect')
CONFIGS_PATH = os.path.join(SHARED_DIR, 'configs/launch/default_service_config.json')
LOGGING_MAP_PATH = os.path.join(SHARED_DIR, 'map/logging/report_logging_define.json')
def get_name(path):
    """Return the 'node_name' field of the JSON file at `path`."""
    with open(path, "r") as f:
        configs = json.load(f)
    return configs["node_name"]
def generate_launch_description():
    """Launch the detect test node; the config and logging-map paths can be
    overridden via the 'configs_path' / 'logging_map_path' launch arguments."""
    args_detect = [
        DeclareLaunchArgument('logging_map_path', default_value=LOGGING_MAP_PATH),
        DeclareLaunchArgument('configs_path', default_value=CONFIGS_PATH)
    ]
    def create_detect_node(context):
        # Resolve launch arguments at launch time (hence OpaqueFunction).
        logging_map_path = LaunchConfiguration('logging_map_path').perform(context)
        configs_path = LaunchConfiguration('configs_path').perform(context)
        return [
            Node(
                package='vision_detect',
                executable='detect_node_test',
                # Bug fix: read the node name from the resolved configs_path.
                # Previously get_name(CONFIGS_PATH) always used the default
                # config, silently ignoring a user-supplied configs_path.
                name=get_name(configs_path),
                output="screen",
                parameters=[{
                    'logging_map_path': logging_map_path,
                    'configs_path': configs_path
                }]
            )
        ]
    return LaunchDescription(args_detect + [
        OpaqueFunction(function=create_detect_node),
    ])

View File

@@ -0,0 +1,5 @@
{
"bottle_plate": 0,
"medicine_box": 1,
"paper": 2
}

View File

@@ -0,0 +1,56 @@
{
"info": {},
"warning": {
"0000": "Success",
"0100": "Fail to load config file: File is not open or JSON parse error",
"0101": "config file have no attribute 'node_name'",
"0102": "config file have no attribute 'output_boxe' or 'output_mask'",
"0103": "Fail to load source part",
"0104": "Fail to load detect part",
"0105": "Fail to load estimate part",
"0110": "Cannot load calibration file, use E(4, 4)",
"0111": "Calibration file have not KEY: 'T', use E(4, 4)",
"0112": "Calibration file has wrong shape of mat, use E(4, 4)",
"0200": "Have not receive any camera data",
"0201": "Receive wrong position, or this position have no camera data",
"0202": "All input position have no camera data",
"0210": "The image is too blurry.",
"0300": "Worker thread is not alive",
"0301": "Can't submit task, task executor is already stop",
"0302": "Task is aborted",
"0303": "Worker time out",
"0400": "task executor internal error",
"1000": "Detected object count is 0",
"1001": "Depth crop is None",
"1003": "Failed to detect a valid pose",
"1010": "No specified color within the designated distance.",
"1020": "Didn't detect Crossboard",
"1100": "Object point cloud contains excessive noise",
"1101": "The point cloud is empty",
"1102": "Points is too closely",
"1200": "The number of points is insufficient to compute an OBB",
"1201": "PCA output vector is None",
"1202": "This pose cannot be grab, and position refine fail",
"1203": "All pose refine failed",
"1210": "No object can be estimate",
"1300": "E2E model input data 'coors' are fewer than 128",
"1301": "E2E model input data 'point_clouds' are fewer than 128",
"1302": "The 'true num' of points is 0; No graspable points are available",
"1303": "The model returned no predictions",
"1304": "All rotation vector processing failed; no valid rotation matrix was generated"
},
"error": {},
"fatal": {}
}

View File

@@ -17,23 +17,18 @@ data_files = [
('share/' + package_name, ['package.xml']),
('share/' + package_name + '/launch', glob('launch/*.launch.py')),
('share/' + package_name + '/configs', glob('configs/*.json')),
('share/' + package_name + '/configs/flexiv_configs',
glob('configs/flexiv_configs/*.json')),
('share/' + package_name + '/configs/hand_eye_mat', glob('configs/hand_eye_mat/*.json')),
('share/' + package_name + '/configs/launch_configs',
glob('configs/launch_configs/*.json')),
('share/' + package_name + '/configs/error_configs', glob('configs/error_configs/*.json')),
('share/' + package_name + '/configs/flexiv', glob('configs/flexiv/*.json')),
('share/' + package_name + '/configs/launch', glob('configs/launch/*.json')),
('share/' + package_name + '/calibration', glob('calibration/*.json')),
('share/' + package_name + '/map/logging', glob('map/logging/*.json')),
('share/' + package_name + '/map/label', glob('map/label/*.json')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.pt')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.onnx')),
('share/' + package_name + '/checkpoints', glob('checkpoints/*.engine')),
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/pointnet2/pointnet2',
glob('vision_detect/VisionDetect/net/pointnet2/pointnet2/*.so')),
('lib/python3.10/site-packages/' + package_name + '/VisionDetect/net/knn/knn_pytorch',
glob('vision_detect/VisionDetect/net/knn/knn_pytorch/*.so'))
]
('share/' + package_name + '/pointclouds', glob('pointclouds/*.pcd'))
]
data_files.extend(openvino_files)
@@ -41,9 +36,19 @@ setup(
name=package_name,
version='0.0.0',
packages=find_packages(exclude=['test']),
package_data={
'vision_detect': [
'vision_core/model/pointnet2/pointnet2/*.so',
'vision_core/model/knn/knn_pytorch/*.so'
]
},
data_files=data_files,
install_requires=['setuptools'],
zip_safe=True,
install_requires=[
'setuptools',
'numpy >= 1.23.0, < 2.0.0',
'opencv-python > 4.0.0, < 4.12.0',
],
zip_safe=False,
include_package_data=True,
maintainer='lyx',
maintainer_email='lyx@todo.todo',
@@ -59,10 +64,15 @@ setup(
'flexiv_detect_service_node = vision_detect.flexivaidk_detect_service:main',
'sub_pose_node = vision_detect.sub_pose:main',
'service_client_node = vision_detect.service_client:main',
'get_camera_pose_node = vision_detect.get_camera_pose:main',
'detect_node = vision_detect.detect_node:main',
'source_test_node = vision_detect.source_test_node:main',
'service_client_node = vision_detect.service_client:main',
'test_action_client = vision_detect.action_client_node:main',
'once_test_action_client = vision_detect.action_client_once:main',
'concurrent_test_action_client = vision_detect.action_client_once_concurrent:main'
],
},
)

View File

@@ -21,5 +21,5 @@ import pytest
def test_flake8():
rc, errors = main_with_errors(argv=[])
assert rc == 0, \
'Found %d code style errors / warnings:\n' % len(errors) + \
'Found %d code style errors / warnings:\n' % len(errors) + \
'\n'.join(errors)

View File

@@ -20,4 +20,4 @@ import pytest
@pytest.mark.pep257
def test_pep257():
rc = main(argv=['.', 'test'])
assert rc == 0, 'Found code style errors / warnings'
assert rc == 0, 'Found code style errors / warnings'

View File

@@ -1,3 +0,0 @@
__all__ = []

View File

@@ -1 +0,0 @@
from .gsnet import *

View File

@@ -1,132 +0,0 @@
import os
import sys
import numpy as np
import argparse
from PIL import Image
import torch
import open3d as o3d
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(ROOT_DIR)
from models.graspnet import GraspNet, pred_decode
import collections.abc as container_abcs
import MinkowskiEngine as ME
from utils.collision_detector import ModelFreeCollisionDetector
# Command-line configuration for the GraspNet inference demo.
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_path', default='checkpoints/minkuresunet_realsense.tar')
parser.add_argument('--dump_dir', help='Dump dir to save outputs', default='results/')
parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
parser.add_argument('--collision_thresh', type=float, default=0.01,
                    help='Collision Threshold in collision detection [default: 0.01]')
parser.add_argument('--voxel_size_cd', type=float, default=0.01,
                    help='Voxel Size for collision detection')
# NOTE(review): with default=True these store_true flags are always on and the
# command line cannot turn them off — confirm whether default=False was meant.
parser.add_argument('--infer', action='store_true', default=True)
parser.add_argument('--vis', action='store_true', default=True)
cfgs = parser.parse_args()
# ------------------------------------------------------------------------- GLOBAL CONFIG BEG
# if not os.path.exists(cfgs.dump_dir):
#     os.mkdir(cfgs.dump_dir)
def minkowski_collate_fn(list_data):
    """Collate a list of sample dicts into one MinkowskiEngine sparse batch.

    Each sample dict must provide 'coors' and 'feats'; those two are
    sparse-collated and voxel-quantized, and every remaining key is collated
    recursively with a generic numpy/sequence/mapping collate.
    """
    coordinates_batch, features_batch = ME.utils.sparse_collate([d["coors"] for d in list_data],
                                                                [d["feats"] for d in list_data])
    coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
    # Deduplicate voxel coordinates; quantize2original maps back to input order.
    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
        coordinates_batch, features_batch, return_index=True, return_inverse=True)
    res = {
        "coors": coordinates_batch,
        "feats": features_batch,
        "quantize2original": quantize2original
    }

    def collate_fn_(batch):
        # Recursively collate: numpy arrays are stacked, sequences wrapped as
        # tensors, mappings merged into the shared `res` dict (skipping the
        # already-handled 'coors'/'feats' keys).
        if type(batch[0]).__module__ == 'numpy':
            return torch.stack([torch.from_numpy(b) for b in batch], 0)
        elif isinstance(batch[0], container_abcs.Sequence):
            return [[torch.from_numpy(sample) for sample in b] for b in batch]
        elif isinstance(batch[0], container_abcs.Mapping):
            for key in batch[0]:
                if key == 'coors' or key == 'feats':
                    continue
                res[key] = collate_fn_([d[key] for d in batch])
            return res
    res = collate_fn_(list_data)
    return res
def data_process(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
    """Transform the cloud into the table frame and build the model input dict.

    Returns a dict with float32 'point_clouds' (xyz), 'coors' (xyz divided by
    voxel_size) and 'feats' (all-ones features).
    """
    cam_index = 0
    camera_poses = np.array([np.eye(4)])  # camera pose(s)
    align_mat = np.eye(4)                 # camera_0 pose w.r.t. the table
    pcd.transform(align_mat @ camera_poses[cam_index])
    points = np.asarray(pcd.points).astype(np.float32)
    return {
        'point_clouds': points,
        'coors': points / voxel_size,
        'feats': np.ones_like(points),
    }
def get_grasp_dict(preds):
    """Split an (N, 17) grasp prediction array into named fields.

    Per-row layout: score, width, height, depth, 3x3 rotation (row-major),
    translation (xyz), object id.
    """
    rotations = preds[:, 4:13].reshape(-1, 3, 3)
    translations = preds[:, 13:16].reshape(-1, 3)
    return {
        "score": preds[:, 0],
        "width": preds[:, 1],
        "height": preds[:, 2],
        "depth": preds[:, 3],
        "rotation": rotations,
        "translation": translations,
        "object_id": preds[:, 16],
    }
def inference(pcd:o3d.geometry.PointCloud, voxel_size:float = 0.005):
    """Run GraspNet on a point cloud and return the top collision-free grasps.

    Args:
        pcd: input point cloud in the camera frame.
        voxel_size: quantization size used to build sparse-tensor coordinates.

    Returns:
        np.ndarray of shape (M, 17): up to the 10 highest-scoring predictions,
        optionally filtered by the model-free collision detector.
    """
    data_input = data_process(pcd, voxel_size)
    batch_data = minkowski_collate_fn([data_input])
    # Honor the CLI-configured feature dim (default 512) instead of hard-coding.
    net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    net.to(device)
    # Load checkpoint.
    # BUGFIX: load_state_dict() expects a state dict, not a file path — the
    # checkpoint must first be deserialized with torch.load. GraspNet .tar
    # checkpoints commonly wrap the weights under 'model_state_dict'.
    checkpoint_path = os.path.join(ROOT_DIR, cfgs.checkpoint_path)
    checkpoint = torch.load(checkpoint_path, map_location=device)
    if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
        checkpoint = checkpoint["model_state_dict"]
    net.load_state_dict(checkpoint)
    net.eval()
    for key in batch_data:
        batch_data[key] = batch_data[key].to(device)
    # Forward pass
    with torch.no_grad():
        end_points = net(batch_data)
        grasp_preds = pred_decode(end_points)
    preds = grasp_preds[0].detach().cpu().numpy()
    # Keep the 10 best predictions by score (column 0, descending).
    sorted_index = np.argsort(-preds[:, 0])
    preds = preds[sorted_index]
    preds = preds[:10]
    # collision detection
    if cfgs.collision_thresh > 0:
        cloud = data_input['point_clouds']
        mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd)
        collision_mask = mfcdetector.detect(get_grasp_dict(preds), approach_dist=0.05,
                                            collision_thresh=cfgs.collision_thresh)
        preds = preds[~collision_mask]
    return preds
if __name__ == '__main__':
    # NOTE(review): this driver looks broken — data_process() requires a point
    # cloud argument, and inference() expects the raw cloud (it calls
    # data_process internally), not the processed dict. Presumably it should
    # read a .pcd/.ply with open3d and call inference(pcd); confirm the
    # intended input before use.
    data_dict = data_process()
    inference(data_dict)

View File

@@ -1,3 +0,0 @@
from .detect_node import DetectNode
__all__ = ['detect_node']

View File

@@ -1,144 +0,0 @@
import os
import json
from ament_index_python.packages import get_package_share_directory
from rclpy.node import Node
import numpy as np
import open3d as o3d
import yaml
share_dir = get_package_share_directory('vision_detect')
__all__ = [
"ConfigBase"
]
class ConfigBase(Node):
    """Base node that loads configuration for the vision-detect nodes.

    Reads a JSON config file (ROS parameter 'configs_path') and exposes the
    source / detect / calculate settings as attributes for subclasses.
    """

    SHARE_DIR = get_package_share_directory('vision_detect')
    with open(os.path.join(
            SHARE_DIR, "configs/error_configs/report_logging_define.json"), "r"
    ) as f:
        # BUGFIX: the report-definition file uses the section key "warning";
        # the previous lookup used the misspelled "warring" and raised
        # KeyError at import time. Accept either spelling so any older config
        # file that carries the typo keeps working.
        _report_logging = json.load(f)
        WARNING_LOG_MAP = _report_logging.get("warning", _report_logging.get("warring", {}))

    def __init__(self, name):
        super().__init__(name)
        # All attributes below are populated from the JSON config (or left at
        # these defaults) inside _get_param().
        self.confidence = None
        self.depth_image_topic_name = None
        self.color_image_topic_name = None
        self.camera_info_topic_name = None
        self.service_name = None
        self.pattern_size = None
        self.device = None
        self.configs = None
        self.calculate_configs = None
        self.checkpoint_path = None
        self.output_boxes = None
        self.output_masks = None
        self.function = None
        self.source = None
        self.server = None
        self.model = None
        self.k = self.d = None
        self.eye_in_left_hand_mat = None
        self.eye_in_right_hand_mat = None
        self.eye_to_hand_mat = None
        self.hand_eye_mat = None
        self.get_camera_mode = None
        self.detect_mode = None
        self.calculate_mode = None
        self.camera_size = None
        self.position = None
        self.e2e_model = None
        self.map1 = self.map2 = None
        self.calculate_function = None
        self.fx = self.fy = 0.5
        self.camera_data = {}
        self.distance = 1500
        # Default HSV ranges for red (hue wraps around 0/180).
        self.color_range = [
            [[0, 120, 70], [10, 255, 255]],
            [[170, 120, 70], [180, 255, 255]]
        ]
        self._get_param()

    def _get_param(self):
        """Declare/read the 'configs_path' parameter and load the JSON config."""
        self.declare_parameter(
            'configs_path',
            os.path.join(share_dir, "configs/launch_configs/default_config.json")
        )
        configs_path = self.get_parameter('configs_path').value
        with open(configs_path, 'r') as f:
            self.configs = json.load(f)
        self.output_boxes = self.configs['output_boxes'].lower() == "true"
        self.output_masks = self.configs['output_masks'].lower() == "true"
        self.get_camera_mode = self.configs['get_camera_mode']
        self.detect_mode = self.configs['detect_mode']
        self.calculate_mode = self.configs['calculate_mode']
        # self._read_calibration_mat()
        if self.get_camera_mode == "Service":
            self.service_name = self.configs["Service_configs"]["service_name"]
        elif self.get_camera_mode == "Topic":
            topic_configs = self.configs['Topic_configs']
            self.color_image_topic_name = topic_configs["color_image_topic_name"]
            self.depth_image_topic_name = topic_configs["depth_image_topic_name"]
            self.camera_info_topic_name = topic_configs["camera_info_topic_name"]
            self.position = topic_configs["position"]
        else:
            # BUGFIX: unknown modes fall back to Service mode, so read the
            # same "Service_configs" section as the Service branch (the old
            # code read a non-existent "Service" section and raised KeyError).
            self.service_name = self.configs["Service_configs"]["service_name"]
        if self.detect_mode == 'Detect':
            detect_configs = self.configs['Detect_configs']
            self.confidence = detect_configs["confidence"]
            self.classes = detect_configs["classes"]
            if not self.classes:
                self.classes = None
            self.checkpoint_path = os.path.join(share_dir, detect_configs["checkpoint_path"])
        elif self.detect_mode == 'Color':
            self.color_range = self.configs["Color_configs"]["color_range"]
            self.distance = self.configs["Color_configs"]["distance"]
            self.color_range = [[np.array(lower), np.array(upper)] for lower, upper in
                                self.color_range]
        elif self.detect_mode == 'Crossboard':
            self.pattern_size = self.configs["Crossboard_configs"]["pattern_size"]
        else:
            self.get_logger().warning("Unknown mode, use detect")
            detect_configs = self.configs['Detect_configs']
            self.confidence = detect_configs["confidence"]
            self.classes = detect_configs["classes"]
            if not self.classes:
                self.classes = None
            # BUGFIX: resolve the checkpoint relative to the package share
            # directory, consistent with the 'Detect' branch above.
            self.checkpoint_path = os.path.join(share_dir, detect_configs["checkpoint_path"])
        if self.calculate_mode == "PCA":
            self.calculate_configs = self.configs['PCA_configs']
        elif self.calculate_mode == "ICP" and self.detect_mode == 'Detect':
            # NOTE(review): 'ICA_configs' looks like a typo for 'ICP_configs';
            # kept as-is because existing config files may use this key —
            # confirm against configs/launch_configs.
            self.calculate_configs = self.configs['ICA_configs']
            source = o3d.io.read_point_cloud(
                os.path.join(share_dir, self.calculate_configs['complete_model_path'])
            )
            self.calculate_configs["source"] = source
        elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
            self.calculate_configs = self.configs['E2E_configs']
        else:
            self.get_logger().warning("Unknown calculate_mode, use PCA")
            self.calculate_configs = self.configs['PCA_configs']
        self.get_logger().info("Get parameters done")

View File

@@ -1,420 +0,0 @@
"""Vision Detect Node"""
import os
import threading
import time
import cv2
import numpy as np
import open3d as o3d
import rclpy
from cv_bridge import CvBridge
from sensor_msgs.msg import CameraInfo
from geometry_msgs.msg import Pose, Point, Quaternion
from interfaces.msg import PoseClassAndID, PoseArrayClassAndID
from ..utils import distortion_correction, crop_imgs_box_xywh, draw_box, draw_mask, rmat2quat, \
crop_imgs_mask, get_map, create_o3d_pcd, save_img, refine_grasp_pose
from .init_node import InitBase
E2E_SIGN = True
try:
from ..utils import calculate_pose_e2e
except (ImportError, ModuleNotFoundError):
E2E_SIGN = False
class DetectNode(InitBase):
    """Detect Node

    Runs the configured detector (YOLO segmentation, HSV color segmentation or
    chessboard detection) on RGB-D input and converts results into 6-DoF
    poses, either published on a topic or returned through the service.
    """
    def __init__(self, name):
        super().__init__(name)
        self.cv_bridge = CvBridge()
        # Protects self.camera_data, written by the image subscriber and read
        # by the service callback.
        self.lock = threading.Lock()

    def _camera_info_callback(self, msg: CameraInfo):
        """Get camera info (intrinsics K / distortion d) once, then unsubscribe."""
        self.k = msg.k
        self.d = msg.d
        self.camera_size = [msg.width, msg.height]
        if self.k is not None and len(self.k) > 0 and self.d is not None and len(self.d) > 0:
            # Precompute the undistortion maps from the received intrinsics.
            self.map1, self.map2, self.k = get_map(msg.k, msg.d, self.camera_size)
            if not self.future.done():
                self.future.set_result(True)
            self.destroy_subscription(self.sub_camera_info)
        else:
            self.get_logger().warning("K and d are not defined")

    def _service_sub_callback(self, msgs):
        """Store the latest camera frame for each position (lock-protected)."""
        with self.lock:
            # self.get_logger().info("get msgs")
            self.camera_data[msgs.position] = [
                msgs.image_color,
                msgs.image_depth,
                msgs.karr,
                msgs.darr
            ]

    def _sync_sub_callback(self, color_img_ros, depth_img_ros):
        """Synchronized color/depth callback: detect and publish pose list."""
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, self.map1, self.map2)
        img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
        if not sign:
            # On failure pose_list holds an int error code, mapped to a message.
            # NOTE(review): execution continues below with that int — the
            # 'if pose_list:' branch would then iterate an int; confirm whether
            # an early return is missing here.
            self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
        # Masks are empty: finish this frame with the raw color image.
        if img is None:
            img = self.cv_bridge.cv2_to_imgmsg(color_img_cv, "bgr8")
        if self.output_boxes or self.output_masks:
            self.pub_detect_image.publish(img)
        if pose_list:
            pose_list_all = PoseArrayClassAndID()
            for item in pose_list:
                pose_list_all.objects.append(
                    PoseClassAndID(
                        class_name = item["class_name"],
                        class_id = item["class_id"],
                        pose = item["pose"],
                        grab_width = item["grab_width"]
                    )
                )
            pose_list_all.header.stamp = self.get_clock().now().to_msg()
            pose_list_all.header.frame_id = "pose_list"
            self.pub_pose_list.publish(pose_list_all)

    def _service_callback(self, request, response):
        """Service entry: run detection on the stored frame for the requested
        camera position and fill the response with poses."""
        print(" \n ")
        print("========================== < start > ==========================")
        response.header.stamp = self.get_clock().now().to_msg()
        response.header.frame_id = "camera_detect"
        with self.lock:
            if request.camera_position in self.camera_data:
                color_img_ros, depth_img_ros, self.k, d = self.camera_data[request.camera_position]
            else:
                # No data at all vs. unknown position name.
                if len(self.camera_data) == 0:
                    response.success = False
                    response.info = "Camera data have not objects"
                    response.objects = []
                    print("=========================== < end > ===========================")
                    return response
                response.success = False
                response.info = f"Name is wrong: {request.camera_position}"
                response.objects = []
                print("=========================== < end > ===========================")
                return response
        # Select the hand-eye matrix matching the requested camera.
        if request.camera_position == 'left':
            self.hand_eye_mat = self.eye_in_left_hand_mat
            self.p = "left"
        elif request.camera_position == 'right':
            self.hand_eye_mat = self.eye_in_right_hand_mat
            self.p = "right"
        else:
            self.hand_eye_mat = self.eye_to_hand_mat
            self.p = "head"
        self.camera_size = [color_img_ros.width, color_img_ros.height]
        color_img_cv = self.cv_bridge.imgmsg_to_cv2(color_img_ros, "bgr8")
        depth_img_cv = self.cv_bridge.imgmsg_to_cv2(depth_img_ros, '16UC1')
        map1, map2, self.k = get_map(self.k, d, self.camera_size)
        color_img_cv, depth_img_cv = distortion_correction(color_img_cv, depth_img_cv, map1, map2)
        img, pose_list, sign = self.function(color_img_cv, depth_img_cv)
        if not sign:
            # pose_list holds an int error code on failure.
            self.get_logger().warning(self.WARNING_LOG_MAP[str(pose_list)])
            response.info = "pose dict is empty"
            response.success = False
            response.objects = []
        else:
            response.info = "Success get pose"
            response.success = True
            for item in pose_list:
                response.objects.append(
                    PoseClassAndID(
                        class_name = item["class_name"],
                        class_id = item["class_id"],
                        pose = item["pose"],
                        grab_width = item["grab_width"]
                    )
                )
        # publish detect image
        if self.output_boxes or self.output_masks:
            if img is None:
                img = color_img_ros
            self.pub_detect_image.publish(img)
        print("=========================== < end > ===========================")
        return response

    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
        """Use segmentation model: YOLO instance masks -> per-object poses."""
        pose_list = []
        home = os.path.expanduser("~")
        save_dir = os.path.join(home, "images")
        save_img(rgb_img.copy(), "orign_image.png", save_dir=save_dir, mark_cur_time=True)
        # Get Predict Results
        time1 = time.time()
        results = self.model(rgb_img, retina_masks=True, conf=self.confidence, classes=self.classes)
        time2 = time.time()
        result = results[0]
        # Get masks
        if result.masks is None or len(result.masks) == 0:
            return None, 1000, False
        masks = result.masks.data.cpu().numpy()
        # Get boxes
        boxes = result.boxes.xywh.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy()
        labels = result.names
        # Order detections by x (ascending); ties broken by -y.
        x_centers, y_centers = boxes[:, 0], boxes[:, 1]
        sorted_index = np.lexsort((-y_centers, x_centers))
        masks = masks[sorted_index]
        boxes = boxes[sorted_index]
        class_ids = class_ids[sorted_index]
        time3 = time.time()
        self.get_logger().info(f"Detect object num: {len(masks)}")
        full_points = create_o3d_pcd(
            depth_img, self.camera_size, self.k, **self.calculate_configs
        )
        time_full_points = time.time()
        print(f"create full points: {(time_full_points - time3) * 1000}")
        if self.calculate_mode == "E2E" and self.detect_mode == 'Detect' and E2E_SIGN:
            self.calculate_configs["orign_point_clouds"] = create_o3d_pcd(
                depth_img, self.camera_size, self.k, **self.calculate_configs
            )
        for i, (mask, box) in enumerate(zip(masks, boxes)):
            imgs_crop, (x_min, y_min) = crop_imgs_box_xywh([depth_img, mask], box, True)
            depth_crop, mask_crop = imgs_crop
            if depth_crop is None:
                self.get_logger().warning("Depth crop is None")
                continue
            # Intrinsics shifted so the principal point matches the crop origin.
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0]), int(self.camera_size[1]),
                self.k[0], self.k[4],
                self.k[2] - x_min, self.k[5] - y_min
            )
            rmat, grab_width, sign = self.calculate_function(
                mask_crop, depth_crop, intrinsics,
                calculate_grab_width=True, **self.calculate_configs
            )
            if not sign:
                # rmat holds an int error code on failure.
                self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
                continue
            time_c = time.time()
            if self.p == "left" or self.p == "right":
                # Hand cameras: refine the grasp pose against the full cloud.
                position = rmat[0:3, 3]
                rmat, sign = refine_grasp_pose(
                    full_points, self.calculate_configs.get("voxel_size"), position,
                    search_mode=True
                )
                if not sign:
                    self.get_logger().warning(self.WARNING_LOG_MAP[str(rmat)])
                    continue
                time_e = time.time()
                print(f"Refine: {(time_e - time_c) * 1000} ms")
            self.get_logger().info(f"grab_width: {grab_width}")
            # Map from camera frame to robot frame via the hand-eye matrix.
            x, y, z, rw, rx, ry, rz = rmat2quat(self.hand_eye_mat @ rmat)
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            self.get_logger().info(f"xyz, wxyz: {x, y, z, rw, rx, ry, rz}")
            pose_list.append(
                {
                    "class_id": int(class_ids[i]),
                    "class_name": labels[class_ids[i]],
                    "pose": pose,
                    "grab_width": grab_width
                }
            )
        time4 = time.time()
        if not pose_list:
            return None, 1003, False
        self.get_logger().info(f'{(time2 - time1) * 1000} ms, model predict')
        self.get_logger().info(f'{(time4 - time3) * 1000} ms, calculate all mask PCA')
        self.get_logger().info(f'{(time4 - time1) * 1000} ms, completing a picture entire process')
        # mask_img and box_img is or not output
        if not self.output_boxes and not self.output_masks:
            return None, pose_list, True
        if self.output_boxes:
            draw_box(rgb_img, result, save_dir=save_dir, mark_time = True)
        if self.output_masks:
            draw_mask(rgb_img, result)
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True

    def _seg_color(self, rgb_img: np.ndarray, depth_img: np.ndarray):
        """Segment by HSV color range within a depth limit and compute one pose."""
        pose_list = []
        hsv_img = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2HSV)
        # Keep only pixels with a valid depth below the configured distance.
        depth_filter_mask = np.zeros_like(depth_img, dtype=np.uint8)
        depth_filter_mask[(depth_img > 0) & (depth_img < self.distance)] = 1
        hsv_img[depth_filter_mask == 0] = 0
        # Union of all configured HSV ranges, reduced to a 0/1 mask.
        mask_part_list = [cv2.inRange(hsv_img, color[0], color[1]) for color in self.color_range]
        mask = sum(mask_part_list[1:], mask_part_list[0])
        mask = mask // 255
        rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([255, 0, 0]) * 0.5
        imgs_crop, mask_crop, (x_min, y_min) = crop_imgs_mask([depth_img], mask, True)
        depth_crop = imgs_crop[0]
        if depth_crop is None:
            self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
            return None, 1001, False
        intrinsics = o3d.camera.PinholeCameraIntrinsic(
            int(self.camera_size[0]), int(self.camera_size[1]),
            self.k[0], self.k[4],
            self.k[2] - x_min, self.k[5] - y_min
        )
        rmat, _, sign = self.calculate_function(
            mask_crop, depth_crop, intrinsics, **self.calculate_configs
        )
        if not sign:
            # self.get_logger().warning("Color Area point cloud have too many noise")
            return None, 1100, False
        T = self.hand_eye_mat @ rmat
        x, y, z, rw, rx, ry, rz = rmat2quat(T)
        if (x, y, z) != (0.0, 0.0, 0.0):
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            pose_list.append(
                {
                    "class_id": int(98),
                    "class_name": "red",
                    "pose": pose,
                    "grab_width": 0.0
                }
            )
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True

    def _seg_crossboard(self, rgb_img, depth_img):
        """Detect a chessboard pattern and compute its pose (camera frame)."""
        pose_list = []
        rgb_img_gray = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2GRAY)
        ret, corners = cv2.findChessboardCorners(rgb_img_gray,
                                                 self.pattern_size, cv2.CALIB_CB_FAST_CHECK)
        if ret:
            # Sub-pixel corner refinement (improves calibration accuracy).
            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
            corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
            corners_subpix = corners_subpix.reshape(self.pattern_size[1], self.pattern_size[0], 2)
            # Fill each board cell to build a board-area mask.
            mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
            for i in range(0, self.pattern_size[1] - 1):
                for j in range(0, self.pattern_size[0] - 1):
                    pts = np.array([
                        corners_subpix[i, j],
                        corners_subpix[i, j + 1],
                        corners_subpix[i + 1, j + 1],
                        corners_subpix[i + 1, j]
                    ], dtype=np.int32)
                    cv2.fillConvexPoly(mask, pts, 1)
            rgb_img[mask > 0] = rgb_img[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
            img_crop, mask_crop, (x_min, y_min) = crop_imgs_mask([depth_img], mask)
            depth_crop = img_crop[0]
            if depth_crop is None:
                self.get_logger().warning(self.WARNING_LOG_MAP[str(1001)])
                return None, 1001, False
            intrinsics = o3d.camera.PinholeCameraIntrinsic(
                int(self.camera_size[0]), int(self.camera_size[1]),
                self.k[0], self.k[4],
                self.k[2] - x_min, self.k[5] - y_min
            )
            rmat, _, sign = self.calculate_function(
                mask_crop, depth_crop, intrinsics, **self.calculate_configs
            )
            if not sign:
                # self.get_logger().warning("Corssboard point cloud have too many noise")
                return None, 1100, False
            # NOTE: unlike the other detectors, no hand-eye transform is
            # applied here; the pose stays in the camera frame.
            x, y, z, rw, rx, ry, rz = rmat2quat(rmat)
            self.get_logger().info(f"{x}, {y}, {z}, {rw}, {rx}, {ry}, {rz}")
            pose = Pose()
            pose.position = Point(x=x, y=y, z=z)
            pose.orientation = Quaternion(w=rw, x=rx, y=ry, z=rz)
            pose_list.append(
                {
                    "class_id": int(99),
                    "class_name": 'crossboard',
                    "pose": pose,
                    "grab_width": 0.0
                }
            )
            cv2.putText(
                rgb_img, f'cs: x: {x:.3f}, y: {y:.3f}, z: {z:.3f}',
                (0, 0 + 30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
            )
            cv2.putText(
                rgb_img, f'quat: rw: {rw:.3f}, rx: {rx:.3f}, ry: {ry:.3f}, rz: {rz:.3f}',
                (0, 0 + 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
            )
            return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), pose_list, True
        return self.cv_bridge.cv2_to_imgmsg(rgb_img, "bgr8"), None, True
if __name__ == '__main__':
    # Stand-alone entry point: spin the detect node until interrupted.
    rclpy.init(args=None)
    detect_node = DetectNode('detect')
    try:
        rclpy.spin(detect_node)
    except KeyboardInterrupt:
        pass
    finally:
        detect_node.destroy_node()
        rclpy.shutdown()

View File

@@ -1,213 +0,0 @@
import os
from ament_index_python.packages import get_package_share_directory
import torch
import numpy as np
from ultralytics import YOLO
import rclpy
from rclpy.task import Future
from message_filters import Subscriber, ApproximateTimeSynchronizer
from sensor_msgs.msg import Image, CameraInfo
from interfaces.msg import PoseArrayClassAndID, ImgMsg
from interfaces.srv import VisionObjectRecognition
from ..utils import calculate_pose_pca, calculate_pose_icp, read_calibration_mat
from .config_node import ConfigBase
E2E_SIGN = True
try:
from ..net import GraspNet
from ..utils import calculate_pose_e2e
except (ImportError, ModuleNotFoundError):
E2E_SIGN = False
class InitBase(ConfigBase):
    """Initialization layer on top of ConfigBase: wires the model, hand-eye
    matrices, publishers, service and subscribers chosen by the config."""

    def __init__(self, name):
        super().__init__(name)
        self.future = Future()
        self._read_calibration_mat()
        # --- camera source: service (default) or synchronized topics --------
        if self.get_camera_mode == "Service":
            self._init_service()
        elif self.get_camera_mode == "Topic":
            # Pick the hand-eye matrix for the fixed topic position.
            if self.position == 'left':
                self.hand_eye_mat = self.eye_in_left_hand_mat
            elif self.position == 'right':
                self.hand_eye_mat = self.eye_in_right_hand_mat
            else:
                self.hand_eye_mat = self.eye_to_hand_mat
        else:
            self._init_service()
        # --- detector: model / color / chessboard (model is the fallback) ---
        if self.detect_mode == 'Detect':
            self.function = self._seg_image
            if not self.classes:
                self.classes = None
            self._init_model()
        elif self.detect_mode == 'Color':
            self.function = self._seg_color
        elif self.detect_mode == 'Crossboard':
            self.function = self._seg_crossboard
        else:
            self.function = self._seg_image
            if not self.classes:
                self.classes = None
            self._init_model()
        # --- pose estimator: PCA (fallback) / ICP / end-to-end model --------
        if self.calculate_mode == "PCA":
            self.calculate_function = calculate_pose_pca
        elif self.calculate_mode == "ICP" and self.detect_mode == 'Detect':
            self.calculate_function = calculate_pose_icp
        elif self.calculate_mode == "E2E" and self.detect_mode == 'Detect':
            # NOTE(review): this branch references calculate_pose_e2e/GraspNet,
            # which only exist when E2E_SIGN imports succeeded — confirm the
            # config cannot select E2E when those deps are missing.
            self.calculate_function = calculate_pose_e2e
            self.e2e_model = GraspNet(seed_feat_dim=512, is_training=False)
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            self.e2e_model.to(device)
            self.e2e_model.load_state_dict(
                torch.load(
                    os.path.join(self.SHARE_DIR, self.calculate_configs["checkpoint_path"]),
                    map_location=device
                )
            )
            self.e2e_model.eval()
            self.calculate_configs["model"] = self.e2e_model
        else:
            self.calculate_function = calculate_pose_pca
        self._init_publisher()
        self._init_subscriber()
        self.get_logger().info("Initialize done")

    def _init_model(self):
        """init model: load the YOLO checkpoint chosen by the config."""
        if self.checkpoint_path.endswith('-seg.pt'):
            device_model = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            try:
                self.model = YOLO(self.checkpoint_path).to(device_model)
            except Exception as e:
                self.get_logger().error(f'Failed to load YOLO model: {e}')
                raise
        elif self.checkpoint_path.endswith('.onnx') or self.checkpoint_path.endswith('.engine'):
            # Exported runtimes manage their own device placement.
            self.model = YOLO(self.checkpoint_path)
        else:
            self.function = None
            self.get_logger().fatal(f'Unknown checkpoint: {self.checkpoint_path}')
            raise ValueError(f'Unknown checkpoint: {self.checkpoint_path}')
        self.get_logger().info(f'Loading checkpoint from: {self.checkpoint_path}')
        self.get_logger().info("Initialize model done")

    def _init_publisher(self):
        """init publisher: debug image and (topic mode) pose list."""
        if self.output_boxes or self.output_masks:
            self.pub_detect_image = self.create_publisher(Image, '/image/detect_image', 10)
        if self.get_camera_mode == "Topic":
            self.pub_pose_list = self.create_publisher(PoseArrayClassAndID, '/pose/cv_detect_pose',
                                                       10)
        self.get_logger().info("Initialize publisher done")

    def _init_service(self):
        """init service server for on-demand recognition requests."""
        self.server = self.create_service(
            VisionObjectRecognition,
            self.service_name,
            self._service_callback
        )
        self.get_logger().info("Initialize service done")

    def _init_subscriber(self):
        """init subscriber matching the configured camera source."""
        if self.get_camera_mode == "Service":
            self.sub_img = self.create_subscription(
                ImgMsg,
                "/img_msg",
                self._service_sub_callback,
                10
            )
        elif self.get_camera_mode == "Topic":
            self.sub_camera_info = self.create_subscription(
                CameraInfo,
                self.camera_info_topic_name,
                self._camera_info_callback,
                10
            )
            self.get_logger().info("Waiting for camera info...")
            # Block until the first CameraInfo arrives (future set in callback).
            rclpy.spin_until_future_complete(self, self.future)
            self.get_logger().info("Camera info received.")
            # sync get color and depth img
            self.sub_color_image = Subscriber(self, Image, self.color_image_topic_name)
            self.sub_depth_image = Subscriber(self, Image, self.depth_image_topic_name)
            self.sync_subscriber = ApproximateTimeSynchronizer(
                [self.sub_color_image, self.sub_depth_image],
                queue_size=10,
                slop=0.1
            )
            self.sync_subscriber.registerCallback(self._sync_sub_callback)
        else:
            self.get_logger().warning("get_camera_mode is wrong, use service")
            self.sub_img = self.create_subscription(
                ImgMsg,
                "/img_msg",
                self._service_sub_callback,
                10
            )
        self.get_logger().info("Initialize subscriber done")

    def _read_calibration_mat(self):
        """Load the three hand-eye calibration matrices from the share dir."""
        eye_in_left_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["left_hand"])
        eye_in_right_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["right_hand"])
        eye_to_hand_path = os.path.join(self.SHARE_DIR, self.configs["calibration"]["head"])
        self.eye_in_left_hand_mat, info, sign = read_calibration_mat(eye_in_left_hand_path)
        self.get_logger().info(f"left_hand_mat: {self.eye_in_left_hand_mat}")
        if not sign:
            self.get_logger().warning(f"left_mat: {info}")
        self.eye_in_right_hand_mat, info, sign = read_calibration_mat(eye_in_right_hand_path)
        self.get_logger().info(f"right_hand_mat: {self.eye_in_right_hand_mat}")
        if not sign:
            self.get_logger().warning(f"right_mat: {info}")
        self.eye_to_hand_mat, info, sign = read_calibration_mat(eye_to_hand_path)
        self.get_logger().info(f"head_mat: {self.eye_to_hand_mat}")
        if not sign:
            self.get_logger().warning(f"head_mat: {info}")
        self.get_logger().info("Read calibration mat file done")

    # The following hooks are no-op placeholders overridden by DetectNode.
    def _camera_info_callback(self, msg: CameraInfo):
        pass

    def _service_sub_callback(self, msgs):
        pass

    def _sync_sub_callback(self, color_img_ros, depth_img_ros):
        pass

    def _service_callback(self, request, response):
        pass

    def _seg_image(self, rgb_img: np.ndarray, depth_img: np.ndarray):
        pass

    def _seg_color(self, rgb_img: np.ndarray, depth_img: np.ndarray):
        pass

    def _seg_crossboard(self, rgb_img, depth_img):
        pass

View File

@@ -1,6 +0,0 @@
from .image_tools import *
from .draw_tools import *
from .calculate_tools import *
from .file_tools import *
from .pointclouds_tools import *
from .grasp_refine import *

View File

@@ -1,411 +0,0 @@
"""计算工具"""
import logging
# import cv2
import numpy as np
import open3d as o3d
import transforms3d as tfs
from .object_icp import *
from .pointclouds_tools import *
# Optional end-to-end (E2E) grasp-model support: these imports are heavy and
# platform dependent, so any failure just disables the E2E code path.
E2E_SIGN = True
try:
    import torch
    import MinkowskiEngine as ME
    import collections.abc as container_abcs
    from ..net import pred_decode, ModelFreeCollisionDetector
except (ImportError, OSError, RuntimeError):
    logging.warning("ImportError or OSError or RuntimeError")
    E2E_SIGN = False
__all__ = [
    "calculate_pose_pca", "calculate_pose_icp",
    "rmat2quat", "quat2rmat", "calculate_pose_e2e"
]
# Hide the E2E entry point when its dependencies are unavailable.
if not E2E_SIGN:
    __all__.remove("calculate_pose_e2e")
def pca(data: np.ndarray, sort=True):
    """Principal component analysis of a sample matrix.

    Args:
        data: (N, D) array of samples.
        sort: when truthy, order components by descending eigenvalue.

    Returns:
        (eigenvalues, eigenvectors) of the sample covariance, or (None, None)
        if the eigendecomposition fails.
    """
    centered = data - data.mean(axis=0)
    try:
        w, v = np.linalg.eigh(np.cov(centered.T))
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        order = np.argsort(w)[::-1]  # descending variance
        w, v = w[order], v[:, order]
    return w, v
def calculate_pose_pca(
        mask,
        depth_img: np.ndarray,
        intrinsics,
        calculate_grab_width: bool = False,
        **kwargs
):
    """Compute an object pose from its point cloud by principal component analysis.
    ----------
    input:
        mask: np.ndarray
        depth_img: np.ndarray
        intrinsics: o3d.camera.PinholeCameraIntrinsic
        calculate_grab_width: bool
        **kwargs:
    output:
        rmat: np.ndarray (4, 4)
        grab_width: list
        sign: bool
    """
    # Keep only the depth pixels selected by the mask.
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
    if not sign:
        # point_cloud carries an int error code in this case.
        return point_cloud, [], False
    # (legacy inline cloud construction, kept for reference)
    # depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
    #
    # point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
    #     depth=depth_o3d,
    #     intrinsic=intrinsics,
    #     depth_scale=kwargs.get("depth_scale", 1000.0),
    #     depth_trunc=kwargs.get("depth_trunc", 3.0),
    # )
    #
    # point_cloud, sign = point_cloud_denoising(point_cloud, kwargs.get("voxel_size", 0.002))
    # if not sign:
    #     return 1100, [], False
    #
    # if len(point_cloud.points) == 0:
    #     # logging.warning("point_cloud is empty")
    #     return 1101, [], False
    center = point_cloud.get_center()
    x, y, z = center
    if calculate_grab_width:
        if np.asarray(point_cloud.points).shape[0] < 4:
            # Not enough points to compute an oriented bounding box.
            return 1200, [0.0, 0.0, 0.0], False
        obb = point_cloud.get_oriented_bounding_box()
        extent = obb.extent
        # Reorder OBB axes by descending extent so vx is the longest axis.
        order = np.argsort(-extent)
        grab_width = extent[order]
        # z = z + grab_width * 0.20
        v = obb.R
        v = v[:, order]
        if v is None:
            return 1201, [0.0, 0.0, 0.0], False
        grab_width = grab_width * 1.05  # 5% margin on the opening width
    else:
        w, v = pca(np.asarray(point_cloud.points))
        if w is None or v is None:
            # NOTE(review): returns scalar 0.0 here while every other error
            # path returns a list — confirm callers tolerate both shapes.
            return 1201, 0.0, False
        grab_width = [0.0, 0.0, 0.0]
    # Make the frame deterministic: force vx/vy positive, flip vz if needed
    # so (vx, vy, vz) stays right-handed.
    vx, vy, vz = v[:,0], v[:,1], v[:,2]
    if vx[0] < 0:
        vx = -vx
    if vy[1] < 0:
        vy = -vy
    if not np.allclose(np.cross(vx, vy), vz):
        vz = -vz
    R = np.column_stack((vx, vy, vz))
    rmat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
    # draw(point_cloud_u, rmat)
    # draw(point_cloud, rmat)
    return rmat, grab_width, True
def calculate_pose_icp(
    mask,
    depth_img: np.ndarray,
    intrinsics,
    calculate_grab_width: bool = False,
    **kwargs
):
    """Pose estimation by point-cloud registration (RANSAC + multi-scale ICP).

    mask / depth_img / intrinsics: as in calculate_pose_pca.
    calculate_grab_width: accepted for interface parity; no width is
        computed by this method (dead "if ...: pass" removed).
    **kwargs: must provide "source" (model cloud or path); optional
        "ransac_voxel_size", "icp_voxel_radius", "icp_max_iter" plus the
        create_o3d_denoised_pcd options.
    Returns (rmat, [0, 0, 0], True) on success, (code, [], False) on failure.
    """
    depth_img_mask = np.zeros_like(depth_img)
    depth_img_mask[mask > 0] = depth_img[mask > 0]
    point_cloud, sign = create_o3d_denoised_pcd(depth_img_mask, intrinsics, **kwargs)
    if not sign:
        # On failure, "point_cloud" carries the numeric error code.
        return point_cloud, [], False
    # NOTE(review): object_icp is declared as object_icp(target, source) and
    # the model cloud is passed first here — confirm this order is intended.
    rmat = object_icp(
        kwargs.get("source"),
        point_cloud,
        ransac_voxel_size=kwargs.get("ransac_voxel_size", 0.005),
        icp_voxel_radius=kwargs.get("icp_voxel_radius", [0.004, 0.002, 0.001]),
        icp_max_iter=kwargs.get("icp_max_iter", [50, 30, 14])
    )
    grab_width = [0.0, 0.0, 0.0]
    return rmat, grab_width, True
if E2E_SIGN:
    def minkowski_collate_fn(list_data):
        """Collate per-sample dicts into one sparse MinkowskiEngine batch."""
        coordinates_batch, features_batch = ME.utils.sparse_collate(
            [d["coors"] for d in list_data], [d["feats"] for d in list_data])
        coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
        coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
            coordinates_batch, features_batch, return_index=True, return_inverse=True)
        res = {
            "coors": coordinates_batch,
            "feats": features_batch,
            "quantize2original": quantize2original,
            "point_clouds": torch.stack(
                [torch.from_numpy(b) for b in [d["point_clouds"] for d in list_data]], 0)
        }
        return res

    def calculate_pose_e2e(
        mask,
        depth_img: np.ndarray,
        intrinsics,
        calculate_grab_width: bool = False,
        **kwargs
    ):
        """
        Grasp-pose prediction with the end-to-end (GSNet) model.
        -----
        input:
            mask: np.ndarray
            depth_img: np.ndarray
            intrinsics: o3d.camera.PinholeCameraIntrinsic
            calculate_grab_width: bool (kept for interface parity; unused)
            **kwargs:
                "depth_scale": float
                "depth_trunc": float
                "voxel_size": float
                "model": callable grasp model
                "collision_thresh": float
                "orign_point_clouds": scene points for collision filtering
        output:
            (rmat, [grab_width], True) on success, else (code, widths, False)
        """
        depth_img_mask = np.zeros_like(depth_img)
        depth_img_mask[mask > 0] = depth_img[mask > 0]
        # Build a point cloud from the masked depth image.
        depth_o3d = o3d.geometry.Image(depth_img_mask.astype(np.uint16))
        point_cloud = o3d.geometry.PointCloud.create_from_depth_image(
            depth=depth_o3d,
            intrinsic=intrinsics,
            depth_scale=kwargs.get("depth_scale", 1000.0),
            depth_trunc=kwargs.get("depth_trunc", 3.0),
        )
        point_cloud = point_cloud.remove_non_finite_points()
        # Denoise: voxel downsample + radius/statistical outlier removal.
        voxel_size = kwargs.get("voxel_size", 0.002)
        min_pts = max(int(round(10 * voxel_size / 0.005)), 3)
        down_pcd = point_cloud.voxel_down_sample(voxel_size=voxel_size)
        clean_pcd, _ = down_pcd.remove_radius_outlier(
            nb_points=min_pts,
            radius=voxel_size * 10
        )
        clean_pcd, _ = clean_pcd.remove_statistical_outlier(
            nb_neighbors=min_pts,
            std_ratio=2.0
        )
        # Drop points closer than 0.2 (units per depth_scale; presumably metres).
        points = np.asarray(clean_pcd.points)
        clean_pcd.points = o3d.utility.Vector3dVector(points[points[:, 2] >= 0.2])
        labels = np.array(
            clean_pcd.cluster_dbscan(
                eps=voxel_size * 10,
                min_points=min_pts
            )
        )
        # Keep DBSCAN clusters with mean depth >= 0.2; pick the nearest one.
        points = np.asarray(clean_pcd.points)
        point_cloud_clusters = []
        for label in set(labels):
            if label == -1:
                continue  # DBSCAN noise label
            idx = np.where(labels == label)[0]
            z_avg = np.mean(points[idx, 2])
            if z_avg < 0.2:
                continue
            point_cloud_clusters.append((clean_pcd.select_by_index(idx), z_avg))
        # BUGFIX: previously indexed [0] without a guard — an empty cluster
        # list raised IndexError instead of returning an error code.
        if not point_cloud_clusters:
            return 1101, [0.0, 0.0, 0.0], False
        point_cloud_clusters.sort(key=lambda x: x[1])
        point_cloud = point_cloud_clusters[0][0]
        if len(point_cloud.points) == 0:
            return 1101, [0.0, 0.0, 0.0], False
        # BUGFIX: the inference input previously reused the stale "points"
        # array of the whole denoised cloud; feed the selected cluster
        # (consistent with the refactored GSNetEstimator implementation).
        points = np.asarray(point_cloud.points)
        # Build the inference batch.
        ret_dict = {
            'point_clouds': points.astype(np.float32),
            'coors': points.astype(np.float32) / voxel_size,
            'feats': np.ones_like(points).astype(np.float32),
        }
        batch_data = minkowski_collate_fn([ret_dict])
        # Move tensors to the model device.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        for key in batch_data:
            batch_data[key] = batch_data[key].to(device)
        logging.warning(f'points num: {len(points)}')
        # Bail out when the cloud is too sparse for the sparse-conv backbone.
        if batch_data['coors'].shape[0] < 128:
            return 1300, [0.0], False
        if batch_data["point_clouds"].shape[1] < 128:
            return 1301, [0.0], False
        # Inference with gradients disabled.
        with torch.no_grad():
            end_points = kwargs.get("model")(batch_data)
        if end_points is None:
            return 1302, [0.0, 0.0, 0.0], False
        grasp_preds = pred_decode(end_points)
        # Post-process: keep the 10 best-scoring grasp candidates.
        preds = grasp_preds[0].detach().cpu().numpy()
        preds = preds[np.argsort(-preds[:, 0])]
        preds = preds[:10]
        if kwargs.get("collision_thresh", 0.01) > 0:
            cloud = kwargs.get("orign_point_clouds")
            if cloud is not None and len(preds) > 0:
                mfcdetector = ModelFreeCollisionDetector(
                    cloud,
                    voxel_size=voxel_size
                )
                collision_mask = mfcdetector.detect(
                    preds, approach_dist=0.05,
                    collision_thresh=kwargs.get("collision_thresh", 0.01)
                )
                preds = preds[~collision_mask]
        Rs = preds[:, 4:13].reshape(-1, 3, 3)
        centers = preds[:, 13:16].reshape(-1, 3)
        grab_width = preds[:, 1]
        if not len(Rs):
            return 1303, [0.0, 0.0, 0.0], False
        rmat = []
        for r, center in zip(Rs, centers):
            # Re-order and sign-fix the axes into a deterministic,
            # right-handed frame.
            vz, vx, vy = r[:, 0], r[:, 1], r[:, 2]
            if vx[0] < 0:
                vx = -vx
            if vz[2] < 0:
                vz = -vz
            if not np.allclose(np.cross(vx, vy), vz):
                vy = -vy
            R = np.column_stack((vx, vy, vz))
            rmat.append(tfs.affines.compose(np.squeeze(np.asarray(center)), R, [1, 1, 1]))
        if len(rmat) == 0:
            return 1304, [0.0, 0.0, 0.0], False
        return rmat[0], [grab_width[0]], True
def rmat2quat(rmat):
    """Convert a 4x4 homogeneous matrix to [x, y, z, qw, qx, qy, qz]."""
    translation = rmat[0:3, 3:4].flatten()
    rw, rx, ry, rz = tfs.quaternions.mat2quat(rmat[0:3, 0:3])
    return [translation[0], translation[1], translation[2], rw, rx, ry, rz]
def quat2rmat(quat):
    """Convert [x, y, z, qw, qx, qy, qz] back to a 4x4 homogeneous matrix."""
    x, y, z, rw, rx, ry, rz = quat
    rotation = tfs.quaternions.quat2mat([rw, rx, ry, rz])
    return tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), rotation, [1, 1, 1])
# def draw(pcd, mat):
# R = mat[0:3, 0:3]
# point = mat[0:3, 3:4].flatten()
# x, y, z = R[:, 0], R[:, 1], R[:, 2]
#
# points = [
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
# point, point + x, point + y, point + z
#
# ] # 画点:原点、第一主成分、第二主成分
# lines = [
# [0, 1], [0, 2], [0, 3],
# [4, 5], [4, 6], [4, 7]
# ] # 画出三点之间两两连线
# colors = [
# [1, 0, 0], [0, 1, 0], [0, 0, 1],
# [1, 0, 0], [0, 1, 0], [0, 0, 1]
# ]
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points), lines=o3d.utility.Vector2iVector(lines))
# line_set.colors = o3d.utility.Vector3dVector(colors)
#
# o3d.visualization.draw_geometries([pcd, line_set])

View File

@@ -1,26 +0,0 @@
"""Utility functions for file operations."""
import os
import json
import numpy as np
__all__ = [
"read_calibration_mat",
]
def read_calibration_mat(mat_path):
    """Read a 4x4 calibration matrix from a JSON file with a "T" key.

    Best-effort: on any failure (missing file, malformed JSON, missing
    key, wrong shape) the identity matrix is returned instead of raising.

    Returns:
        mat (np.ndarray): the 4x4 matrix, identity on failure.
        info (str): human-readable status message.
        sign (bool): True only when the file was read successfully.
    """
    sign = True
    info = "Success"
    mat = np.eye(4)
    if not os.path.exists(mat_path):
        info = f"{mat_path} not found, use E(4, 4)"
        sign = False
    else:
        try:
            with open(mat_path, "r", encoding="utf-8") as f:
                mat = np.array(json.load(f)["T"])
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, OSError) as e:
            # Robustness fix: malformed files previously raised instead of
            # falling back to the identity matrix.
            info = f"Failed to read {mat_path} ({e}), use E(4, 4)"
            sign = False
            mat = np.eye(4)
        else:
            if mat.shape != (4, 4):
                info = "The shape is wrong, use E(4, 4)"
                sign = False
                mat = np.eye(4)
    return mat, info, sign

View File

@@ -1,140 +0,0 @@
import numpy as np
import open3d as o3d
from typing import Union, List, Tuple
__all__ = [
"object_icp",
]
def _draw_pointcloud(pcd, T):
    """Debug view: render the clouds plus the world frame and the pose frame T."""
    rotation = T[0:3, 0:3]
    origin = T[0:3, 3:4].flatten()
    axis_x, axis_y, axis_z = rotation[:, 0], rotation[:, 1], rotation[:, 2]
    # Eight vertices: world origin + unit axes, then pose origin + pose axes.
    points = [
        [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
        origin, origin + axis_x, origin + axis_y, origin + axis_z
    ]
    # Six segments: one per axis of each of the two frames.
    lines = [
        [0, 1], [0, 2], [0, 3],
        [4, 5], [4, 6], [4, 7]
    ]
    # Axis colors: x=red, y=green, z=blue for both frames.
    colors = [
        [1, 0, 0], [0, 1, 0], [0, 0, 1],
        [1, 0, 0], [0, 1, 0], [0, 0, 1]
    ]
    line_set = o3d.geometry.LineSet(
        points=o3d.utility.Vector3dVector(points),
        lines=o3d.utility.Vector2iVector(lines)
    )
    line_set.colors = o3d.utility.Vector3dVector(colors)
    pcd.append(line_set)
    o3d.visualization.draw_geometries(pcd)
def _preprocess_point_cloud(pcd, voxel_size):
    """Downsample a cloud, then attach normals and FPFH features for registration."""
    down = pcd.voxel_down_sample(voxel_size)
    down.estimate_normals(
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 2, max_nn=30))
    fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 5, max_nn=100))
    return down, fpfh
def _prepare_dataset(source, target, voxel_size):
    """Preprocess both clouds for registration (downsample + FPFH)."""
    # Identity pre-transform kept from the original flow (a no-op).
    source.transform(np.identity(4))
    source_down, source_fpfh = _preprocess_point_cloud(source, voxel_size)
    target_down, target_fpfh = _preprocess_point_cloud(target, voxel_size)
    return source_down, target_down, source_fpfh, target_fpfh
def _execute_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
    """Coarse RANSAC alignment on FPFH feature matches.

    Returns the estimated 4x4 transformation mapping source onto target,
    to be refined afterwards by ICP.
    """
    distance_threshold = voxel_size * 1.5
    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        True,  # mutual_filter
        distance_threshold,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
        3,  # ransac_n: correspondences per RANSAC sample
        [
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
        ],
        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))
    return result.transformation
def object_icp(
    target: o3d.geometry.PointCloud,
    source: Union[o3d.geometry.PointCloud, str],
    ransac_voxel_size: float = 0.005,
    icp_voxel_radius: Union[List[float], Tuple[float, ...], None] = (0.004, 0.002, 0.001),
    icp_max_iter: Union[List[int], Tuple[int, ...], None] = (50, 30, 14),
):
    """Align ``source`` onto ``target``: FPFH/RANSAC coarse fit followed by
    multi-scale point-to-point ICP refinement.

    Args:
        target: scene point cloud.
        source: model point cloud, or a path readable by o3d.io.read_point_cloud.
        ransac_voxel_size: voxel size for the coarse feature-matching stage.
        icp_voxel_radius: per-scale downsample radii, coarse to fine.
        icp_max_iter: per-scale ICP iteration caps (same length as radii).

    Returns:
        np.ndarray: the final 4x4 transformation.
    """
    if isinstance(source, str):
        source = o3d.io.read_point_cloud(source)
    elif not isinstance(source, o3d.geometry.PointCloud):
        raise TypeError(f"Unsupported Type {type(source)}")
    # BUGFIX: the feature/downsample stage previously used a hardcoded 0.005
    # voxel and ignored ransac_voxel_size (identical at the default value,
    # so default behavior is unchanged).
    source_down, target_down, source_fpfh, target_fpfh = _prepare_dataset(
        source, target, ransac_voxel_size)
    T = _execute_global_registration(source_down, target_down, source_fpfh, target_fpfh,
                                     ransac_voxel_size)
    # Multi-scale refinement, generalized from a fixed range(3) so any number
    # of (radius, max_iter) scales can be supplied.  ("iter" also no longer
    # shadows the builtin.)
    for radius, max_iteration in zip(icp_voxel_radius, icp_max_iter):
        source_down = source.voxel_down_sample(radius)
        target_down = target.voxel_down_sample(radius)
        source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        result_icp = o3d.pipelines.registration.registration_icp(
            source_down,
            target_down,
            radius * 5,
            T,
            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
            o3d.pipelines.registration.ICPConvergenceCriteria(
                relative_fitness=1e-6,
                relative_rmse=1e-6,
                max_iteration=max_iteration
            )
        )
        T = result_icp.transformation
    # BUGFIX: the unconditional debug visualization was removed — it opened a
    # blocking GUI window on every call and mutated ``source`` in place via
    # source.transform(T).  Re-enable locally if needed:
    # _draw_pointcloud([source.transform(T), target], T)
    return T
# Manual smoke test: register the bottle model against a captured scene cloud.
if __name__ == "__main__":
    target = o3d.io.read_point_cloud("pointcloud/pointcloud_0.pcd")
    source = o3d.io.read_point_cloud("pointcloud/bottle_model.pcd")
    Rmat = object_icp(target, source)

View File

@@ -0,0 +1,72 @@
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from interfaces.action import VisionObjectRecognition # 修改为你的包名
class VisionObjectRecognitionClient(Node):
    """Test client for the /vision_object_recognition action server.

    Polls at 1 Hz until the server is ready, then sends a recognition goal
    and logs feedback, acceptance, and the final result.
    """

    def __init__(self):
        super().__init__('vision_object_recognition_client')
        self._action_client = ActionClient(
            self,
            VisionObjectRecognition,
            '/vision_object_recognition'
        )
        # Example goal parameters.
        self.camera_position = ["left"]
        self.classes = ["medical_box"]
        # 1-second timer driving timer_callback.
        # NOTE(review): the timer is never cancelled, so once the server is
        # ready a new goal is sent every second — confirm this is intended.
        self.timer = self.create_timer(1.0, self.timer_callback)

    def timer_callback(self):
        """Send a recognition goal once the action server is ready."""
        if not self._action_client.server_is_ready():
            self.get_logger().info("Waiting for action server...")
            return
        # Build and send the goal.
        goal_msg = VisionObjectRecognition.Goal()
        goal_msg.camera_position = self.camera_position
        goal_msg.classes = self.classes
        self.get_logger().info(f"Sending goal: camera_position={self.camera_position}, classes={self.classes}")
        future = self._action_client.send_goal_async(
            goal_msg,
            feedback_callback=self.feedback_callback
        )
        future.add_done_callback(self.goal_response_callback)

    def feedback_callback(self, feedback_msg):
        """Log intermediate feedback published by the server."""
        feedback = feedback_msg.feedback
        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")

    def goal_response_callback(self, future_response):
        """Handle goal acceptance/rejection; chain the result future."""
        goal_handle = future_response.result()
        if not goal_handle.accepted:
            self.get_logger().warn("Goal rejected by server")
            return
        self.get_logger().info("Goal accepted by server")
        result_future = goal_handle.get_result_async()
        result_future.add_done_callback(self.result_callback)

    def result_callback(self, future_result):
        """Log the final result and each recognized object."""
        result = future_result.result().result
        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
        for obj in result.objects:
            self.get_logger().info(
                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
            )
def main(args=None):
    """Entry point: spin the recognition action client until interrupted."""
    rclpy.init(args=args)
    node = VisionObjectRecognitionClient()
    rclpy.spin(node)
    node.destroy_node()
    if rclpy.ok():
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,76 @@
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from interfaces.action import VisionObjectRecognition
class VisionObjectRecognitionClient(Node):
    """One-shot test client for the /vision_object_recognition action server.

    Polls every 5 s until the server is ready, cancels the timer, then
    sends a single recognition goal and logs feedback and result.
    """

    def __init__(self):
        super().__init__('vision_object_recognition_client')
        self._action_client = ActionClient(
            self,
            VisionObjectRecognition,
            '/vision_object_recognition'
        )
        # Example goal parameters.
        self.camera_position = ["left"]
        self.classes = ["medicine_box"]
        # self.classes = []
        # 5-second readiness-poll timer (cancelled after the first send).
        self.timer = self.create_timer(5.0, self.timer_callback)

    def timer_callback(self):
        """Send the goal once the server is ready; fires at most once."""
        if not self._action_client.server_is_ready():
            self.get_logger().info("Waiting for action server...")
            return
        # Stop polling so only a single goal is ever sent.
        self.timer.cancel()
        # Build and send the goal.
        goal_msg = VisionObjectRecognition.Goal()
        goal_msg.camera_position = self.camera_position
        goal_msg.classes = self.classes
        self.get_logger().info(f"Sending goal: camera_position={self.camera_position}, classes={self.classes}")
        future = self._action_client.send_goal_async(
            goal_msg,
            feedback_callback=self.feedback_callback
        )
        future.add_done_callback(self.goal_response_callback)

    def feedback_callback(self, feedback_msg):
        """Log intermediate feedback published by the server."""
        feedback = feedback_msg.feedback
        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")

    def goal_response_callback(self, future_response):
        """Handle goal acceptance/rejection; chain the result future."""
        goal_handle = future_response.result()
        if not goal_handle.accepted:
            self.get_logger().warn("Goal rejected by server")
            return
        self.get_logger().info("Goal accepted by server")
        result_future = goal_handle.get_result_async()
        result_future.add_done_callback(self.result_callback)

    def result_callback(self, future_result):
        """Log the final result and each recognized object."""
        result = future_result.result().result
        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
        for obj in result.objects:
            self.get_logger().info(
                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
            )
def main(args=None):
    """Entry point: spin the one-shot recognition client until interrupted."""
    rclpy.init(args=args)
    node = VisionObjectRecognitionClient()
    rclpy.spin(node)
    node.destroy_node()
    if rclpy.ok():
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,82 @@
import rclpy
from rclpy.node import Node
from rclpy.action import ActionClient
from interfaces.action import VisionObjectRecognition
class VisionObjectRecognitionClient(Node):
    """Burst test client for the /vision_object_recognition action server.

    Polls every 5 s until the server is ready, cancels the timer, then
    sends two goals back-to-back to exercise concurrent goal handling.
    """

    def __init__(self):
        super().__init__('vision_object_recognition_client')
        self._action_client = ActionClient(
            self,
            VisionObjectRecognition,
            '/vision_object_recognition'
        )
        # Example goal parameters.
        self.camera_position = ["left"]
        self.classes = ["medicine_box"]
        # self.classes = []
        # 5-second readiness-poll timer (cancelled after the burst is sent).
        self.timer = self.create_timer(5.0, self.timer_callback)

    def timer_callback(self):
        """Send two goals in a row once the server is ready; fires at most once."""
        if not self._action_client.server_is_ready():
            self.get_logger().info("Waiting for action server...")
            return
        # Stop polling so the burst is only sent once.
        self.timer.cancel()
        # Send two identical goals; each gets its own response callback.
        for i in range(2):
            goal_msg = VisionObjectRecognition.Goal()
            goal_msg.camera_position = self.camera_position
            goal_msg.classes = self.classes
            self.get_logger().info(
                f"Sending goal {i}: "
                f"camera_position={self.camera_position}, "
                f"classes={self.classes}"
            )
            future = self._action_client.send_goal_async(
                goal_msg,
                feedback_callback=self.feedback_callback
            )
            future.add_done_callback(self.goal_response_callback)

    def feedback_callback(self, feedback_msg):
        """Log intermediate feedback published by the server."""
        feedback = feedback_msg.feedback
        self.get_logger().info(f"Feedback: status={feedback.status}, info={feedback.info}")

    def goal_response_callback(self, future_response):
        """Handle goal acceptance/rejection; chain the result future."""
        goal_handle = future_response.result()
        if not goal_handle.accepted:
            self.get_logger().warn("Goal rejected by server")
            return
        self.get_logger().info("Goal accepted by server")
        result_future = goal_handle.get_result_async()
        result_future.add_done_callback(self.result_callback)

    def result_callback(self, future_result):
        """Log the final result and each recognized object."""
        result = future_result.result().result
        self.get_logger().info(f"Result received: success={result.success}, info={result.info}")
        for obj in result.objects:
            self.get_logger().info(
                f"Object: class={obj.class_name}, id={obj.class_id}, pose={obj.pose}, grab_width={obj.grab_width}"
            )
def main(args=None):
    """Entry point: spin the burst-test recognition client until interrupted."""
    rclpy.init(args=args)
    node = VisionObjectRecognitionClient()
    rclpy.spin(node)
    node.destroy_node()
    if rclpy.ok():
        rclpy.shutdown()


if __name__ == '__main__':
    main()

View File

@@ -1,14 +1,18 @@
import rclpy
from rclpy.executors import MultiThreadedExecutor
from vision_detect.vision_core import NodeManager
from vision_detect.VisionDetect.node import DetectNode
def main(args=None):
rclpy.init(args=args)
node = DetectNode('detect')
node = NodeManager('detect')
try:
rclpy.spin(node)
executor = MultiThreadedExecutor()
rclpy.spin(node, executor=executor)
except KeyboardInterrupt:
pass
finally:
node.destroy_node()
rclpy.shutdown()
if rclpy.ok():
rclpy.shutdown()

View File

@@ -116,9 +116,9 @@ class DetectNode(Node):
self.declare_parameter('output_masks', False)
self.output_masks = self.get_parameter('output_masks').value
self.declare_parameter('config_name', 'default_config.json')
self.declare_parameter('config_name', 'default_service_config.json')
self.config_name = self.get_parameter('config_name').value
self.config_dir = os.path.join(share_dir, 'configs/flexiv_configs', self.config_name)
self.config_dir = os.path.join(share_dir, 'configs/flexiv', self.config_name)
self.declare_parameter('set_confidence', 0.5)
self.set_confidence = self.get_parameter('set_confidence').value

View File

@@ -0,0 +1,36 @@
"""
source_test 节点的创建与启动入口。
节点类定义位于 vision_core.node_test.source_test。
"""
import sys
import rclpy
from rclpy.executors import MultiThreadedExecutor
from vision_core import SourceTestNode
def main(args=None):
    """Create and spin a SourceTestNode; honours an optional "--config <path>"."""
    rclpy.init(args=args)
    # Scan argv manually for "--config <path>" (no argparse, matching the
    # original behavior of ignoring all other arguments).
    config_path = None
    argv = sys.argv
    for idx, token in enumerate(argv):
        if token == "--config" and idx + 1 < len(argv):
            config_path = argv[idx + 1]
            break
    node = SourceTestNode(config_path=config_path)
    try:
        executor = MultiThreadedExecutor()
        rclpy.spin(node, executor=executor)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,4 @@
from .node import NodeManager
from .node_test import *
__all__ = ["NodeManager", "SourceTestNode"]

View File

@@ -0,0 +1,2 @@
from .runtime import *
from .enum import *

View File

@@ -0,0 +1,4 @@
from . import detectors
from . import estimators
from . import image_providers
from . import refiners

View File

@@ -0,0 +1,5 @@
from .object_detector import *
from .color_detector import *
from .crossboard_detector import *
# from .detector_baseline import *

View File

@@ -0,0 +1,40 @@
import cv2
import numpy as np
from ...enum import Status
from ...data_struct import ImageData, DetectData
from .detector_baseline import DetectorBaseline
__all__ = ["ColorDetector"]
class ColorDetector(DetectorBaseline):
    """Detects objects by HSV colour range within a depth window.

    Produces one binary mask covering every pixel whose depth lies in
    (0, distance) and whose HSV value falls inside any configured range.
    """

    def __init__(self, config, _logger):
        super().__init__()
        self.distance = config["distance"]        # max depth (same units as the depth image)
        self.color_range = config["color_range"]  # list of (lower_hsv, upper_hsv) pairs

    def _detect(self, classes_name, image_data: ImageData) -> tuple[DetectData | None, int]:
        """Return (mask-only DetectData, status).  ``classes_name`` is unused here."""
        if image_data.status != Status.SUCCESS:
            return None, image_data.status
        if not self.color_range:
            # Robustness fix: an empty colour-range config previously raised
            # IndexError when combining masks.
            return None, Status.NO_COLOR
        color_image = image_data.color_image
        depth_image = image_data.depth_image
        hsv_img = cv2.cvtColor(color_image, cv2.COLOR_BGR2HSV)
        # Zero-out pixels outside the valid depth window before thresholding.
        depth_filter_mask = np.zeros_like(depth_image, dtype=np.uint8)
        depth_filter_mask[(depth_image > 0) & (depth_image < self.distance)] = 1
        hsv_img[depth_filter_mask == 0] = 0
        mask_part_list = [cv2.inRange(hsv_img, color[0], color[1]) for color in self.color_range]
        # BUGFIX: masks were previously combined with integer addition in
        # uint8, so pixels matching two overlapping ranges wrapped around
        # (255 + 255 -> 254) and were then lost by the // 255 step.
        # Bitwise OR keeps every matched pixel at 255.
        mask = mask_part_list[0]
        for part in mask_part_list[1:]:
            mask = cv2.bitwise_or(mask, part)
        mask = mask // 255
        if not np.any(mask):
            return None, Status.NO_COLOR
        # Blend a blue overlay (image is BGR) onto detected pixels for visualization.
        color_image[mask > 0] = color_image[mask > 0] * 0.5 + np.array([255, 0, 0]) * 0.5
        return DetectData.create_mask_only_data(masks=[mask]), Status.SUCCESS

View File

@@ -0,0 +1,46 @@
import numpy as np
import cv2
from ...enum import Status
from ...data_struct import ImageData, DetectData
from .detector_baseline import DetectorBaseline
__all__ = ["CrossboardDetector"]
class CrossboardDetector(DetectorBaseline):
    """Detects a chessboard calibration pattern and masks its inner cells."""

    def __init__(self, config, _logger):
        super().__init__()
        self.pattern_size = config["pattern_size"]  # (columns, rows) of inner corners

    def _detect(self, classes_name, image_data: ImageData) -> tuple[DetectData | None, int]:
        """Return a mask covering the chessboard cells, or NO_CROSSBOARD.

        ``classes_name`` is unused here.  (Removed: an unused ``depth_image``
        local and a dead ``corners.reshape(-1, 2)`` whose result was discarded.)
        """
        color_image = image_data.color_image
        rgb_img_gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        ret, corners = cv2.findChessboardCorners(
            rgb_img_gray, self.pattern_size, cv2.CALIB_CB_FAST_CHECK
        )
        if not ret:
            return None, Status.NO_CROSSBOARD
        # Refine corner positions to sub-pixel accuracy (better calibration).
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
        corners_subpix = cv2.cornerSubPix(rgb_img_gray, corners, (11, 11), (-1, -1), criteria)
        corners_subpix = corners_subpix.reshape(self.pattern_size[1], self.pattern_size[0], 2)
        # Fill each quadrilateral formed by four neighbouring corners.
        mask = np.zeros(rgb_img_gray.shape, dtype=np.uint8)
        for i in range(0, self.pattern_size[1] - 1):
            for j in range(0, self.pattern_size[0] - 1):
                pts = np.array([
                    corners_subpix[i, j],
                    corners_subpix[i, j + 1],
                    corners_subpix[i + 1, j + 1],
                    corners_subpix[i + 1, j]
                ], dtype=np.int32)
                cv2.fillConvexPoly(mask, pts, 1)
        # Blend a red overlay (image is BGR) onto the board for visualization.
        color_image[mask > 0] = color_image[mask > 0] * 0.5 + np.array([0, 0, 255]) * 0.5
        return DetectData.create_mask_only_data(masks=[mask]), Status.SUCCESS

View File

@@ -0,0 +1,14 @@
from ...data_struct import DetectData, ImageDataContainer
__all__ = ["DetectorBaseline"]
class DetectorBaseline:
    """Abstract base for detectors that produce masks for a camera position."""

    def __init__(self):
        pass

    def _detect(self, classes_name, image_data) -> tuple[DetectData | None, int]:
        """Subclass hook: run detection on a single ImageData.

        BUGFIX: the silent ``pass`` implicitly returned None, which callers
        then failed to unpack with a confusing TypeError; raising makes a
        missing override explicit at the point of the mistake.
        """
        raise NotImplementedError("Detector subclasses must implement _detect()")

    def get_masks(self, position, classes_name, image_data: ImageDataContainer) -> tuple[DetectData | None, int]:
        """Run detection on the image stored under ``position`` in the container."""
        return self._detect(classes_name, image_data[position])

View File

@@ -0,0 +1,65 @@
import os
import json
import torch
from ultralytics import YOLO
from ament_index_python.packages import get_package_share_directory
from ...enum import Status
from .detector_baseline import DetectorBaseline
from ...data_struct import ImageData, DetectData
__all__ = ["ObjectDetector"]
SHARE_DIR = get_package_share_directory('vision_detect')
class ObjectDetector(DetectorBaseline):
    """YOLO-based instance-segmentation detector.

    Maps requested class names to model class ids via a JSON label map and
    returns the raw ultralytics results wrapped in DetectData.
    """

    def __init__(self, config, _logger):
        super().__init__()
        self.logger = _logger
        self.confidence = config["confidence"]  # minimum prediction confidence
        # class-name -> class-id map used to translate requested classes
        with open(os.path.join(SHARE_DIR, config["label_map_path"]), "r", encoding="utf-8") as f:
            self.labels_map = json.load(f)
        # --- model initialisation ---
        # (was a stray `"""init model"""` expression statement, now a comment)
        checkpoint_path = str(os.path.join(SHARE_DIR, config["checkpoint_path"]))
        self._device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        try:
            self.model = YOLO(checkpoint_path)
        except Exception as e:
            # Chain the original exception so the root cause is preserved.
            raise ValueError(f'Failed to load YOLO model: {e}') from e

    def _detect(
        self,
        classes_name: list[str],
        image_data: ImageData
    ) -> tuple[DetectData | None, int]:
        """Run segmentation; returns (DetectData, SUCCESS) or (None, error status).

        Unknown class names are dropped; when none remain, detection runs
        unrestricted (classes=None), matching the original behavior.
        """
        if image_data.status != Status.SUCCESS:
            return None, image_data.status
        classes = []
        for _class in classes_name:
            if _class in self.labels_map:
                classes.append(self.labels_map[_class])
        if not classes:
            self.logger.warning("No legal classes name")
            classes = None  # ultralytics: None means "all classes"
        color_image = image_data.color_image
        with torch.no_grad():
            results = self.model.predict(
                color_image,
                device=self._device,
                retina_masks=True,
                conf=self.confidence,
                classes=classes,
            )
        if results[0].masks is None or len(results[0].masks) == 0:
            return None, Status.NO_DETECT
        return DetectData.create_data(results=results), Status.SUCCESS

View File

@@ -0,0 +1,5 @@
from .pca_estimator import *
from .icp_estimator import *
from .gsnet_estimator import *
# from .estimator_baseline import *

View File

@@ -0,0 +1,28 @@
from ...data_struct import ImageData, ImageDataContainer, DetectData, PoseData
__all__ = ["EstimatorBaseline"]
class EstimatorBaseline:
    """Abstract base for pose estimators operating on detection masks."""

    def __init__(self, config):
        # Estimator-specific configuration (voxel sizes, model paths, ...).
        self._config = config

    def _estimate(
        self,
        image_data: ImageData,
        detect_data: DetectData,
        get_grab_width: bool = True
    ) -> tuple[PoseData | None, int]:
        """Subclass hook: estimate poses for one ImageData.

        BUGFIX: the silent ``pass`` implicitly returned None, which callers
        then failed to unpack; raising makes a missing override explicit.
        """
        raise NotImplementedError("Estimator subclasses must implement _estimate()")

    def get_poses(
        self,
        position: str,
        image_data: ImageDataContainer,
        detect_data: DetectData,
        get_grab_width: bool = True
    ) -> tuple[PoseData | None, int]:
        """Estimate poses for the camera ``position`` stored in the container."""
        return self._estimate(
            image_data=image_data[position],
            detect_data=detect_data,
            get_grab_width=get_grab_width
        )

View File

@@ -0,0 +1,158 @@
import torch
import numpy as np
import transforms3d as tfs
import MinkowskiEngine as ME
from ...enum import Status
from ...utils import pointcloud, image
from ...data_struct import ImageData, DetectData, PoseData
from .estimator_baseline import EstimatorBaseline
from ..model.gsnet import pred_decode, ModelFreeCollisionDetector
__all__ = ["GSNetEstimator"]
def minkowski_collate_fn(list_data):
    """Collate per-sample dicts into one sparse MinkowskiEngine batch."""
    coords = [sample["coors"] for sample in list_data]
    feats = [sample["feats"] for sample in list_data]
    coordinates_batch, features_batch = ME.utils.sparse_collate(coords, feats)
    coordinates_batch = np.ascontiguousarray(coordinates_batch, dtype=np.int32)
    coordinates_batch, features_batch, _, quantize2original = ME.utils.sparse_quantize(
        coordinates_batch, features_batch, return_index=True, return_inverse=True)
    # Dense point clouds are stacked along a new batch dimension.
    clouds = torch.stack(
        [torch.from_numpy(sample["point_clouds"]) for sample in list_data], 0)
    return {
        "coors": coordinates_batch,
        "feats": features_batch,
        "quantize2original": quantize2original,
        "point_clouds": clouds,
    }
class GSNetEstimator(EstimatorBaseline):
    """GSNet-based end-to-end grasp pose estimator.

    For every detection mask a denoised point cloud is built from the
    masked depth crop, fed through the sparse-conv GSNet model, and the
    best-scoring (optionally collision-filtered) grasp is kept.
    """

    def __init__(self, config):
        super().__init__(config)
        # Collision filtering runs only when a positive threshold is configured.
        self.collision_sign = True if self._config.get("collision_thresh", 0.00) > 0 else False

    def _estimate(
        self,
        image_data: ImageData,
        detect_data: DetectData,
        get_grab_width: bool = True
    ) -> tuple[PoseData | None, int]:
        """Predict one grasp pose per mask in ``detect_data``.

        Returns (PoseData, SUCCESS) when at least one mask yields a pose,
        otherwise (None, CANNOT_ESTIMATE).  Per-mask failures are recorded
        in the PoseData via their status codes.
        """
        depth_image = image_data.depth_image
        karr = image_data.karr
        # (width, height) — reverse of numpy's (rows, cols).
        image_size = depth_image.shape[:2][::-1]
        masks = detect_data.masks
        # check boxes data
        boxes = detect_data.boxes
        box_sign = False if boxes is None else True
        if self.collision_sign:
            # Full-scene cloud used by the collision detector below.
            full_pcd = pointcloud.create_o3d_pcd(
                image_data.depth_image, image_size, image_data.karr, **self._config
            )
            full_points = np.asarray(full_pcd.points)
        n = 0  # number of masks that produced a pose
        pose_data = PoseData()
        for i, mask in enumerate(masks):
            karr_mask = karr.copy()
            # Crop around the detection to keep the point cloud small;
            # ``p`` is the crop's top-left offset in the original image.
            if box_sign:
                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
            else:
                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
            depth_img_mask = np.zeros_like(crops[0])
            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
            # Shift the principal point to account for the crop offset.
            karr_mask[2] -= p[0]
            karr_mask[5] -= p[1]
            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
                depth_img_mask, image_size, karr_mask, **self._config
            )
            if CODE != 0:
                # Record the per-mask failure and continue with the next mask.
                pose_data.add_data(CODE)
                continue
            # Build the inference batch for the sparse-conv backbone.
            points = np.asarray(pcd.points)
            ret_dict = {
                'point_clouds': points.astype(np.float32),
                'coors': points.astype(np.float32) / self._config.get("voxel_size", 0.002),
                'feats': np.ones_like(points).astype(np.float32),
            }
            batch_data = minkowski_collate_fn([ret_dict])
            # Move tensors to the model device.
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            for key in batch_data:
                batch_data[key] = batch_data[key].to(device)
            # Bail out when the cloud is too sparse (e.g. threshold 128 / 256).
            if batch_data['coors'].shape[0] < 128:
                pose_data.add_data(Status.COORS_TOO_FEW)
                continue
            if batch_data["point_clouds"].shape[1] < 128:
                pose_data.add_data(Status.POINT_CLOUDS_TOO_FEW)
                continue
            # Inference with gradients disabled.
            with torch.no_grad():
                end_points = self._config.get("model")(batch_data)
            if end_points is None:
                pose_data.add_data(Status.ZERO_TRUE_NUM)
                continue
            grasp_preds = pred_decode(end_points)
            # Post-process: keep the 10 best-scoring candidates.
            preds = grasp_preds[0].detach().cpu().numpy()
            sorted_index = np.argsort(-preds[:, 0])
            preds = preds[sorted_index]
            preds = preds[:10]
            if self.collision_sign:
                mfcdetector = ModelFreeCollisionDetector(
                    full_points,
                    voxel_size=self._config.get("voxel_size", 0.002)
                )
                collision_mask = mfcdetector.detect(
                    preds, approach_dist=0.05,
                    collision_thresh=self._config.get("collision_thresh", 0.01)
                )
                preds = preds[~collision_mask]
            # Prediction layout: [:, 4:13] rotation, [:, 13:16] center, [:, 1] width.
            Rs = preds[:, 4:13].reshape(-1, 3, 3)
            centers = preds[:, 13:16].reshape(-1, 3)
            grab_width = preds[:, 1]
            if not len(Rs):
                pose_data.add_data(Status.E2E_NO_PREDICTION)
                continue
            pose_rmat = []
            for r, center in zip(Rs, centers):
                # Re-order and sign-fix the axes into a deterministic,
                # right-handed frame.
                vz, vx, vy = r[:, 0], r[:, 1], r[:, 2]
                if vx[0] < 0:
                    vx = -vx
                if vz[2] < 0:
                    vz = -vz
                if not np.allclose(np.cross(vx, vy), vz):
                    vy = -vy
                R = np.column_stack((vx, vy, vz))
                pose_rmat.append(tfs.affines.compose(np.squeeze(np.asarray(center)), R, [1, 1, 1]))
            if len(pose_rmat) == 0:
                pose_data.add_data(Status.E2E_NO_VALID_MATRIX)
                continue
            # Keep the top-ranked pose and width for this mask.
            pose_data.add_data(Status.SUCCESS, pose_rmat[0], [grab_width[0]])
            n += 1
        if n == 0:
            return None, Status.CANNOT_ESTIMATE
        return pose_data, Status.SUCCESS

View File

@@ -0,0 +1,215 @@
from pathlib import Path
import numpy as np
import open3d as o3d
from ament_index_python.packages import get_package_share_directory
from ...enum import Status
from ...utils import pointcloud, image
from .estimator_baseline import EstimatorBaseline
from ...data_struct import ImageData, DetectData, PoseData
__all__ = ["ICPEstimator"]
SHARE_DIR = Path(get_package_share_directory('vision_detect'))
# def _draw_pointcloud(pcd, T):
# R = T[0:3, 0:3]
# point = T[0:3, 3:4].flatten()
# x, y, z = R[:, 0], R[:, 1], R[:, 2]
#
# points = [
# [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
# point, point + x, point + y, point + z
#
# ] # 画点:原点、第一主成分、第二主成分
# lines = [
# [0, 1], [0, 2], [0, 3],
# [4, 5], [4, 6], [4, 7]
# ] # 画出三点之间两两连线
# colors = [
# [1, 0, 0], [0, 1, 0], [0, 0, 1],
# [1, 0, 0], [0, 1, 0], [0, 0, 1]
# ]
#
# line_set = o3d.geometry.LineSet(points=o3d.utility.Vector3dVector(points),
# lines=o3d.utility.Vector2iVector(lines))
# line_set.colors = o3d.utility.Vector3dVector(colors)
#
# pcd.append(line_set)
# o3d.visualization.draw_geometries(pcd)
def _preprocess_point_cloud(
    pcd: o3d.geometry.PointCloud,
    voxel_size: float
):
    """Downsample a cloud, then attach normals and FPFH features for registration."""
    down = pcd.voxel_down_sample(voxel_size)
    down.estimate_normals(
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 2, max_nn=30)
    )
    fpfh = o3d.pipelines.registration.compute_fpfh_feature(
        down,
        o3d.geometry.KDTreeSearchParamHybrid(radius=voxel_size * 5, max_nn=100)
    )
    return down, fpfh
def _prepare_dataset(source, target, voxel_size):
    """Preprocess both clouds for registration (downsample + FPFH)."""
    # Identity pre-transform kept from the original flow (a no-op).
    source.transform(np.identity(4))
    source_down, source_fpfh = _preprocess_point_cloud(source, voxel_size)
    target_down, target_fpfh = _preprocess_point_cloud(target, voxel_size)
    return source_down, target_down, source_fpfh, target_fpfh
def _execute_global_registration(
    source_down,
    target_down,
    source_fpfh,
    target_fpfh,
    voxel_size
):
    """Coarse RANSAC alignment on FPFH feature matches.

    Returns the estimated 4x4 transformation mapping source onto target,
    to be refined afterwards by ICP.
    """
    distance_threshold = voxel_size * 1.5
    result = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
        source_down,
        target_down,
        source_fpfh,
        target_fpfh,
        True,  # mutual_filter
        distance_threshold,
        o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
        3,  # ransac_n: correspondences per RANSAC sample
        [
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
            o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(distance_threshold)
        ],
        o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999))
    return result.transformation
def _object_icp(
    target: o3d.geometry.PointCloud,
    source: o3d.geometry.PointCloud,
    ransac_voxel_size: float = 0.005,
    icp_voxel_radius: list[float] | tuple[float, ...] | None = (0.004, 0.002, 0.001),
    icp_max_iter: list[int] | tuple[int, ...] | None = (50, 30, 14),
):
    """Register ``source`` onto ``target``: global RANSAC alignment followed
    by coarse-to-fine point-to-point ICP refinement.

    :param target: fixed reference cloud.
    :param source: cloud to be aligned.
    :param ransac_voxel_size: voxel size for downsampling / FPFH / the RANSAC
        correspondence threshold (5 mm default).
    :param icp_voxel_radius: downsampling radius per ICP scale, coarse to fine.
    :param icp_max_iter: max iterations per ICP scale (paired with radii).
    :return: 4x4 transformation matrix mapping source onto target.
    """
    # Bug fix: the original hard-coded ``voxel_size = 0.005`` here, silently
    # ignoring the ``ransac_voxel_size`` parameter during FPFH preparation.
    source_down, target_down, source_fpfh, target_fpfh = _prepare_dataset(
        source, target, ransac_voxel_size
    )
    T = _execute_global_registration(
        source_down, target_down, source_fpfh, target_fpfh, ransac_voxel_size
    )
    # Coarse-to-fine ICP; generalized from a fixed range(3) so the radius /
    # iteration lists may have any (matching) length.
    for radius, max_iter in zip(icp_voxel_radius, icp_max_iter):
        src_down = source.voxel_down_sample(radius)
        tgt_down = target.voxel_down_sample(radius)
        src_down.estimate_normals(
            o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        tgt_down.estimate_normals(
            o3d.geometry.KDTreeSearchParamHybrid(radius=radius * 2, max_nn=30))
        result_icp = o3d.pipelines.registration.registration_icp(
            src_down,
            tgt_down,
            radius * 5,   # max correspondence distance at this scale
            T,
            o3d.pipelines.registration.TransformationEstimationPointToPoint(),
            o3d.pipelines.registration.ICPConvergenceCriteria(
                relative_fitness=1e-6,
                relative_rmse=1e-6,
                max_iteration=max_iter
            )
        )
        T = result_icp.transformation
    return T
class ICPEstimator(EstimatorBaseline):
    """Pose estimator that aligns a complete reference model to each detected
    object's masked depth point cloud via global RANSAC + multi-scale ICP."""
    def __init__(self, config):
        super().__init__(config)
        # Complete reference model, loaded once at start-up.
        # read_point_cloud takes a string path; convert the Path explicitly.
        self._pcd_source_full = o3d.io.read_point_cloud(
            str(SHARE_DIR / config["complete_model_path"])
        )
    def _estimate(
        self,
        image_data: ImageData,
        detect_data: DetectData,
        get_grab_width: bool = True
    ) -> tuple[PoseData | None, int]:
        """Estimate one pose per detection mask via ICP against the model.

        ``get_grab_width`` is accepted for interface parity; this estimator
        always reports zero grasp widths. Returns (PoseData, SUCCESS) when at
        least one pose was estimated, else (PoseData, CANNOT_ESTIMATE).
        """
        depth_image = image_data.depth_image
        karr = image_data.karr
        image_size = depth_image.shape[:2][::-1]  # (width, height)
        masks = detect_data.masks
        boxes = detect_data.boxes
        has_boxes = boxes is not None
        n = 0
        pose_data = PoseData()
        for i, mask in enumerate(masks):
            karr_mask = karr.copy()
            # Crop depth + mask to the detection region (box when available).
            if has_boxes:
                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
            else:
                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
            # Keep depth only inside the instance mask.
            # NOTE(review): in the mask-only branch a single image is passed
            # to crop_imgs_mask yet crops[1] is read below — confirm the
            # helper also returns the cropped mask.
            depth_img_mask = np.zeros_like(crops[0])
            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
            # Shift the principal point by the crop offset.
            karr_mask[2] -= p[0]
            karr_mask[5] -= p[1]
            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
                depth_img_mask, image_size, karr_mask, **self._config
            )
            if CODE != 0:
                pose_data.add_data(CODE)
                continue
            # Bug fix: the original called self._pcd_source_full.get("source"),
            # but _pcd_source_full is the PointCloud itself — o3d PointCloud
            # has no .get() method, so this raised AttributeError.
            pose_rmat = _object_icp(
                self._pcd_source_full,
                pcd,
                ransac_voxel_size=self._config.get("ransac_voxel_size", 0.005),
                icp_voxel_radius=self._config.get("icp_voxel_radius", [0.004, 0.002, 0.001]),
                icp_max_iter=self._config.get("icp_max_iter", [50, 30, 14])
            )
            grab_width = [0.0, 0.0, 0.0]
            pose_data.add_data(Status.SUCCESS, pose_rmat, tuple(grab_width))
            n += 1
        if n == 0:
            return pose_data, Status.CANNOT_ESTIMATE
        return pose_data, Status.SUCCESS

View File

@@ -0,0 +1,117 @@
import numpy as np
import transforms3d as tfs
from ...enum import Status
from ...utils import pointcloud, image
from .estimator_baseline import EstimatorBaseline
from ...data_struct import ImageData, DetectData, PoseData
__all__ = ["PCAEstimator"]
def pca(data: np.ndarray, sort=True):
    """Principal component analysis of an (N, D) point set.

    Computes the eigendecomposition of the covariance matrix of the
    mean-centred data. When ``sort`` is truthy the eigenvalues (and the
    matching eigenvector columns) are ordered by decreasing eigenvalue.
    Returns (eigenvalues, eigenvectors), or (None, None) on a
    linear-algebra failure.
    """
    centered = data - data.mean(axis=0)  # remove the centroid
    try:
        # np.cov expects variables in rows, hence the transpose.
        eigenvalues, eigenvectors = np.linalg.eigh(np.cov(centered.T))
    except np.linalg.LinAlgError:
        return None, None
    if sort:
        order = np.argsort(eigenvalues)[::-1]  # descending by eigenvalue
        return eigenvalues[order], eigenvectors[:, order]
    return eigenvalues, eigenvectors
class PCAEstimator(EstimatorBaseline):
    """Pose estimator that derives an orientation frame per detection from the
    principal axes of the masked depth point cloud (OBB axes when grasp widths
    are requested, raw PCA axes otherwise)."""
    def __init__(self, config):
        super().__init__(config)
    def _estimate(
        self,
        image_data: ImageData,
        detect_data: DetectData,
        get_grab_width: bool = True,
    ) -> tuple[PoseData | None, int]:
        """Estimate one pose (and optionally grasp widths) per detection mask.

        Returns (PoseData, SUCCESS) when at least one detection produced a
        pose; otherwise (None, CANNOT_ESTIMATE). Per-detection failures are
        recorded in PoseData with their status codes.
        """
        depth_image = image_data.depth_image
        karr = image_data.karr
        # (width, height) ordering expected by the point-cloud helpers.
        image_size = depth_image.shape[:2][::-1]
        masks = detect_data.masks
        # check boxes data
        boxes = detect_data.boxes
        box_sign = False if boxes is None else True
        n = 0  # number of successfully estimated poses
        pose_data = PoseData()
        for i, mask in enumerate(masks):
            karr_mask = karr.copy()
            # Crop depth + mask to the detection region (box when available).
            if box_sign:
                crops, p = image.crop_imgs_xywh([depth_image, mask], boxes[i], same_sign=True)
            else:
                crops, p = image.crop_imgs_mask([depth_image], mask, same_sign=True)
            # Keep depth only where the instance mask is set.
            # NOTE(review): in the mask-only branch a single image is passed
            # to crop_imgs_mask yet crops[1] is read below — confirm the
            # helper also returns the cropped mask.
            depth_img_mask = np.zeros_like(crops[0])
            depth_img_mask[crops[1] > 0] = crops[0][crops[1] > 0]
            # Shift the principal point (cx, cy) by the crop offset.
            karr_mask[2] -= p[0]
            karr_mask[5] -= p[1]
            pcd, CODE = pointcloud.create_o3d_denoised_pcd(
                depth_img_mask, image_size, karr_mask, **self._config
            )
            if CODE != 0:
                # Record the per-detection failure and move on.
                pose_data.add_data(CODE)
                continue
            x, y, z = pcd.get_center()
            if get_grab_width:
                # The oriented bounding box needs at least 4 points.
                if np.asarray(pcd.points).shape[0] < 4:
                    pose_data.add_data(Status.TOO_FEW_POINTS_OBB)
                    continue
                obb = pcd.get_oriented_bounding_box()
                extent = obb.extent
                # Reorder axes so the longest OBB extent comes first.
                order = np.argsort(-extent)
                grab_width = extent[order]
                v = obb.R
                v = v[:, order]
                # NOTE(review): obb.R cannot be None here, so this check looks
                # unreachable — confirm whether it guards something else.
                if v is None:
                    pose_data.add_data(Status.PCA_NO_VECTOR)
                    continue
                # 5% safety margin on the gripper opening widths.
                grab_width = grab_width * 1.05
            else:
                # No widths requested: use raw PCA axes of the cloud.
                w, v = pca(np.asarray(pcd.points))
                if w is None or v is None:
                    pose_data.add_data(Status.PCA_NO_VECTOR)
                    continue
                grab_width = (0.0, 0.0, 0.0)
            # Canonicalise axis signs: x toward +X, y toward +Y, and flip z
            # if needed so (vx, vy, vz) stays a right-handed frame.
            vx, vy, vz = v[:, 0], v[:, 1], v[:, 2]
            if vx[0] < 0:
                vx = -vx
            if vy[1] < 0:
                vy = -vy
            if not np.allclose(np.cross(vx, vy), vz):
                vz = -vz
            R = np.column_stack((vx, vy, vz))
            # Homogeneous 4x4 pose: translation = cloud centre, rotation = R.
            pose_mat = tfs.affines.compose(np.squeeze(np.asarray((x, y, z))), R, [1, 1, 1])
            pose_data.add_data(Status.SUCCESS, pose_mat, tuple(grab_width))
            n += 1
        if n == 0:
            return None, Status.CANNOT_ESTIMATE
        return pose_data, Status.SUCCESS

View File

@@ -0,0 +1,5 @@
from .driver_source import *
from .single_topic_source import *
from .muli_topic_source import *
# from .source_baseline import *

View File

@@ -0,0 +1,28 @@
from rclpy.node import Node
from interfaces.msg import ImgMsg
from .source_baseline import SourceBaseline
__all__ = ["DriverSource"]
class DriverSource(SourceBaseline):
    """Image source fed by the camera driver's aggregated ImgMsg topic."""
    def __init__(self, config, node: Node):
        super().__init__(config)
        # One subscription delivers a complete frame bundle per message.
        topic_name = config["subscription_name"]
        self._sub = node.create_subscription(
            ImgMsg, topic_name, self._subscription_callback, 10
        )
    def _subscription_callback(self, msg):
        """Store the latest frame bundle for msg.position (lock-protected)."""
        with self._lock:
            self._images_buffer.save_data(
                position=msg.position,
                color=msg.image_color,
                depth=msg.image_depth,
                karr=msg.karr,
                darr=msg.darr,
            )

View File

@@ -0,0 +1,80 @@
import threading
from rclpy.node import Node
from message_filters import Subscriber, ApproximateTimeSynchronizer
from sensor_msgs.msg import Image, CameraInfo
from .source_baseline import SourceBaseline
__all__ = ["MuliTopicSource"]
class TopicSubscriber:
    """Per-position subscriber: fetches CameraInfo once, then buffers
    time-synchronised color/depth pairs into the shared image buffer."""
    def __init__(
        self,
        position,
        topic_configs,
        node: Node,
        lock: threading.Lock,
        image_buffer
    ):
        self.lock = lock
        self.position = position
        self.image_buffer = image_buffer
        self.event = threading.Event()
        self.camera_info = []  # becomes [k, d] once CameraInfo arrives
        # topic_configs layout: [color_topic, depth_topic, camera_info_topic]
        self.sub_camera_info = node.create_subscription(
            CameraInfo,
            topic_configs[2],
            self._camera_info_callback,
            10
        )
        node.get_logger().info("Waiting for camera info...")
        # NOTE(review): blocks the constructor until camera info arrives; the
        # node must be spinning elsewhere for the callback to fire — confirm.
        self.event.wait()
        node.destroy_subscription(self.sub_camera_info)
        node.get_logger().info("Camera info received.")
        self.sub_color = Subscriber(node, Image, topic_configs[0])
        self.sub_depth = Subscriber(node, Image, topic_configs[1])
        self.sync_subscriber = ApproximateTimeSynchronizer(
            [self.sub_color, self.sub_depth],
            queue_size=10,
            slop=0.1
        )
        self.sync_subscriber.registerCallback(self._sync_sub_callback)
    def _sync_sub_callback(self, color, depth):
        """Buffer a time-synchronised color/depth pair for this position."""
        with self.lock:
            self.image_buffer.save_data(
                position=self.position,
                color=color,
                depth=depth,
                karr=self.camera_info[0],
                darr=self.camera_info[1]
            )
    def _camera_info_callback(self, msg):
        """Capture intrinsics (k) and distortion (d) once, then unblock __init__."""
        if len(self.camera_info) == 2:
            return
        # Bug fix: the original tested msg.k twice and never looked at msg.d.
        # msg.d may legitimately be empty (distortion-free camera), so only
        # require that it is present.
        if msg.k is not None and len(msg.k) > 0 and msg.d is not None:
            self.camera_info = [msg.k, msg.d]
            self.event.set()
class MuliTopicSource(SourceBaseline):
    """Image source built from one TopicSubscriber per camera position."""
    def __init__(self, config, node: Node):
        super().__init__(config)
        # Bug fix: iterating a dict directly yields only its keys, so the
        # original `for key, value in config` tried to unpack each key string
        # and failed. Use .items() to get (position, topic_configs) pairs.
        # NOTE(review): assumes config maps position -> topic list; confirm no
        # switch keys (e.g. "denoising") are mixed into the same dict.
        self.subscribers = [
            TopicSubscriber(
                position,
                topic_configs,
                node,
                self._lock,
                self._images_buffer
            ) for position, topic_configs in config.items()
        ]

View File

@@ -0,0 +1,56 @@
import threading
from rclpy.node import Node
from message_filters import Subscriber, ApproximateTimeSynchronizer
from sensor_msgs.msg import Image, CameraInfo
from .source_baseline import SourceBaseline
__all__ = ["SingleTopicSource"]
class SingleTopicSource(SourceBaseline):
    """Image source for one camera publishing separate color/depth/info topics."""
    def __init__(self, config, node: Node):
        super().__init__(config)
        self.position = config["position"]
        self.event = threading.Event()
        self.camera_info = []  # becomes [k, d] once CameraInfo arrives
        self.sub_camera_info = node.create_subscription(
            CameraInfo,
            config["camera_info_topic_name"],
            self._camera_info_callback,
            10
        )
        node.get_logger().info("Waiting for camera info...")
        # NOTE(review): blocks the constructor until camera info arrives; the
        # node must be spinning elsewhere for the callback to fire — confirm.
        self.event.wait()
        node.destroy_subscription(self.sub_camera_info)
        node.get_logger().info("Camera info received.")
        self.sub_color_image = Subscriber(node, Image, config["color_image_topic_name"])
        self.sub_depth_image = Subscriber(node, Image, config["depth_image_topic_name"])
        self.sync_subscriber = ApproximateTimeSynchronizer(
            [self.sub_color_image, self.sub_depth_image],
            queue_size=10,
            slop=0.1
        )
        self.sync_subscriber.registerCallback(self._sync_sub_callback)
    def _camera_info_callback(self, msg):
        """Capture intrinsics (k) and distortion (d) once, then unblock __init__."""
        if len(self.camera_info) == 2:
            return
        # Bug fix: the original tested msg.k twice and never looked at msg.d.
        # msg.d may legitimately be empty (distortion-free camera), so only
        # require that it is present.
        if msg.k is not None and len(msg.k) > 0 and msg.d is not None:
            self.camera_info = [msg.k, msg.d]
            self.event.set()
    def _sync_sub_callback(self, color, depth):
        """Buffer a time-synchronised color/depth pair for this position."""
        with self._lock:
            self._images_buffer.save_data(
                position=self.position,
                color=color,
                depth=depth,
                karr=self.camera_info[0],
                darr=self.camera_info[1]
            )

View File

@@ -0,0 +1,155 @@
import time
import threading
from dataclasses import dataclass, field
import cv2
from numpy.typing import NDArray
from cv_bridge import CvBridge
from ...enum import Status
from ...data_struct import ImageDataContainer
from ...utils import image
__all__ = ['SourceBaseline']
@dataclass(slots=True)
class _BufferData:
image_color: NDArray | None
image_depth: NDArray | None
karr: NDArray | list[float] | None
darr: NDArray | list[float] | None
def is_empty(self):
return (self.image_color is None or self.image_depth is None
or self.karr is None or self.darr is None)
@dataclass(slots=True)
class _ImageBuffer:
data_dict: dict[str, _BufferData] = field(default_factory=dict)
def __setitem__(self, key, value):
raise AttributeError
def __getitem__(self, key):
return self.data_dict[key]
def __contains__(self, key):
return key in self.data_dict
def __len__(self):
return len(self.data_dict)
def save_data(
self,
position: str,
*,
color: NDArray | None,
depth: NDArray | None,
karr: list[float] | None,
darr: list[float] | None,
):
self.data_dict[position] = _BufferData(color, depth, karr, darr)
class SourceBaseline:
    """Base class for image sources: holds the shared frame buffer and applies
    optional per-frame preprocessing (quality gate, undistortion, denoising,
    contrast enhancement) when frames are fetched."""
    def __init__(self, config):
        self._images_buffer = _ImageBuffer()
        self.cv_bridge = CvBridge()
        self._lock = threading.Lock()
        # CLAHE used by the enhancement step (operates on the L channel).
        self._clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        # Preprocessing switches, all off by default.
        self.distortion_switch = config.get("distortion", False)
        self.denoising_switch = config.get("denoising", False)
        self.enhancement_switch = config.get("enhancement", False)
        self.quality_switch = config.get("quality", False)
        # Minimum Laplacian variance for a frame to count as sharp enough.
        self.quality_threshold = config.get("quality_threshold", 100.0)
    def get_images(self, positions: tuple[str, ...]) -> tuple[ImageDataContainer | None, int]:
        """Fetch and preprocess the latest frame for each requested position.

        Returns (container, SUCCESS) when at least one position yielded a
        usable frame; (None, NO_CAMERA_DATA) when the buffer is empty;
        (None, NO_ALL_POSITION_DATA) when every position failed.
        """
        time_start = time.time()
        with self._lock:
            image_data = ImageDataContainer()
            if len(self._images_buffer) == 0:
                return None, Status.NO_CAMERA_DATA
            # Snapshot buffer entries (or a failure marker) per position.
            buffer_data_list = []
            for position in positions:
                if position not in self._images_buffer:
                    buffer_data_list.append(Status.NO_POSITION_DATA)
                    continue
                if self._images_buffer[position].is_empty():
                    buffer_data_list.append(Status.NO_POSITION_DATA)
                    continue
                buffer_data_list.append(self._images_buffer[position])
            time_1 = time.time()
            valid_positions = 0
            # Bug fix: the original second loop iterated only over the data and
            # reused the stale `position` left over from the first loop, so
            # every add_data() call (and the darr lookup) used the LAST
            # position. Pair each entry with its position via zip.
            for position, data in zip(positions, buffer_data_list):
                if data == Status.NO_POSITION_DATA:
                    image_data.add_data(position, Status.NO_POSITION_DATA)
                    continue
                color_img_cv = self.cv_bridge.imgmsg_to_cv2(data.image_color, "bgr8")
                depth_img_cv = self.cv_bridge.imgmsg_to_cv2(data.image_depth, '16UC1')
                # 图像质量判断: reject blurry frames via Laplacian variance.
                if self.quality_switch:
                    # Bug fix: the original passed the `image` utils module to
                    # cvtColor instead of the decoded color frame.
                    gray = cv2.cvtColor(color_img_cv, cv2.COLOR_BGR2GRAY)
                    laplacian_source = cv2.Laplacian(gray, cv2.CV_64F).var()
                    if laplacian_source < self.quality_threshold:
                        image_data.add_data(position, Status.IMAGE_QUALITY_LOW)
                        continue
                # 畸变矫正 (distortion correction)
                if self.distortion_switch:
                    camera_size = color_img_cv.shape[:2][::-1]
                    color_img_cv, depth_img_cv, k = image.distortion_correction(
                        color_img_cv,
                        depth_img_cv,
                        data.karr,
                        data.darr,
                        camera_size
                    )
                else:
                    k = data.karr
                # 彩色图像双边滤波 (edge-preserving bilateral denoise)
                if self.denoising_switch:
                    color_img_cv = cv2.bilateralFilter(color_img_cv, 9, 75, 75)
                # 彩色图像增强: CLAHE on L channel, rescaled to keep the
                # original average luminance.
                if self.enhancement_switch:
                    lab = cv2.cvtColor(color_img_cv, cv2.COLOR_BGR2LAB)
                    ch = cv2.split(lab)
                    avg_luminance = cv2.mean(ch[0])[0]
                    ch[0] = self._clahe.apply(ch[0])
                    current_avg_luminance = cv2.mean(ch[0])[0]
                    alpha = avg_luminance / current_avg_luminance
                    ch[0] = cv2.convertScaleAbs(ch[0], alpha=alpha, beta=0)
                    lab = cv2.merge(ch)
                    color_img_cv = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
                image_data.add_data(
                    position=position,
                    status=Status.SUCCESS,
                    color_image=color_img_cv,
                    depth_image=depth_img_cv,
                    karr=list(k),
                    # Fixed: read darr from this entry, not via the stale
                    # position index into the buffer.
                    darr=tuple(data.darr)
                )
                valid_positions += 1
            time_end = time.time()
            print(f"get_data: {(time_1 - time_start) * 1000} ms")
            print(f"img_cv_process: {(time_end - time_1) * 1000} ms")
            if valid_positions == 0:
                return None, Status.NO_ALL_POSITION_DATA
            else:
                return image_data, Status.SUCCESS

View File

@@ -0,0 +1,3 @@
from ...model import gsnet
__all__ = ['gsnet']

View File

@@ -0,0 +1,2 @@
from .fixed_orientation_refiner import *
from .no_refiner import *

View File

@@ -1,10 +1,14 @@
import numpy as np
import transforms3d as tfs
# import open3d as o3d
__all__ = ["refine_grasp_pose"]
from ...enum import Status
from ...utils import pointcloud, transforms
from ...data_struct import ImageData, PoseData
from .refiner_baseline import RefinerBaseline
__all__ = ['FixedOrientationRefiner']
def collision_detector(
points: np.ndarray,
refine: np.ndarray,
@@ -60,6 +64,8 @@ def collision_detector(
return 2
if hand_top_box.any():
return 3
else:
iou *= 0.5
left_finger_box = (
(points[2] < (z + left_size[2])) & (points[2] > z -0.05)
@@ -161,9 +167,9 @@ def refine_grasp_pose(
rmat = tfs.affines.compose(
np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
)
return rmat, True
return rmat, Status.SUCCESS
if collision_code == 6:
return 1202, False
return None, Status.REFINE_FAIL
if collision_code == 1:
refine[2] -= 0.008
@@ -193,10 +199,16 @@ def refine_grasp_pose(
print("x + 0.002")
continue
step = 0.004
if collision_code == 4:
y_p = True
y_n = False
refine[1] += 0.004
if not y_p and y_n:
refine[1] -= step
step /= 2
if step <= 0.001:
return None, Status.REFINE_FAIL
refine[1] += step
left_num += 1
print("y + 0.004")
continue
@@ -204,17 +216,109 @@ def refine_grasp_pose(
if collision_code == 5:
y_p = False
y_n = True
refine[1] -= 0.004
if y_p and not y_n:
refine[1] += step
step /= 2
if step <= 0.001:
return None, Status.REFINE_FAIL
refine[1] -= step
right_num += 1
print("y - 0.004")
continue
else:
return 1202, False
return None, Status.REFINE_FAIL
# max_moves = 20
# step = 0.004
# x_moves, y_moves, z_moves = 0, 0, 0
# last_collision_direction = None # 记录最后一次碰撞发生的方向
# first_up_collision = False # 标记是否第一次发生上端碰撞
# first_down_collision = False # 标记是否第一次发生下端碰撞
# last_left_position = None # 记录左指碰撞前的位置
# last_right_position = None # 记录右指碰撞前的位置
#
# while x_moves < max_moves and y_moves < max_moves and z_moves < max_moves:
# # 每次进入循环时先进行碰撞检测
# collision = collision_detector(points, refine, volume=[left_volume, right_volume], **kwargs)
#
# # 如果没有碰撞,且上次碰撞是在左右方向,继续按方向移动
# if collision == 0:
# if last_collision_direction == "left":
# refine[0] += step # 向左指碰撞的方向移动
# elif last_collision_direction == "right":
# refine[0] -= step # 向右指碰撞的方向移动
# if search_mode:
# break
# position = target_position + (expect_orientation @ refine.T).T
# rmat = tfs.affines.compose(
# np.squeeze(np.asarray(position)), expect_orientation, [1, 1, 1]
# )
# return rmat, Status.SUCCESS
#
# if collision == 1: # 掌心或手掌上下两端同时碰撞
# refine[2] -= step # 在z方向调整位置
# z_moves += 1
# elif collision == 3: # 手掌上端碰撞
# if not first_up_collision: # 第一次发生上端碰撞
# refine[2] -= step
# first_up_collision = True # 记录第一次发生上端碰撞
# z_moves += 1
# elif x_moves < max_moves: # 后续上端碰撞调整x轴
# refine[0] += step
# x_moves += 1
# elif collision == 2: # 手掌下端碰撞
# if not first_down_collision: # 第一次发生下端碰撞
# refine[2] -= step
# first_down_collision = True # 记录第一次发生下端碰撞
# z_moves += 1
# elif x_moves < max_moves: # 后续下端碰撞调整x轴
# refine[0] -= step
# x_moves += 1
# elif collision == 6: # 双指碰撞
# return None
#
# # 根据碰撞类型处理调整方向
# elif collision == 4: # 左指碰撞
# if last_right_position is not None:
# step /= 2 # 步长减半
# if step <= 0.001:
# return None, Status.REFINE_FAIL
# refine = last_right_position.copy() # 回到右指碰撞前的位置
# last_right_position = None
# last_collision_direction = "right" # 设置为左指方向
# refine[0] -= step
# x_moves += 1
# continue # 继续进行下一次碰撞检测
# last_collision_direction = "left"
# last_left_position = refine.copy() # 记录左指碰撞前的位置
# refine[0] += step # 向左指方向移动
# x_moves += 1
#
# elif collision == 5: # 右指碰撞
# if last_left_position is not None:
# step /= 2
# if step <= 0.001:
# return None, Status.REFINE_FAIL
# refine = last_left_position.copy() # 回到左指碰撞前的位置
# last_left_position = None
# last_collision_direction = "left" # 设置为右指方向
# refine[0] += step
# x_moves += 1
# continue # 继续进行下一次碰撞检测
# last_collision_direction = "right"
# last_right_position = refine.copy() # 记录右指碰撞前的位置
# refine[0] -= step # 向右指方向移动
# x_moves += 1
# else:
# return None, Status.REFINE_FAIL
if search_mode:
right_num = left_num = 0
# already in left side
if left_last and not right_last:
y_min = refine[1]
y_min = refine[1] - 0.004
while left_num < 10:
left_num += 1
refine[1] += 0.004
@@ -225,11 +329,12 @@ def refine_grasp_pose(
refine[1] = (refine[1] - 0.004 + y_min) / 2
break
else:
refine[1] = y_min + 0.2
refine[1] = y_min + 0.02
print(f"left_num = {left_num}")
elif not left_last and right_last:
y_max = refine[1]
# already in right side
y_max = refine[1] + 0.004
while right_num < 10:
right_num += 1
refine[1] -= 0.004
@@ -240,12 +345,13 @@ def refine_grasp_pose(
refine[1] = (refine[1] + 0.004 + y_max) / 2
break
else:
refine[1] = y_max - 0.2
refine[1] = y_max - 0.02
print(f"right_num = {right_num}")
elif not left_last and not right_last:
# in middle
y_cur = refine[1]
while left_num < 6:
while left_num < 10:
left_num += 1
refine[1] += 0.004
collision_code = collision_detector(
@@ -255,11 +361,11 @@ def refine_grasp_pose(
y_max = refine[1] - 0.004
break
else:
y_max = refine[1] + 0.024
y_max = y_cur + 0.040
print(f"left_num = {left_num}")
refine[1] = y_cur
while right_num < 6:
while right_num < 10:
right_num += 1
refine[1] -= 0.004
collision_code = collision_detector(
@@ -269,7 +375,7 @@ def refine_grasp_pose(
refine[1] = (refine[1] + 0.004 + y_max) / 2
break
else:
refine[1] = (y_cur - 0.024 + y_max) / 2
refine[1] = (y_cur - 0.040 + y_max) / 2
print(f"right_num = {right_num}")
position = target_position + (expect_orientation @ refine.T).T
@@ -278,5 +384,42 @@ def refine_grasp_pose(
expect_orientation,
[1, 1, 1]
)
return rmat, True
return 1202, False
return rmat, Status.SUCCESS
return None, Status.REFINE_FAIL
class FixedOrientationRefiner(RefinerBaseline):
    """Refiner that keeps each pose's expected orientation fixed and searches
    for a collision-free grasp position via refine_grasp_pose() against the
    full scene point cloud."""
    def __init__(self, config):
        super().__init__()
        self.config = config
    def _refine(self, image_data: ImageData, pose_data: PoseData, calibration_mat: np.ndarray,
                **kwargs) -> tuple[PoseData, int]:
        """Refine every successful pose in pose_data in place.

        Returns (pose_data, SUCCESS) when at least one pose was refined, else
        (pose_data, ALL_POSE_REFINE_FAILED). Failed entries keep their codes.
        """
        # (width, height) ordering for the point-cloud helper.
        image_size = image_data.depth_image.shape[:2][::-1]
        full_pcd = pointcloud.create_o3d_pcd(
            image_data.depth_image, image_size, image_data.karr, **self.config
        )
        full_points = np.asarray(full_pcd.points)
        n = 0  # number of successfully refined poses
        for i, (status, pose_mat, grasp_width) in enumerate(pose_data):
            if status != 0:
                # Skip entries that already failed estimation.
                continue
            # Collision-aware position search around the pose's translation.
            pose_mat, CODE = refine_grasp_pose(
                full_points, self.config.get("voxel_size"), pose_mat[0:3, 3], search_mode=True
            )
            if CODE != 0:
                pose_data.set_data(i, CODE)
                continue
            # Map the refined pose into the calibrated (robot) frame and
            # convert to quaternion form for downstream consumers.
            pose_mat = calibration_mat @ pose_mat
            quat = transforms.rmat2quat(pose_mat)
            pose_data.set_data(i, Status.SUCCESS, quat, grasp_width)
            n += 1
        if n == 0:
            return pose_data, Status.ALL_POSE_REFINE_FAILED
        return pose_data, Status.SUCCESS

View File

@@ -0,0 +1,29 @@
from typing import Optional, Tuple
import numpy as np
from ...enum import Status
from .refiner_baseline import RefinerBaseline
from ...data_struct import ImageDataContainer, PoseData
from ...utils import transforms
__all__ = ["NoRefiner"]
SUCCESS = 0
class NoRefiner(RefinerBaseline):
    """Pass-through refiner: performs no collision search, only maps each
    successful pose into the calibrated frame as a quaternion."""
    def __init__(self, config):
        # config accepted for interface parity with other refiners; unused.
        super().__init__()
    def _refine(self, image_data: Optional[ImageDataContainer], pose_data: Optional[PoseData],
                calibration_mat: Optional[np.ndarray], **kwargs) -> Tuple[Optional[PoseData], int]:
        """Convert every successful pose to the calibrated frame in place."""
        for index, (status, pose_mat, grasp_width) in enumerate(pose_data):
            if status != 0:
                # Leave failed estimates untouched.
                continue
            transformed = calibration_mat @ pose_mat
            quat = transforms.rmat2quat(transformed)
            pose_data.set_data(index, Status.SUCCESS, quat, grasp_width)
        return pose_data, Status.SUCCESS

View File

@@ -0,0 +1,34 @@
from numpy.typing import NDArray
from ...data_struct import ImageData, ImageDataContainer, PoseData
__all__ = ['RefinerBaseline']
class RefinerBaseline:
    """Base class for pose refiners; subclasses implement _refine()."""
    def __init__(self):
        pass
    def _refine(
        self,
        image_data:ImageData,
        pose_data: PoseData,
        calibration_mat: NDArray,
        **kwargs
    ) -> tuple[PoseData | None, int]:
        """Subclass hook: refine pose_data for one image; returns (poses, status)."""
        pass
    def get_refine(
        self,
        position: str,
        image_data_container:ImageDataContainer,
        pose_data: PoseData,
        calibration_mat_dict: dict[str, NDArray],
        **kwargs
    ) -> tuple[PoseData | None, int]:
        """Select the per-position image and calibration, then delegate."""
        selected_image = image_data_container[position]
        selected_calibration = calibration_mat_dict[position]
        return self._refine(
            image_data=selected_image,
            pose_data=pose_data,
            calibration_mat=selected_calibration,
            **kwargs
        )

View File

@@ -0,0 +1,3 @@
from .image_data import *
from .detect_data import *
from .pose_data import *

View File

@@ -0,0 +1,102 @@
from dataclasses import dataclass, field
from numpy.typing import NDArray
import numpy as np
__all__ = ["DetectData"]
SUCCESS = 0
@dataclass(slots=True)
class DetectData:
    """Parsed detection results for one image.

    Built from a single-element list of detector results via create_data(),
    or from bare masks via create_mask_only_data(). Detections are sorted by
    box centre (ascending x, then descending y).
    """
    status: int
    results: list | None = None
    # General
    masks: list[NDArray] | None = field(init=False, default=None)
    boxes: list[NDArray] | None = field(init=False, default=None)   # xywh centres
    class_ids: list[int] | None = field(init=False, default=None)
    confidences: list[float] | None = field(init=False, default=None)
    # NOTE(review): typed list[str] but assigned result.names below, which
    # for YOLO results is an id->name mapping — indexing works either way.
    labels_map: list[str] | None = field(init=False, default=None)
    # Crossboard special
    board_points: NDArray | None = field(init=False, default=None)  # board model points
    camera_points: NDArray | None = field(init=False, default=None)  # camera get board points
    def __iter__(self):
        """Iterate detections as (mask, box, confidence, class_id, label)."""
        if (self.masks is None or self.boxes is None or self.confidences is None
                or self.class_ids is None or self.labels_map is None or len(self.class_ids) == 0):
            return iter(())
        return (
            (mask, box, conf, cid, self.labels_map[int(cid)])
            for mask, box, cid, conf in zip(self.masks, self.boxes,
                                            self.class_ids, self.confidences)
        )
    def __getitem__(self, index: int) -> tuple:
        # Same (mask, box, conf, cid, label) layout as __iter__.
        return (
            self.masks[index], self.boxes[index],
            self.confidences[index], self.class_ids[index],
            self.labels_map[int(self.class_ids[index])]
        )
    def __setitem__(self, key, value):
        # Detections are read-only once parsed.
        raise AttributeError
    def __len__(self) -> int:
        return 0 if self.masks is None else len(self.masks)
    def __post_init__(self):
        # Parse the detector output only on success; exactly one result
        # (one image) is supported.
        if self.status == SUCCESS and self.results is not None:
            if len(self.results) != 1:
                raise ValueError("results must only contain exactly one element")
            self._analysis()
    def _analysis(self):
        """Extract masks/boxes/classes/confidences and sort by box centre."""
        result = self.results[0]
        masks = result.masks.data.cpu().numpy()
        # Get boxes
        boxes = result.boxes.xywh.cpu().numpy()
        class_ids = result.boxes.cls.cpu().numpy().astype(np.int32)
        confidences = result.boxes.conf.cpu().numpy()
        self.labels_map = result.names
        # Sort: ascending x centre, ties broken by descending y centre.
        x_centers, y_centers = boxes[:, 0], boxes[:, 1]
        sorted_index = np.lexsort((-y_centers, x_centers))
        self.masks = masks[sorted_index]
        self.boxes = boxes[sorted_index]
        self.class_ids = class_ids[sorted_index].tolist()
        self.confidences = confidences[sorted_index].tolist()
    @classmethod
    def create_mask_only_data(cls, masks: list[NDArray]):
        """Build a DetectData carrying only masks (no boxes/classes)."""
        obj = cls(status=SUCCESS, results=None)
        obj.masks = masks
        if obj.masks is None or len(obj.masks) == 0:
            raise ValueError
        return obj
    @classmethod
    def create_data(cls, results):
        """Build a DetectData from a single-element detector result list."""
        obj = cls(status=SUCCESS, results=results)
        if obj.masks is None or len(obj.masks) == 0:
            raise ValueError
        return obj
    @classmethod
    def create_crossboard_data(cls, detect_data, board_data):
        # Placeholder: crossboard construction not yet implemented.
        # obj = cls(status=SUCCESS, results=None)
        # obj.masks = masks
        # if obj.masks is None or len(obj.masks) == 0:
        #     raise ValueError
        # return obj
        pass
@dataclass(slots=True)
class DetectDataContainer:
    # Placeholder for a future per-position detection container
    # (mirrors ImageDataContainer); not yet implemented.
    pass

View File

@@ -0,0 +1,62 @@
from dataclasses import dataclass, field
from numpy.typing import NDArray
from ..enum import Status
from ..utils import LOGGING_MAP
__all__ = ["ImageDataContainer", "ImageData"]
@dataclass(slots=True)
class ImageData:
status: int
color_image: NDArray | None = None
depth_image: NDArray | None = None
karr: list[float] | NDArray | None = None
darr: tuple[float, ...] | NDArray | None = None
def status(self) -> str:
return f"{self.status:04d}"
@dataclass(slots=True)
class ImageDataContainer:
    """Read-only mapping of camera position -> ImageData; populated only via
    add_data()."""
    _data_dict: dict[str, ImageData] = field(default_factory=dict)
    def __getitem__(self, position: str) -> ImageData:
        item = self._data_dict.get(position)
        if item is None:
            raise KeyError(f"Position '{position}' not found in ImageDataContainer")
        return item
    def __setitem__(self, position: str, value):
        # Writes must go through add_data().
        raise AttributeError
    def __len__(self) -> int:
        return len(self._data_dict)
    def __iter__(self):
        # Yields (position, ImageData) pairs.
        return iter(self._data_dict.items())
    def add_data(
        self,
        position: str,
        status: int,
        color_image: NDArray | None = None,
        depth_image: NDArray | None = None,
        karr: list[float] | NDArray | None = None,
        darr: tuple[float, ...] | NDArray | None = None
    ):
        """Record the capture result for a position; payload is dropped on failure."""
        if status == Status.SUCCESS:
            self._data_dict[position] = (ImageData(status, color_image, depth_image, karr, darr))
        else:
            self._data_dict[position] = (ImageData(status, None, None, None, None))
    def check_data_status(self, logger):
        """Log a warning for every position whose capture failed."""
        for position, data in self._data_dict.items():
            if data.status == Status.SUCCESS:
                continue
            # Bug fix: `data.status` is an int field, not a callable — the
            # original called data.status(), which raised TypeError. Format
            # the zero-padded code directly (LOGGING_MAP keys are "%04d").
            code = f"{data.status:04d}"
            logger.warning(
                f"{position} Image: {LOGGING_MAP.get(code, f'unknown code: {code}')}"
            )

View File

@@ -0,0 +1,68 @@
from dataclasses import dataclass, field
from typing import Iterator
from numpy.typing import NDArray
__all__ = ["PoseData"]
SUCCESS = 0
PoseDataItemType = tuple[int, tuple[float, ...] | NDArray | None, tuple[float, ...] | None]
@dataclass(slots=True)
class _PoseDataItem:
status: int
pose: tuple[float, ...] | NDArray | None
grasp_width: tuple[float, ...] | None
def as_tuple(self) -> PoseDataItemType:
return self.status, self.pose, self.grasp_width
@dataclass(slots=True)
class PoseData:
# _data_list: [[status, pose, grasp_width]]
_data_list: list[_PoseDataItem] = field(default_factory=list)
def __iter__(self) -> Iterator[PoseDataItemType]:
for item in self._data_list:
yield item.status, item.pose, item.grasp_width
def __getitem__(self, index: int) -> PoseDataItemType:
return self._data_list[index].as_tuple()
def __len__(self) -> int:
return len(self._data_list)
def add_data(
self,
status: int,
pose: tuple[float, ...] | NDArray | None = None,
grasp_width: tuple[float, ...] | None = None
):
if status == SUCCESS:
if pose is None or grasp_width is None:
raise ValueError("pose and grasp_width cannot be None when status is SUCCESS")
self._data_list.append(_PoseDataItem(status, pose, grasp_width))
else:
self._data_list.append(_PoseDataItem(status, None, None))
def set_data(
self,
index: int,
status: int,
pose: tuple[float, ...] | NDArray | None = None,
grasp_width: tuple[float, ...] | None = None
):
if status == SUCCESS:
if pose is None or grasp_width is None:
raise ValueError("pose and grasp_width cannot be None when status is SUCCESS")
self._data_list[index] = _PoseDataItem(status, pose, grasp_width)
else:
self._data_list[index] = _PoseDataItem(status, None, None)
@dataclass(slots=True)
class PoseDataContainer:
    # Placeholder for a future per-position pose container; not yet implemented.
    pass

View File

@@ -0,0 +1,2 @@
from .component_mode_enum import *
from .logging_code_enum import *

View File

@@ -0,0 +1,34 @@
from enum import IntEnum
__all__ = [
"NodeType", "SourceType", "DetectorType", "RefinerType", "EstimatorType"
]
class NodeType(IntEnum):
    """How the vision node exposes its results (see CONFIG_MAP['node'])."""
    SERVICE = 0
    PUBLISHER = 1
    ACTION = 2
class SourceType(IntEnum):
    """Where images come from (see SOURCE_MAP / CONFIG_MAP['source'])."""
    DRIVER = 0
    DIRECT = 1
    # NOTE(review): misspelling of MULTITOPIC — config files select members by
    # name via SourceType[...], so renaming would break existing configs.
    MUILTOPIC = 2
class DetectorType(IntEnum):
    """Detection backend selection."""
    OBJECT = 0
    COLOR = 1
    CROSSBOARD = 2
class EstimatorType(IntEnum):
    """Pose-estimation backend selection."""
    PCA = 0
    ICP = 1
    GSNET = 2
class RefinerType(IntEnum):
    """Pose refinement strategy selection."""
    NO = 0
    FIXED = 1

View File

@@ -0,0 +1,60 @@
from enum import IntEnum
__all__ = ["Status"]
class Status(IntEnum):
    """Pipeline status / logging codes (formatted as 4-digit strings in logs)."""
    SUCCESS = 0
    # 1xx: configuration / calibration loading
    FAIL_LOAD_CONFIG = 100
    NO_NODE_NAME = 101
    NO_OUTPUT_ATTRIBUTE = 102
    FAIL_LOAD_SOURCE_CONFIG = 103
    FAIL_LOAD_DETECT_CONFIG = 104
    FAIL_LOAD_ESTIMATE_CONFIG = 105
    FAIL_LOAD_CALIBRATION_FILE = 110
    WRONG_KEY = 111
    WRONG_SHAPE = 112
    # 2xx: image acquisition
    NO_CAMERA_DATA = 200
    NO_POSITION_DATA = 201
    NO_ALL_POSITION_DATA = 202
    IMAGE_QUALITY_LOW = 210
    # 3xx-4xx: task execution
    WORKER_NOT_ALIVE = 300
    EXECUTOR_ALREADY_STOP = 301
    TASK_ABORTED = 302
    WORKER_TIMEOUT = 303
    TASK_EXECUTOR_INTERNAL_ERROR = 400
    # 10xx: detection
    NO_DETECT = 1000
    NO_DEPTH_CROP = 1001
    FAIL_DETECT_VALID_POSE = 1003
    NO_COLOR = 1010
    NO_CROSSBOARD = 1020
    # 11xx: point-cloud construction
    # NOTE(review): misspelling of TOO_MUCH_NOISE — other modules may
    # reference this name, so it is kept as-is.
    TOO_MACH_NOISE = 1100
    POINTS_EMPTY = 1101
    POINTS_TOO_CLOSELY = 1102
    # 12xx: pose estimation / refinement
    TOO_FEW_POINTS_OBB = 1200
    PCA_NO_VECTOR = 1201
    REFINE_FAIL = 1202
    ALL_POSE_REFINE_FAILED = 1203
    CANNOT_ESTIMATE = 1210
    # 13xx: crossboard / end-to-end
    COORS_TOO_FEW = 1300
    POINT_CLOUDS_TOO_FEW = 1301
    ZERO_TRUE_NUM = 1302
    E2E_NO_PREDICTION = 1303
    E2E_NO_VALID_MATRIX = 1304

View File

@@ -0,0 +1 @@
from .managers import *

View File

@@ -0,0 +1,2 @@
from .configs_name_map import *
from .components_type_map import *

View File

@@ -0,0 +1,33 @@
from ...components import image_providers, detectors, estimators, refiners
from ...enum import SourceType, DetectorType, EstimatorType, RefinerType
__all__ = [
"SOURCE_MAP", "DETECTOR_MAP",
"ESTIMATOR_MAP", "REFINER_MAP"
]
# Component registries: map the enum selections from the config to their
# implementing classes.
SOURCE_MAP = {
    SourceType.DRIVER: image_providers.DriverSource,
    SourceType.DIRECT: image_providers.SingleTopicSource,
    # Fix: MuliTopicSource is implemented and exported by image_providers but
    # was never registered, making SourceType.MUILTOPIC unselectable.
    SourceType.MUILTOPIC: image_providers.MuliTopicSource,
}

DETECTOR_MAP = {
    DetectorType.OBJECT: detectors.ObjectDetector,
    DetectorType.COLOR: detectors.ColorDetector,
    DetectorType.CROSSBOARD: detectors.CrossboardDetector
}

ESTIMATOR_MAP = {
    EstimatorType.PCA: estimators.PCAEstimator,
    EstimatorType.ICP: estimators.ICPEstimator,
    # GSNET estimator not wired up yet.
    # EstimatorType.GSNET: estimators.E2EEstimator,
}

REFINER_MAP = {
    RefinerType.NO: refiners.NoRefiner,
    RefinerType.FIXED: refiners.FixedOrientationRefiner
}

View File

@@ -0,0 +1,32 @@
from ...enum import NodeType, SourceType, DetectorType, EstimatorType
__all__ = [
"CONFIG_MAP"
]
# Maps each component selection (enum member) to the name of the config-file
# section that configures it; ConfigManager.load_config() looks sections up
# through this table.
CONFIG_MAP = {
    "mode":{},
    "node": {
        NodeType.PUBLISHER: "publisher_node_configs",
        NodeType.SERVICE: "service_node_configs",
        NodeType.ACTION: "action_node_configs",
    },
    # NOTE(review): no entry for SourceType.MUILTOPIC — selecting that source
    # currently cannot resolve a config section; confirm intended key name.
    "source":{
        SourceType.DRIVER: "driver_source_configs",
        SourceType.DIRECT: "direct_source_configs"
    },
    "detector":{
        DetectorType.OBJECT: "object_detector_configs",
        DetectorType.COLOR: "color_detector_configs",
        DetectorType.CROSSBOARD: "crossboard_detector_configs"
    },
    # Estimator sections are keyed by (estimator, detector) pairs because the
    # estimator config can depend on which detector feeds it.
    "estimator":{
        (EstimatorType.PCA, None): "pca_estimator_configs",
        (EstimatorType.ICP, DetectorType.OBJECT): "icp_estimator_configs",
        (EstimatorType.GSNET, DetectorType.OBJECT): "gsnet_estimator_configs"
    },
    "refiner":{}
}

View File

@@ -0,0 +1,6 @@
from .config_manager import *
from .detector_manager import *
from .estimator_manager import *
from .refiner_manager import *
from .resource_manager import *
from .source_manager import *

View File

@@ -0,0 +1,152 @@
import os
import json
import logging
from ...utils import io
from ...utils import format
from ..manager_map import CONFIG_MAP
from ...utils import LOGGING_MAP
from ...enum import SourceType, DetectorType, EstimatorType, RefinerType, NodeType
__all__ = ["ConfigManager"]
class ConfigManager:
    """Parse the node's JSON configuration and calibration matrices.

    Each pipeline stage (node, source, detector, estimator, refiner) is
    resolved from a mode string in the config file to its enum value plus
    the matching config sub-dict (via CONFIG_MAP).  Unknown modes or
    missing config sections fall back to documented defaults:
    node=SERVICE, source=DRIVER, detector=OBJECT, estimator=PCA,
    refiner=NO.
    """

    def __init__(self, logging_map_path, logger=None):
        # logger: anything providing info()/warning(); defaults to the
        # stdlib logging module's module-level functions.
        self.logger = logger or logging
        with open(logging_map_path, 'r') as f:
            # Only the "warning" section of the map file is kept here.
            self.logging_map = json.load(f)["warning"]
        # Calibration matrices, populated by load_calibration().
        self.left = None
        self.right = None
        self.top = None
        self.node_name = None
        self.output_boxes = None
        self.output_masks = None
        self.save_image = None
        self.image_save_dir = None
        # Didn't need storage
        self.node_mode = None
        self.source_mode = None
        self.detector_mode = None
        self.estimator_mode = None
        self.refiner_mode = None
        self.node_config = None
        self.source_config = None
        self.detector_config = None
        self.estimator_config = None
        self.refiner_config = None
        self.calibration = None

    def load_config(self, config_path):
        """Load ``config_path`` (JSON) and resolve every pipeline mode.

        Raises KeyError if a mandatory top-level key ("calibration",
        "node_name", or one of the *_mode keys) is missing from the file.
        """
        with open(config_path, 'r') as f:
            config = json.load(f)
        self.calibration = config["calibration"]
        self.node_name = config["node_name"]
        self.output_boxes = config.get("output_boxes", False)
        self.output_masks = config.get("output_masks", False)
        self.save_image = config.get("save_image", True)
        if self.save_image:
            self.image_save_dir = os.path.expanduser(config.get("image_save_dir", "~/images"))
            # makedirs (was mkdir): the configured path may need intermediate
            # directories; exist_ok avoids a race with concurrent creation.
            os.makedirs(self.image_save_dir, exist_ok=True)
        node_mode = config["node_mode"]
        image_source = config["image_source"]
        detect_mode = config["detect_mode"]
        estimate_mode = config["estimate_mode"]
        refine_mode = config["refine_mode"]
        try:
            self.node_mode = NodeType[node_mode]
            self.node_config = config.get(CONFIG_MAP["node"].get(self.node_mode))
            if self.node_config is None:
                raise KeyError(f"node_mode: {node_mode}")
        except (KeyError, ValueError):
            # Fall back to a service node.
            self.node_mode = NodeType.SERVICE
            self.node_config = config.get(CONFIG_MAP["node"].get(self.node_mode))
        try:
            self.source_mode = SourceType[image_source]
            self.source_config = config.get(CONFIG_MAP["source"].get(self.source_mode))
            if self.source_config is None:
                raise KeyError(f"image_source: {image_source}")
        except (KeyError, ValueError):
            # Fall back to the camera-driver source.
            self.source_mode = SourceType.DRIVER
            self.source_config = config.get(CONFIG_MAP["source"].get(self.source_mode))
        finally:
            # Preprocess options ride along inside the source config.
            # Guard both sides: the fallback section or the preprocess
            # section may be absent (update(None) would raise TypeError).
            preprocess_configs = config.get("preprocess_configs")
            if self.source_config is not None and preprocess_configs:
                self.source_config.update(preprocess_configs)
        try:
            self.detector_mode = DetectorType[detect_mode]
            self.detector_config = config.get(CONFIG_MAP["detector"].get(self.detector_mode))
            if self.detector_config is None:
                raise KeyError(f"detect_mode: {detect_mode}")
        except (KeyError, ValueError):
            # Fall back to the object detector.
            self.detector_mode = DetectorType.OBJECT
            self.detector_config = config.get(CONFIG_MAP["detector"].get(self.detector_mode))
        try:
            self.estimator_mode = EstimatorType[estimate_mode]
            # Estimator configs are keyed by (estimator, detector) pairs.
            self.estimator_config = config.get(CONFIG_MAP["estimator"].get(
                (self.estimator_mode, self.detector_mode)
            ))
            if self.estimator_config is None:
                raise KeyError(f"estimate_mode: {estimate_mode}")
        except (KeyError, ValueError):
            # Fall back to PCA, which is detector-agnostic (None key).
            self.estimator_mode = EstimatorType.PCA
            self.estimator_config = config.get(CONFIG_MAP["estimator"].get(
                (self.estimator_mode, None)
            ))
        try:
            self.refiner_mode = RefinerType[refine_mode]
            if self.refiner_mode == RefinerType.NO:
                self.refiner_config = {}
            elif (self.refiner_mode == RefinerType.FIXED
                    and self.detector_mode == DetectorType.OBJECT
                    and self.estimator_mode == EstimatorType.PCA):
                # FIXED refinement reuses the PCA estimator's point-cloud
                # parameters.
                self.refiner_config = {
                    "depth_scale": self.estimator_config.get("depth_scale", 1000.0),
                    "depth_trunc": self.estimator_config.get("depth_trunc", 3.0),
                    "voxel_size": self.estimator_config.get("voxel_size", 0.002)
                }
            else:
                raise KeyError(f"refine_mode: {refine_mode}")
        # Fixed: `except KeyError or ValueError` only caught KeyError
        # (the `or` evaluates to its first operand), letting ValueError
        # escape past the intended fallback.
        except (KeyError, ValueError):
            self.refiner_mode = RefinerType.NO
            self.refiner_config = {}
        self.logger.info(f"node: {self.node_mode}")
        self.logger.info(f"source: {self.source_mode}")
        self.logger.info(f"detector: {self.detector_mode}")
        self.logger.info(f"estimator: {self.estimator_mode}")
        self.logger.info(f"refiner: {self.refiner_mode}")

    def load_calibration(self, shared_dir):
        """Load the three hand/head calibration matrices from ``shared_dir``.

        Non-zero status codes from io.load_calibration_mat are reported as
        warnings via LOGGING_MAP; matrices are stored on self.left/right/top.
        """
        eye_in_left_hand_path = os.path.join(shared_dir, self.calibration["left_hand"])
        eye_in_right_hand_path = os.path.join(shared_dir, self.calibration["right_hand"])
        eye_to_hand_path = os.path.join(shared_dir, self.calibration["head"])
        self.left, CODE = io.load_calibration_mat(eye_in_left_hand_path)
        self.logger.info(f"\nleft_hand_mat: \n{format.np_mat2str(self.left)}")
        if CODE != 0:
            self.logger.warning(f"left_hand: {LOGGING_MAP[f'{CODE:04d}']}")
        self.right, CODE = io.load_calibration_mat(eye_in_right_hand_path)
        self.logger.info(f"\nright_hand_mat: \n{format.np_mat2str(self.right)}")
        if CODE != 0:
            self.logger.warning(f"right_hand: {LOGGING_MAP[f'{CODE:04d}']}")
        self.top, CODE = io.load_calibration_mat(eye_to_hand_path)
        self.logger.info(f"\ntop_mat: \n{format.np_mat2str(self.top)}")
        if CODE != 0:
            self.logger.warning(f"top: {LOGGING_MAP[f'{CODE:04d}']}")

View File

@@ -0,0 +1,25 @@
import logging
from ...components import detectors
from ..manager_map import DETECTOR_MAP
from ...data_struct import ImageDataContainer, DetectData
__all__ = ["DetectorManager"]
class DetectorManager:
    """Build the detector selected by ``mode`` and forward detection calls."""

    def __init__(self, mode, config, logger=None):
        # logger: anything providing warning(); defaults to the logging module.
        _logger = logger or logging
        detector_cls = DETECTOR_MAP.get(mode)
        if detector_cls is None:
            # Previously an unknown mode silently fell back via the
            # TypeError raised by calling None; make the fallback visible.
            _logger.warning(f"unknown detector mode: {mode}, falling back to ObjectDetector")
            detector_cls = detectors.ObjectDetector
        try:
            self.detector = detector_cls(config, _logger)
        except Exception as e:
            # Construction failure also degrades to the default detector,
            # but now the cause is logged instead of being swallowed.
            _logger.warning(
                f"failed to construct {detector_cls.__name__}: {e}, "
                f"falling back to ObjectDetector")
            self.detector = detectors.ObjectDetector(config, _logger)

    def get_masks(
        self,
        position,
        classes_name,
        image_data: ImageDataContainer
    ) -> tuple[DetectData | None, int]:
        """Run detection for ``classes_name`` on ``image_data``.

        Returns (detect_data, status_code) from the underlying detector.
        """
        return self.detector.get_masks(
            position=position, classes_name=classes_name, image_data=image_data)

View File

@@ -0,0 +1,27 @@
from ...components import estimators
from ..manager_map import ESTIMATOR_MAP
from ...data_struct import ImageDataContainer, DetectData
__all__ = ['EstimatorManager']
class EstimatorManager:
    """Construct the estimator selected by ``mode`` and proxy pose queries."""

    def __init__(self, mode, config):
        # Any failure — unknown mode (KeyError) or a constructor error —
        # degrades to the PCA estimator.
        try:
            estimator_cls = ESTIMATOR_MAP[mode]
            self.estimator = estimator_cls(config)
        except Exception:
            self.estimator = estimators.PCAEstimator(config)

    def get_poses(
        self,
        position: str,
        image_data: ImageDataContainer,
        detect_data: DetectData,
        get_grab_width: bool = True,
    ):
        """Delegate pose estimation for ``position`` to the active estimator."""
        return self.estimator.get_poses(
            position=position,
            image_data=image_data,
            detect_data=detect_data,
            get_grab_width=get_grab_width,
        )

View File

@@ -0,0 +1,31 @@
from numpy.typing import NDArray
from ...components import refiners
from ..manager_map import REFINER_MAP
from ...data_struct import ImageDataContainer, PoseData
__all__ = ["RefinerManager"]
class RefinerManager:
    """Construct the refiner selected by ``mode`` and proxy refine calls."""

    def __init__(self, mode, config):
        # Unknown mode (KeyError) or a failing constructor both degrade
        # to the no-op refiner.
        try:
            refiner_cls = REFINER_MAP[mode]
            self.refiner = refiner_cls(config)
        except Exception:
            self.refiner = refiners.NoRefiner(config)

    def get_refine(
        self,
        position: str,
        image_data_container: ImageDataContainer,
        pose_data: PoseData,
        calibration_mat_dict: dict[str, NDArray],
        **kwargs
    ):
        """Delegate pose refinement for ``position`` to the active refiner."""
        return self.refiner.get_refine(
            position=position,
            image_data_container=image_data_container,
            pose_data=pose_data,
            calibration_mat_dict=calibration_mat_dict,
            **kwargs,
        )

View File

@@ -0,0 +1,22 @@
from .config_manager import ConfigManager
from ...utils import LOGGING_MAP
__all__ = ["ResourceManager"]
class ResourceManager:
    """Read-only snapshot of the ConfigManager settings shared by the node.

    Copies the frequently used fields out of ``config_manager`` so the rest
    of the node does not hold a reference to the full config object.
    """

    def __init__(self, config_manager: ConfigManager):
        self.logging_map = config_manager.logging_map
        self.node_name = config_manager.node_name
        self.output_boxes = config_manager.output_boxes
        self.output_masks = config_manager.output_masks
        # Hand/head calibration matrices keyed by camera position.
        self.calibration_matrix = {
            "left": config_manager.left,
            "right": config_manager.right,
            "top": config_manager.top
        }
        self.save_image = config_manager.save_image
        # Always define image_save_dir: previously it was only set when
        # save_image was truthy, so readers hit AttributeError when image
        # saving was disabled.
        self.image_save_dir = config_manager.image_save_dir if self.save_image else None

View File

@@ -0,0 +1,63 @@
from typing import Tuple
from rclpy.node import Node
from interfaces.srv import SaveCameraImages
from ... import Status
from ...components import image_providers
from ..manager_map import SOURCE_MAP
from ...data_struct import ImageDataContainer
from ...utils import LOGGING_MAP, io
class SourceManager:
    """
    register source

    Owns the configured image source and exposes the
    ``/save_camera_images`` service for dumping frames to disk.
    """

    def __init__(self, mode, config, node: Node):
        self.node = node
        try:
            self.source = SOURCE_MAP.get(mode)(config, node)
        except Exception:
            # Unknown mode (calling the None lookup raises TypeError) or a
            # failing constructor: fall back to the camera-driver source.
            self.source = image_providers.DriverSource(config, node)
        self._save_image_service = node.create_service(
            SaveCameraImages, "/save_camera_images", self.save_image_service_callback
        )

    def save_image_service_callback(self, request, response):
        """Save the requested image types for each requested camera position.

        Only .png/.jpg/.jpeg are accepted; a missing leading dot in
        ``save_type`` is tolerated.
        """
        image_types = request.image_types
        positions = request.camera_positions
        save_dir = request.save_dir
        save_type: str = request.save_type
        if not save_type.startswith("."):
            save_type = "." + save_type
        if save_type not in [".png", ".jpg", ".jpeg"]:
            response.success = False
            response.info = "Type error"
            return response
        # NOTE(review): get_images is annotated to return (container, code);
        # iterating its raw result as (position, data) pairs looks suspect —
        # verify the container's iteration contract.
        image_container = self.get_images(positions)
        for position, data in image_container:
            if data.status != Status.SUCCESS:
                # Fixed: the original called data.status() (elsewhere it is
                # read as an attribute) and LOGGING_MAP.get(...), but
                # LOGGING_MAP only supports indexing — both would raise on
                # this failure path.  Fall back to the raw status when the
                # code is not a known key.
                try:
                    reason = LOGGING_MAP[data.status]
                except (KeyError, TypeError):
                    reason = str(data.status)
                self.node.get_logger().warn(
                    f"Failed save {position}: {reason}")
                continue
            if "color" in image_types:
                io.save_img(
                    data.color_image, "color_image"+save_type, save_dir, mark_cur_time=True)
            if "depth" in image_types:
                # Fixed: previously wrote data.color_image under the depth
                # filename.  Assumes the container entry exposes depth_image
                # — TODO confirm attribute name against ImageDataContainer.
                io.save_img(
                    data.depth_image, "depth_image"+save_type, save_dir, mark_cur_time=True)
        response.success = True
        return response

    def get_images(self, positions) -> Tuple[ImageDataContainer | None, int]:
        """Fetch images for ``positions`` from the underlying source."""
        return self.source.get_images(positions=positions)

View File

@@ -0,0 +1,2 @@
from ..utils import *
from ..logging_map import *

View File

@@ -0,0 +1 @@
from .logging_map import *

View File

@@ -0,0 +1,45 @@
import os
import json
from typing import Any
from ament_index_python.packages import get_package_share_directory
__all__ = ["LOGGING_MAP"]
def get_logging_map():
    """Load the report-logging code->message table shipped with vision_detect."""
    share_dir = get_package_share_directory('vision_detect')
    map_path = os.path.join(share_dir, 'map/logging/report_logging_define.json')
    with open(map_path, "r", encoding="utf-8") as map_file:
        return json.load(map_file)
# LOGGING_MAP = get_logging_map()
class LoggingMap:
    """Read-only mapping from 4-digit status codes to log messages.

    Keys may be given as ints (zero-padded to four digits) or as
    four-character strings.  Item assignment is rejected so the table
    stays immutable.
    """

    # Loaded once at class-definition time from the package share dir.
    _LOGGING_MAP = get_logging_map()

    def __getitem__(self, item: str | int) -> str:
        if isinstance(item, int):
            # Integer codes are normalized to the 4-digit string keys
            # used by the JSON file.
            key = f"{item:04d}"
            if key in self._LOGGING_MAP:
                return self._LOGGING_MAP[key]
            # Fixed message: it previously claimed the map *has* the key.
            raise KeyError(f"logging map does not have this key: {item}")
        elif isinstance(item, str):
            if len(item) == 4 and item in self._LOGGING_MAP:
                return self._LOGGING_MAP[item]
            raise KeyError(f"logging map does not have this key: {item}")
        else:
            raise TypeError(f"logging map received an unsupported key type: {type(item)}")

    def __setitem__(self, key: str, value: Any) -> None:
        raise TypeError("logging map does not support item assignment")


LOGGING_MAP = LoggingMap()

Some files were not shown because too many files have changed in this diff Show More