first commit

This commit is contained in:
2026-04-03 07:30:54 +03:00
commit 1a083e043e
1138 changed files with 1705 additions and 0 deletions

280
detect_box.py Normal file
View File

@@ -0,0 +1,280 @@
"""
Module V: Box detection and classification for RMK-1 manipulator.
Usage:
python3 detect_box.py --target 1
Target classes (per competition spec):
1 - hammer (молоток)
2 - wrench (гаечный ключ)
3 - pliers (пассатижи)
This script:
1. Subscribes to camera topic /RMC1/arm95/camera_gripper/image_color
2. Classifies visible boxes using trained YOLOv8 model
3. Finds the target box and computes its position relative to Base_link
4. Publishes coordinates for manipulator control
"""
import argparse
import os
import sys
import cv2
import numpy as np
from ultralytics import YOLO
# Class mapping: YOLO class index -> competition ID
# YOLO trains alphabetically: hammer=0, pliers=1, wrench=2
YOLO_TO_COMPETITION = {
    0: 1,  # hammer -> 1
    1: 3,  # pliers -> 3
    2: 2,  # wrench -> 2
}
# Inverse lookup: competition ID -> YOLO class index.
COMPETITION_TO_YOLO = {v: k for k, v in YOLO_TO_COMPETITION.items()}
# Human-readable labels keyed by competition ID (English + Russian, per spec).
CLASS_NAMES = {
    1: "hammer (молоток)",
    2: "wrench (гаечный ключ)",
    3: "pliers (пассатижи)",
}
# YOLO class names (alphabetical order)
YOLO_CLASS_NAMES = {0: "hammer", 1: "pliers", 2: "wrench"}
def load_model(model_path=None):
    """Load the YOLOv8 classifier weights.

    When *model_path* is not supplied, falls back to ``best.pt``
    located next to this script.
    """
    default_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "best.pt"
    )
    return YOLO(model_path if model_path is not None else default_path)
def classify_image(model, image):
    """Classify one image and return (class_id, confidence).

    class_id uses the competition numbering (1=hammer, 2=wrench,
    3=pliers); (-1, 0.0) is returned when the model yields no class
    probabilities.
    """
    results = model(image, imgsz=224, verbose=False)
    # Guard clause: no result object or no classification head output.
    if not results or results[0].probs is None:
        return -1, 0.0
    probs = results[0].probs
    # Translate the YOLO index into the competition ID; -1 if unmapped.
    return YOLO_TO_COMPETITION.get(probs.top1, -1), probs.top1conf.item()
def detect_boxes_in_frame(image, target_id):
    """Segment candidate box regions in a top-down camera frame.

    Note: *target_id* is accepted for interface symmetry but not used
    here — per-region classification is done by the caller.

    Returns a list of dicts with keys "roi" (BGR crop), "bbox"
    (x, y, w, h), "center" (pixel coords) and "area" (contour area).
    """
    frame_h, frame_w = image.shape[:2]
    lo_area = (frame_h * frame_w) * 0.01  # minimum 1% of frame
    hi_area = (frame_h * frame_w) * 0.5   # maximum 50% of frame

    # Blur + adaptive threshold copes with uneven lighting when
    # separating box regions from the background.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    mask = cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 21, 5
    )

    # Close small gaps inside regions, then remove isolated noise.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    detections = []
    for contour in contours:
        area = cv2.contourArea(contour)
        # Discard specks and frame-sized blobs.
        if not (lo_area <= area <= hi_area):
            continue
        x, y, bw, bh = cv2.boundingRect(contour)
        patch = image[y:y + bh, x:x + bw]
        if patch.size == 0:
            continue
        detections.append({
            "roi": patch,
            "bbox": (x, y, bw, bh),
            "center": (x + bw // 2, y + bh // 2),
            "area": area,
        })
    return detections
def find_target_box(model, image, target_id, min_conf=0.5):
    """Find the target box in the camera frame.

    Segments candidate regions, classifies each, and keeps the most
    confident region whose class matches *target_id*.

    Args:
        model: loaded YOLO classification model.
        image: BGR camera frame.
        target_id: competition class ID to search for (1, 2 or 3).
        min_conf: minimum classifier confidence required to report a
            detection. Defaults to 0.5, matching the previously
            hard-coded threshold.

    Returns:
        (center_x, center_y, confidence, bbox) in pixel coordinates,
        or None when no sufficiently confident match is found.
        (Fixes the old docstring, which omitted the bbox element.)
    """
    best_match = None
    best_conf = 0.0
    for box in detect_boxes_in_frame(image, target_id):
        comp_class, conf = classify_image(model, box["roi"])
        if comp_class == target_id and conf > best_conf:
            best_conf = conf
            best_match = box
    if best_match is not None and best_conf > min_conf:
        cx, cy = best_match["center"]
        return cx, cy, best_conf, best_match["bbox"]
    return None
def pixel_to_base_link(cx, cy, img_w, img_h,
                       camera_x=0.0, camera_y=0.0, camera_z=0.7,
                       fov_x=0.6, fov_y=0.6):
    """Project a pixel onto the shelf plane in Base_link coordinates.

    The camera is assumed to look straight down from
    (camera_x, camera_y, camera_z); fov_x/fov_y give the metric
    width/height of the ground patch the image covers at that height.
    NOTE(review): these defaults are placeholders — calibrate them for
    the actual rig before trusting the output.
    """
    # Fractional offset of the pixel from the image center, in [-0.5, 0.5].
    frac_x = (cx / img_w) - 0.5
    frac_y = (cy / img_h) - 0.5
    # Scale by the metric footprint of the view and shift by camera pose.
    # z is 0.0 because the boxes rest on the shelf surface.
    return (camera_x + frac_x * fov_x,
            camera_y + frac_y * fov_y,
            0.0)
def demo_on_image(image_path, target_id, model):
    """Classify a single image from disk and print the verdict vs. target."""
    frame = cv2.imread(image_path)
    if frame is None:
        print(f"Error: cannot read {image_path}")
        return
    predicted, score = classify_image(model, frame)
    verdict = 'YES' if predicted == target_id else 'NO'
    print(f"Image: {image_path}")
    print(f" Predicted: {CLASS_NAMES.get(predicted, 'unknown')} (ID={predicted})")
    print(f" Confidence: {score:.4f}")
    print(f" Target: {CLASS_NAMES.get(target_id, 'unknown')} (ID={target_id})")
    print(f" Match: {verdict}")
def main():
    """CLI entry point: parse arguments, load the model, dispatch a mode."""
    parser = argparse.ArgumentParser(description="Module V: Box Detection")
    parser.add_argument("--target", type=int, required=True, choices=[1, 2, 3],
                        help="Target box ID: 1=hammer, 2=wrench, 3=pliers")
    parser.add_argument("--model", type=str, default=None,
                        help="Path to model weights (default: best.pt)")
    parser.add_argument("--image", type=str, default=None,
                        help="Test on a single image instead of ROS2 camera")
    parser.add_argument("--ros", action="store_true",
                        help="Run in ROS2 mode (subscribe to camera topic)")
    args = parser.parse_args()

    print(f"Target: {CLASS_NAMES[args.target]}")
    model = load_model(args.model)
    print("Model loaded successfully.")

    if args.image:
        demo_on_image(args.image, args.target, model)
    elif args.ros:
        run_ros2(model, args.target)
    else:
        # No mode selected: demo on the bundled sample images.
        sample_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "images"
        )
        for filename in ("Hammer.jpg", "Wrench.jpg", "Pliers.jpg"):
            candidate = os.path.join(sample_dir, filename)
            if os.path.exists(candidate):
                demo_on_image(candidate, args.target, model)
                print()
def run_ros2(model, target_id):
    """Spin a ROS2 node that watches the gripper camera for the target box."""
    # ROS2 imports are deferred so the script still works without ROS2
    # installed (image/demo modes).
    try:
        import rclpy
        from rclpy.node import Node
        from sensor_msgs.msg import Image
        from geometry_msgs.msg import PoseStamped
        from cv_bridge import CvBridge
    except ImportError:
        print("Error: ROS2 packages not found. Install rclpy, cv_bridge, etc.")
        sys.exit(1)

    class BoxDetectorNode(Node):
        """Subscribes to the camera feed and reports the first confident hit."""

        def __init__(self, model, target_id):
            super().__init__("box_detector")
            self.bridge = CvBridge()
            self.model = model
            self.target_id = target_id
            self.detected = False  # latch: stop processing after first hit
            self.subscription = self.create_subscription(
                Image,
                "/RMC1/arm95/camera_gripper/image_color",
                self.image_callback,
                10,
            )
            self.get_logger().info(
                f"Box detector started. Target: {CLASS_NAMES[target_id]}"
            )

        def image_callback(self, msg):
            # Already found the target — ignore further frames.
            if self.detected:
                return
            frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding="bgr8")
            h, w = frame.shape[:2]
            hit = find_target_box(self.model, frame, self.target_id)
            if hit is None:
                # No confident region: classify the whole frame for diagnostics.
                comp_class, conf = classify_image(self.model, frame)
                self.get_logger().info(
                    f"Frame classified as: {CLASS_NAMES.get(comp_class, 'unknown')} "
                    f"(conf={conf:.2f})"
                )
                return
            cx, cy, conf, bbox = hit
            wx, wy, wz = pixel_to_base_link(cx, cy, w, h)
            self.get_logger().info(
                f"TARGET FOUND: {CLASS_NAMES[self.target_id]} "
                f"at pixel ({cx}, {cy}), conf={conf:.2f}"
            )
            self.get_logger().info(
                f"Base_link coords: x={wx:.3f}, y={wy:.3f}, z={wz:.3f}"
            )
            self.detected = True

    rclpy.init()
    node = BoxDetectorNode(model, target_id)
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()
# Script entry point.
if __name__ == "__main__":
    main()