first commit

This commit is contained in:
2026-04-03 07:30:54 +03:00
commit 1a083e043e
1138 changed files with 1705 additions and 0 deletions

280
detect_box.py Normal file
View File

@@ -0,0 +1,280 @@
"""
Module V: Box detection and classification for RMK-1 manipulator.
Usage:
python3 detect_box.py --target 1
Target classes (per competition spec):
1 - hammer (молоток)
2 - wrench (гаечный ключ)
3 - pliers (пассатижи)
This script:
1. Subscribes to camera topic /RMC1/arm95/camera_gripper/image_color
2. Classifies visible boxes using trained YOLOv8 model
3. Finds the target box and computes its position relative to Base_link
4. Publishes coordinates for manipulator control
"""
import argparse
import os
import sys
import cv2
import numpy as np
from ultralytics import YOLO
# Class mapping: YOLO class index -> competition ID
# YOLO trains alphabetically: hammer=0, pliers=1, wrench=2
YOLO_TO_COMPETITION = {
    0: 1,  # hammer -> 1
    1: 3,  # pliers -> 3
    2: 2,  # wrench -> 2
}
# Inverse lookup: competition ID -> YOLO class index.
COMPETITION_TO_YOLO = {v: k for k, v in YOLO_TO_COMPETITION.items()}
# Human-readable labels keyed by competition ID (English + Russian, per spec).
CLASS_NAMES = {
    1: "hammer (молоток)",
    2: "wrench (гаечный ключ)",
    3: "pliers (пассатижи)",
}
# YOLO class names (alphabetical order)
YOLO_CLASS_NAMES = {0: "hammer", 1: "pliers", 2: "wrench"}
def load_model(model_path=None):
    """Load the YOLOv8 classifier weights.

    When *model_path* is not supplied, falls back to ``best.pt``
    located next to this script.
    """
    default_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "best.pt"
    )
    return YOLO(model_path if model_path is not None else default_path)
def classify_image(model, image):
    """Classify one image and return (class_id, confidence).

    class_id uses the competition numbering (1=hammer, 2=wrench,
    3=pliers); (-1, 0.0) is returned when the model yields no class
    probabilities.
    """
    results = model(image, imgsz=224, verbose=False)
    # Guard clause: no result object or no classification head output.
    if not results or results[0].probs is None:
        return -1, 0.0
    probs = results[0].probs
    # Translate the YOLO index into the competition ID; -1 if unmapped.
    return YOLO_TO_COMPETITION.get(probs.top1, -1), probs.top1conf.item()
def detect_boxes_in_frame(image, target_id):
    """Segment candidate box regions in a top-down camera frame.

    Note: *target_id* is accepted for interface symmetry but not used
    here — per-region classification is done by the caller.

    Returns a list of dicts with keys "roi" (BGR crop), "bbox"
    (x, y, w, h), "center" (pixel coords) and "area" (contour area).
    """
    frame_h, frame_w = image.shape[:2]
    lo_area = (frame_h * frame_w) * 0.01  # minimum 1% of frame
    hi_area = (frame_h * frame_w) * 0.5   # maximum 50% of frame

    # Blur + adaptive threshold copes with uneven lighting when
    # separating box regions from the background.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    mask = cv2.adaptiveThreshold(
        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 21, 5
    )

    # Close small gaps inside regions, then remove isolated noise.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    detections = []
    for contour in contours:
        area = cv2.contourArea(contour)
        # Discard specks and frame-sized blobs.
        if not (lo_area <= area <= hi_area):
            continue
        x, y, bw, bh = cv2.boundingRect(contour)
        patch = image[y:y + bh, x:x + bw]
        if patch.size == 0:
            continue
        detections.append({
            "roi": patch,
            "bbox": (x, y, bw, bh),
            "center": (x + bw // 2, y + bh // 2),
            "area": area,
        })
    return detections
def find_target_box(model, image, target_id, min_conf=0.5):
    """Find the target box in the camera frame.

    Segments candidate regions, classifies each, and keeps the most
    confident region whose class matches *target_id*.

    Args:
        model: loaded YOLO classification model.
        image: BGR camera frame.
        target_id: competition class ID to search for (1, 2 or 3).
        min_conf: minimum classifier confidence required to report a
            detection. Defaults to 0.5, matching the previously
            hard-coded threshold.

    Returns:
        (center_x, center_y, confidence, bbox) in pixel coordinates,
        or None when no sufficiently confident match is found.
        (Fixes the old docstring, which omitted the bbox element.)
    """
    best_match = None
    best_conf = 0.0
    for box in detect_boxes_in_frame(image, target_id):
        comp_class, conf = classify_image(model, box["roi"])
        if comp_class == target_id and conf > best_conf:
            best_conf = conf
            best_match = box
    if best_match is not None and best_conf > min_conf:
        cx, cy = best_match["center"]
        return cx, cy, best_conf, best_match["bbox"]
    return None
def pixel_to_base_link(cx, cy, img_w, img_h,
                       camera_x=0.0, camera_y=0.0, camera_z=0.7,
                       fov_x=0.6, fov_y=0.6):
    """Project a pixel onto the shelf plane in Base_link coordinates.

    The camera is assumed to look straight down from
    (camera_x, camera_y, camera_z); fov_x/fov_y give the metric
    width/height of the ground patch the image covers at that height.
    NOTE(review): these defaults are placeholders — calibrate them for
    the actual rig before trusting the output.
    """
    # Fractional offset of the pixel from the image center, in [-0.5, 0.5].
    frac_x = (cx / img_w) - 0.5
    frac_y = (cy / img_h) - 0.5
    # Scale by the metric footprint of the view and shift by camera pose.
    # z is 0.0 because the boxes rest on the shelf surface.
    return (camera_x + frac_x * fov_x,
            camera_y + frac_y * fov_y,
            0.0)
def demo_on_image(image_path, target_id, model):
    """Classify a single image from disk and print the verdict vs. target."""
    frame = cv2.imread(image_path)
    if frame is None:
        print(f"Error: cannot read {image_path}")
        return
    predicted, score = classify_image(model, frame)
    verdict = 'YES' if predicted == target_id else 'NO'
    print(f"Image: {image_path}")
    print(f" Predicted: {CLASS_NAMES.get(predicted, 'unknown')} (ID={predicted})")
    print(f" Confidence: {score:.4f}")
    print(f" Target: {CLASS_NAMES.get(target_id, 'unknown')} (ID={target_id})")
    print(f" Match: {verdict}")
def main():
    """CLI entry point: parse arguments, load the model, dispatch a mode."""
    parser = argparse.ArgumentParser(description="Module V: Box Detection")
    parser.add_argument("--target", type=int, required=True, choices=[1, 2, 3],
                        help="Target box ID: 1=hammer, 2=wrench, 3=pliers")
    parser.add_argument("--model", type=str, default=None,
                        help="Path to model weights (default: best.pt)")
    parser.add_argument("--image", type=str, default=None,
                        help="Test on a single image instead of ROS2 camera")
    parser.add_argument("--ros", action="store_true",
                        help="Run in ROS2 mode (subscribe to camera topic)")
    args = parser.parse_args()

    print(f"Target: {CLASS_NAMES[args.target]}")
    model = load_model(args.model)
    print("Model loaded successfully.")

    if args.image:
        demo_on_image(args.image, args.target, model)
    elif args.ros:
        run_ros2(model, args.target)
    else:
        # No mode selected: demo on the bundled sample images.
        sample_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "images"
        )
        for filename in ("Hammer.jpg", "Wrench.jpg", "Pliers.jpg"):
            candidate = os.path.join(sample_dir, filename)
            if os.path.exists(candidate):
                demo_on_image(candidate, args.target, model)
                print()
def run_ros2(model, target_id):
    """Spin a ROS2 node that watches the gripper camera for the target box."""
    # ROS2 imports are deferred so the script still works without ROS2
    # installed (image/demo modes).
    try:
        import rclpy
        from rclpy.node import Node
        from sensor_msgs.msg import Image
        from geometry_msgs.msg import PoseStamped
        from cv_bridge import CvBridge
    except ImportError:
        print("Error: ROS2 packages not found. Install rclpy, cv_bridge, etc.")
        sys.exit(1)

    class BoxDetectorNode(Node):
        """Subscribes to the camera feed and reports the first confident hit."""

        def __init__(self, model, target_id):
            super().__init__("box_detector")
            self.bridge = CvBridge()
            self.model = model
            self.target_id = target_id
            self.detected = False  # latch: stop processing after first hit
            self.subscription = self.create_subscription(
                Image,
                "/RMC1/arm95/camera_gripper/image_color",
                self.image_callback,
                10,
            )
            self.get_logger().info(
                f"Box detector started. Target: {CLASS_NAMES[target_id]}"
            )

        def image_callback(self, msg):
            # Already found the target — ignore further frames.
            if self.detected:
                return
            frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding="bgr8")
            h, w = frame.shape[:2]
            hit = find_target_box(self.model, frame, self.target_id)
            if hit is None:
                # No confident region: classify the whole frame for diagnostics.
                comp_class, conf = classify_image(self.model, frame)
                self.get_logger().info(
                    f"Frame classified as: {CLASS_NAMES.get(comp_class, 'unknown')} "
                    f"(conf={conf:.2f})"
                )
                return
            cx, cy, conf, bbox = hit
            wx, wy, wz = pixel_to_base_link(cx, cy, w, h)
            self.get_logger().info(
                f"TARGET FOUND: {CLASS_NAMES[self.target_id]} "
                f"at pixel ({cx}, {cy}), conf={conf:.2f}"
            )
            self.get_logger().info(
                f"Base_link coords: x={wx:.3f}, y={wy:.3f}, z={wz:.3f}"
            )
            self.detected = True

    rclpy.init()
    node = BoxDetectorNode(model, target_id)
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        rclpy.shutdown()
# Script entry point.
if __name__ == "__main__":
    main()