webots-vision-2/test_detect.py
"""
Test detection on test.png - find individual boxes and classify each.
Saves annotated result to test_result.png
"""
import cv2
import numpy as np
import os
from ultralytics import YOLO
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
YOLO_TO_COMPETITION = {0: 1, 1: 3, 2: 2} # hammer=1, pliers=3, wrench=2
CLASS_NAMES = {1: "hammer", 2: "wrench", 3: "pliers"}
CLASS_COLORS = {1: (0, 255, 0), 2: (255, 165, 0), 3: (0, 0, 255)}


def find_boxes(image):
    """Find box-like regions in the top-down camera view."""
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    h, w = image.shape[:2]

    # The boxes are white/light-colored rectangles on a brown/dark shelf.
    # Try multiple approaches and combine.
    boxes = []

    # Approach 1: Look for bright rectangular regions
    # (boxes appear as light-colored rectangles)
    _, bright_mask = cv2.threshold(gray, 160, 255, cv2.THRESH_BINARY)
    # Approach 2: Saturation-based (boxes are less saturated than the shelf)
    _, sat_mask = cv2.threshold(hsv[:, :, 1], 60, 255, cv2.THRESH_BINARY_INV)
    # Approach 3: Value channel - boxes are brighter
    _, val_mask = cv2.threshold(hsv[:, :, 2], 150, 255, cv2.THRESH_BINARY)

    # Combine masks
    combined = cv2.bitwise_and(bright_mask, sat_mask)
    combined = cv2.bitwise_and(combined, val_mask)

    # Clean up
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    combined = cv2.morphologyEx(combined, cv2.MORPH_CLOSE, kernel, iterations=3)
    combined = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel, iterations=2)

    # Save debug mask
    cv2.imwrite(os.path.join(BASE_DIR, "debug_mask.png"), combined)

    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = (h * w) * 0.005  # 0.5% of frame
    max_area = (h * w) * 0.15   # 15% of frame

    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < min_area or area > max_area:
            continue
        rect = cv2.minAreaRect(cnt)
        box_points = cv2.boxPoints(rect)
        box_points = np.intp(box_points)
        x, y, bw, bh = cv2.boundingRect(cnt)

        # Aspect ratio filter - boxes should be somewhat rectangular
        aspect = max(bw, bh) / (min(bw, bh) + 1e-5)
        if aspect > 5:
            continue

        # Pad the bounding box slightly
        pad = 5
        x1 = max(0, x - pad)
        y1 = max(0, y - pad)
        x2 = min(w, x + bw + pad)
        y2 = min(h, y + bh + pad)
        roi = image[y1:y2, x1:x2]
        if roi.size == 0:
            continue
        boxes.append({
            "roi": roi,
            "bbox": (x1, y1, x2 - x1, y2 - y1),
            "center": ((x1 + x2) // 2, (y1 + y2) // 2),
            "area": area,
            "contour": cnt,
        })

    # Sort by area descending
    boxes.sort(key=lambda b: b["area"], reverse=True)

    # NMS: remove boxes that overlap significantly with a larger box
    filtered = []
    for box in boxes:
        x1, y1, bw1, bh1 = box["bbox"]
        keep = True
        for kept in filtered:
            x2, y2, bw2, bh2 = kept["bbox"]
            # Overlap measure: intersection area over the smaller box's area
            # (not true IoU; a small box nested inside a larger one is always suppressed)
            ix1 = max(x1, x2)
            iy1 = max(y1, y2)
            ix2 = min(x1 + bw1, x2 + bw2)
            iy2 = min(y1 + bh1, y2 + bh2)
            if ix2 > ix1 and iy2 > iy1:
                inter = (ix2 - ix1) * (iy2 - iy1)
                area_small = min(bw1 * bh1, bw2 * bh2)
                # If intersection covers >40% of the smaller box, drop it
                if inter / (area_small + 1e-5) > 0.4:
                    keep = False
                    break
        if keep:
            filtered.append(box)

    return filtered


def classify_roi(model, roi):
    """Classify a single ROI."""
    results = model(roi, imgsz=224, verbose=False)
    if results and results[0].probs is not None:
        # Classification results expose the top-1 class index and its confidence via .probs
        probs = results[0].probs
        yolo_class = probs.top1
        confidence = probs.top1conf.item()
        comp_class = YOLO_TO_COMPETITION.get(yolo_class, -1)
        return comp_class, confidence
    return -1, 0.0


def main():
    model = YOLO(os.path.join(BASE_DIR, "best.pt"))
    image = cv2.imread(os.path.join(BASE_DIR, "test.png"))
    if image is None:
        print("Error: cannot read test.png")
        return

    print(f"Image size: {image.shape[1]}x{image.shape[0]}")

    # Find box regions
    boxes = find_boxes(image)
    print(f"Found {len(boxes)} box regions")

    # Classify each box
    annotated = image.copy()
    results_list = []
    for i, box in enumerate(boxes):
        comp_class, conf = classify_roi(model, box["roi"])
        name = CLASS_NAMES.get(comp_class, "unknown")
        results_list.append((comp_class, conf, box["center"], box["bbox"]))
        print(f" Box {i+1}: {name} (ID={comp_class}, conf={conf:.4f}) "
              f"center=({box['center'][0]}, {box['center'][1]}) "
              f"area={box['area']:.0f}")

        # Draw on annotated image
        x, y, bw, bh = box["bbox"]
        color = CLASS_COLORS.get(comp_class, (128, 128, 128))
        cv2.rectangle(annotated, (x, y), (x + bw, y + bh), color, 2)
        label = f"{name} {conf:.2f}"
        cv2.putText(annotated, label, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Save annotated result
    out_path = os.path.join(BASE_DIR, "test_result.png")
    cv2.imwrite(out_path, annotated)
    print(f"\nAnnotated result saved to: {out_path}")

    # Summary
    print("\n--- Summary ---")
    class_counts = {}
    for comp_class, conf, center, bbox in results_list:
        name = CLASS_NAMES.get(comp_class, "unknown")
        class_counts[name] = class_counts.get(name, 0) + 1
    for name, count in class_counts.items():
        print(f" {name}: {count}")

    # Fallback: if the contour-based box detection found fewer than 3 boxes,
    # also try a sliding-window approach
    if len(boxes) < 3:
        print("\n--- Fallback: sliding window approach ---")
        sliding_window_detect(model, image)


def sliding_window_detect(model, image):
    """Fallback: use sliding window to find boxes."""
    h, w = image.shape[:2]
    window_sizes = [(h // 3, w // 4), (h // 2, w // 3)]
    step_ratio = 0.3

    all_detections = []
    for wh, ww in window_sizes:
        step_y = int(wh * step_ratio)
        step_x = int(ww * step_ratio)
        for y in range(0, h - wh + 1, step_y):
            for x in range(0, w - ww + 1, step_x):
                roi = image[y:y+wh, x:x+ww]
                comp_class, conf = classify_roi(model, roi)
                if conf > 0.8:
                    cx, cy = x + ww // 2, y + wh // 2
                    all_detections.append((comp_class, conf, cx, cy, x, y, ww, wh))

    # NMS-like: keep highest confidence per class in non-overlapping regions
    if all_detections:
        all_detections.sort(key=lambda d: d[1], reverse=True)
        kept = []
        for det in all_detections:
            cx, cy = det[2], det[3]
            overlap = False
            for k in kept:
                if abs(cx - k[2]) < w // 5 and abs(cy - k[3]) < h // 5:
                    overlap = True
                    break
            if not overlap:
                kept.append(det)

        annotated = image.copy()
        for comp_class, conf, cx, cy, x, y, ww, wh in kept:
            name = CLASS_NAMES.get(comp_class, "unknown")
            color = CLASS_COLORS.get(comp_class, (128, 128, 128))
            cv2.rectangle(annotated, (x, y), (x + ww, y + wh), color, 2)
            label = f"{name} {conf:.2f}"
            cv2.putText(annotated, label, (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            print(f" {name} (conf={conf:.2f}) at ({cx}, {cy})")

        out_path = os.path.join(BASE_DIR, "test_result_sliding.png")
        cv2.imwrite(out_path, annotated)
        print(f" Saved to: {out_path}")
if __name__ == "__main__":
main()
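
# Usage sketch (an assumption based on the paths above, not part of the original
# script's documentation): place the trained classifier weights "best.pt" and the
# input image "test.png" next to this file, then run:
#
#   python test_detect.py
#
# Outputs written alongside the script: debug_mask.png (box-detection mask),
# test_result.png (annotated detections), and test_result_sliding.png if the
# sliding-window fallback is triggered.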