# Source: webots-vision-2/generate_dataset.py
# 240 lines, 7.8 KiB, Python
# Retrieved from repository web view, 2026-04-03 07:30:54 +03:00
"""
Generate augmented classification dataset from 3 source images.
Classes: hammer (0), wrench (1), pliers (2)
Improved: composites tools onto various backgrounds simulating
camera view of boxes on a shelf (brown, gray, dark surfaces).
"""
import os
import cv2
import numpy as np
from PIL import Image
import albumentations as A
import random
# Fixed seed so repeated runs regenerate an identical dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Paths are resolved relative to this file so the script works from any CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
IMAGES_DIR = os.path.join(BASE_DIR, "images")    # source photos, one per class
DATASET_DIR = os.path.join(BASE_DIR, "dataset")  # output root (train/, val/)

# Class name -> source image filename (expected under IMAGES_DIR).
CLASSES = {
    "hammer": "Hammer.jpg",
    "wrench": "Wrench.jpg",
    "pliers": "Pliers.jpg",
}

TRAIN_COUNT = 300  # per class
VAL_COUNT = 60  # per class
IMG_SIZE = 224  # final square output side fed to the augmentation pipelines

# Background colors to simulate shelf/box surfaces
BG_COLORS = [
    (255, 255, 255),  # white box
    (240, 240, 240),  # light gray
    (220, 220, 220),  # gray
    (200, 190, 170),  # beige/cardboard
    (180, 160, 130),  # brown shelf
    (160, 140, 110),  # dark brown
    (140, 130, 120),  # dark shelf
    (100, 100, 100),  # dark gray
    (80, 80, 80),     # very dark
    (245, 240, 230),  # off-white
    (230, 225, 210),  # cream
]
def create_background(size, bg_type="random"):
    """Create an (h, w, 3) uint8 background image simulating a shelf surface.

    Args:
        size: (height, width) tuple.
        bg_type: "random" picks one of three styles (solid / gradient /
            textured). Any other value now yields a plain solid color —
            previously ``bg`` was only assigned inside the "random" branch,
            so any other ``bg_type`` raised UnboundLocalError.

    Returns:
        np.ndarray of shape (h, w, 3), dtype uint8.
    """
    h, w = size
    if bg_type != "random":
        # Fallback: plain solid color, no noise (bug fix — see docstring).
        return np.full((h, w, 3), random.choice(BG_COLORS), dtype=np.uint8)

    choice = random.random()
    if choice < 0.4:
        # Solid color with slight per-pixel noise.
        color = random.choice(BG_COLORS)
        bg = np.full((h, w, 3), color, dtype=np.uint8)
        noise = np.random.randint(-10, 10, (h, w, 3), dtype=np.int16)
        bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8)
    elif choice < 0.7:
        # Vertical gradient between two palette colors.
        color1 = np.array(random.choice(BG_COLORS), dtype=np.float32)
        color2 = np.array(random.choice(BG_COLORS), dtype=np.float32)
        gradient = np.linspace(0, 1, h).reshape(-1, 1, 1)
        bg = (color1 * (1 - gradient) + color2 * gradient).astype(np.uint8)
        bg = np.broadcast_to(bg, (h, w, 3)).copy()
    else:
        # Textured (simulating wood/metal surface): stronger noise, then blur.
        base_color = random.choice(BG_COLORS)
        bg = np.full((h, w, 3), base_color, dtype=np.uint8)
        noise = np.random.randint(-20, 20, (h, w, 3), dtype=np.int16)
        bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8)
        bg = cv2.GaussianBlur(bg, (3, 3), 0)
    return bg
def extract_tool_mask(img):
    """Return a binary mask (uint8, 0/255) of the tool against its white backdrop."""
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Anything darker than the near-white background (>240) is treated as tool.
    _, tool_mask = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY_INV)
    # Grow the mask one step so anti-aliased edge pixels are included.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    return cv2.dilate(tool_mask, ellipse, iterations=1)
def composite_on_background(tool_img, bg, mask=None):
    """Paste the tool onto a background, simulating a boxed tool on a shelf.

    Returns a new image; ``bg`` itself is not modified. If the scaled tool
    would be too small (<10 px) or would not fit, the untouched background
    is returned unchanged.
    """
    bg_h, bg_w = bg.shape[:2]
    tool_h, tool_w = tool_img.shape[:2]

    # Pick a size for the tool relative to the background frame.
    scale = random.uniform(0.3, 0.8)
    longest = max(tool_w, tool_h)
    new_w = min(int(tool_w * scale * bg_w / longest), int(bg_w * 0.85))
    new_h = min(int(tool_h * scale * bg_h / longest), int(bg_h * 0.85))
    if new_w < 10 or new_h < 10:
        return bg

    resized_tool = cv2.resize(tool_img, (new_w, new_h))
    if mask is None:
        resized_mask = np.ones((new_h, new_w), dtype=np.uint8) * 255
    else:
        resized_mask = cv2.resize(mask, (new_w, new_h))

    # Random placement that keeps the tool fully inside the frame.
    slack_x = bg_w - new_w
    slack_y = bg_h - new_h
    if slack_x <= 0 or slack_y <= 0:
        return bg
    x = random.randint(0, slack_x)
    y = random.randint(0, slack_y)

    result = bg.copy()
    # Sometimes draw a light "box/card" rectangle beneath the tool.
    if random.random() < 0.6:
        pad = random.randint(5, 15)
        top_left = (max(0, x - pad), max(0, y - pad))
        bottom_right = (min(bg_w, x + new_w + pad), min(bg_h, y + new_h + pad))
        card_color = random.choice([(255, 255, 255), (245, 245, 245), (240, 240, 240)])
        cv2.rectangle(result, top_left, bottom_right, card_color, -1)

    # Alpha-blend the tool over the target region using the (0..255) mask.
    alpha = cv2.merge([resized_mask, resized_mask, resized_mask]).astype(np.float32) / 255.0
    region = result[y:y + new_h, x:x + new_w].astype(np.float32)
    blended = resized_tool.astype(np.float32) * alpha + region * (1 - alpha)
    result[y:y + new_h, x:x + new_w] = blended.astype(np.uint8)
    return result
# Augmentation on the final composite
# Training pipeline: aggressive geometric + photometric perturbations
# applied to the tool-on-background composite, ending in a fixed resize.
train_aug = A.Compose([
    A.Rotate(limit=45, p=0.7, border_mode=cv2.BORDER_REFLECT_101),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Perspective(scale=(0.02, 0.06), p=0.3),
    # Exactly one blur flavor when the OneOf fires.
    A.OneOf([
        A.GaussianBlur(blur_limit=(3, 5)),
        A.MotionBlur(blur_limit=(3, 5)),
    ], p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=25, val_shift_limit=25, p=0.4),
    A.GaussNoise(p=0.2),
    # Simulate JPEG artifacts from a real camera pipeline.
    A.ImageCompression(quality_range=(60, 95), p=0.2),
    A.Resize(IMG_SIZE, IMG_SIZE),
])

# Validation pipeline: milder perturbations, same final size.
val_aug = A.Compose([
    A.Rotate(limit=30, p=0.5, border_mode=cv2.BORDER_REFLECT_101),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.3),
    A.Resize(IMG_SIZE, IMG_SIZE),
])

# Simple augmentation - just the tool on white bg (like original)
# RandomResizedCrop already outputs IMG_SIZE, so no trailing Resize is needed.
simple_aug = A.Compose([
    A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.5, 1.0), ratio=(0.75, 1.33)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    # White constant border keeps rotated corners consistent with the white bg.
    A.Rotate(limit=180, p=0.8, border_mode=cv2.BORDER_CONSTANT, fill=255),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.4),
    A.GaussianBlur(blur_limit=(3, 5), p=0.2),
])
def generate_images(img_path, output_dir, count, is_train=True):
    """Generate ``count`` augmented JPEGs from one source photo.

    Args:
        img_path: path to the source tool image (tool on white background).
        output_dir: destination directory (created if missing).
        count: number of images to write, named ``img_0000.jpg`` onwards.
        is_train: True -> mix of composited backgrounds (60%) and simple
            white-background augmentation (40%); False -> the validation
            pipeline ``val_aug``.

    Raises:
        FileNotFoundError: if the source image cannot be read.
    """
    os.makedirs(output_dir, exist_ok=True)
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Cannot read {img_path}")
    mask = extract_tool_mask(img)

    # Pad image (white) and mask (black) to a square so rotations/crops
    # never cut the tool differently along the long axis.
    h, w = img.shape[:2]
    max_side = max(h, w)
    padded = np.full((max_side, max_side, 3), 255, dtype=np.uint8)
    padded_mask = np.zeros((max_side, max_side), dtype=np.uint8)
    y_off = (max_side - h) // 2
    x_off = (max_side - w) // 2
    padded[y_off:y_off + h, x_off:x_off + w] = img
    padded_mask[y_off:y_off + h, x_off:x_off + w] = mask

    aug = train_aug if is_train else val_aug
    for i in range(count):
        if is_train and random.random() < 0.6:
            # Composite on background (simulates box on shelf)
            bg = create_background((IMG_SIZE + 60, IMG_SIZE + 60))
            composite = composite_on_background(padded, bg, padded_mask)
            result = aug(image=composite)["image"]
        elif is_train:
            # Simple augmentation (tool on white/clean bg)
            result = simple_aug(image=padded)["image"]
        else:
            # Bug fix: validation previously fell into the simple_aug branch,
            # leaving val_aug dead code; validation now uses its own pipeline.
            result = aug(image=padded)["image"]
        out_path = os.path.join(output_dir, f"img_{i:04d}.jpg")
        cv2.imwrite(out_path, result)
def main():
    """Regenerate the full train/val dataset, replacing any previous output."""
    import shutil

    # Wipe stale output so the dataset always reflects the current settings.
    if os.path.exists(DATASET_DIR):
        shutil.rmtree(DATASET_DIR)

    print("Generating improved dataset with background compositing...")
    for label, src_name in CLASSES.items():
        source = os.path.join(IMAGES_DIR, src_name)

        generate_images(source, os.path.join(DATASET_DIR, "train", label),
                        TRAIN_COUNT, is_train=True)
        print(f" {label}: {TRAIN_COUNT} train images")

        generate_images(source, os.path.join(DATASET_DIR, "val", label),
                        VAL_COUNT, is_train=False)
        print(f" {label}: {VAL_COUNT} val images")

    print(f"\nDataset: {DATASET_DIR}")
    print(f" Train: {TRAIN_COUNT * len(CLASSES)} images")
    print(f" Val: {VAL_COUNT * len(CLASSES)} images")


if __name__ == "__main__":
    main()