""" Generate augmented classification dataset from 3 source images. Classes: hammer (0), wrench (1), pliers (2) Improved: composites tools onto various backgrounds simulating camera view of boxes on a shelf (brown, gray, dark surfaces). """ import os import cv2 import numpy as np from PIL import Image import albumentations as A import random SEED = 42 random.seed(SEED) np.random.seed(SEED) BASE_DIR = os.path.dirname(os.path.abspath(__file__)) IMAGES_DIR = os.path.join(BASE_DIR, "images") DATASET_DIR = os.path.join(BASE_DIR, "dataset") CLASSES = { "hammer": "Hammer.jpg", "wrench": "Wrench.jpg", "pliers": "Pliers.jpg", } TRAIN_COUNT = 300 # per class VAL_COUNT = 60 # per class IMG_SIZE = 224 # Background colors to simulate shelf/box surfaces BG_COLORS = [ (255, 255, 255), # white box (240, 240, 240), # light gray (220, 220, 220), # gray (200, 190, 170), # beige/cardboard (180, 160, 130), # brown shelf (160, 140, 110), # dark brown (140, 130, 120), # dark shelf (100, 100, 100), # dark gray (80, 80, 80), # very dark (245, 240, 230), # off-white (230, 225, 210), # cream ] def create_background(size, bg_type="random"): """Create a background image.""" h, w = size if bg_type == "random": choice = random.random() if choice < 0.4: # Solid color color = random.choice(BG_COLORS) bg = np.full((h, w, 3), color, dtype=np.uint8) # Add slight noise noise = np.random.randint(-10, 10, (h, w, 3), dtype=np.int16) bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8) elif choice < 0.7: # Gradient color1 = np.array(random.choice(BG_COLORS), dtype=np.float32) color2 = np.array(random.choice(BG_COLORS), dtype=np.float32) gradient = np.linspace(0, 1, h).reshape(-1, 1, 1) bg = (color1 * (1 - gradient) + color2 * gradient).astype(np.uint8) bg = np.broadcast_to(bg, (h, w, 3)).copy() else: # Textured (simulating wood/metal surface) base_color = random.choice(BG_COLORS) bg = np.full((h, w, 3), base_color, dtype=np.uint8) # Add texture noise noise = np.random.randint(-20, 20, (h, w, 3), dtype=np.int16) bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8) bg = cv2.GaussianBlur(bg, (3, 3), 0) return bg def extract_tool_mask(img): """Create a mask separating the tool from white background.""" gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Tool is non-white _, mask = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV) # Dilate to include edges kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) mask = cv2.dilate(mask, kernel, iterations=1) return mask def composite_on_background(tool_img, bg, mask=None): """Place tool image on background, simulating box on shelf.""" bh, bw = bg.shape[:2] th, tw = tool_img.shape[:2] # Random scale for the tool on the "box" scale = random.uniform(0.3, 0.8) new_w = int(tw * scale * bw / max(tw, th)) new_h = int(th * scale * bh / max(tw, th)) new_w = min(new_w, int(bw * 0.85)) new_h = min(new_h, int(bh * 0.85)) if new_w < 10 or new_h < 10: return bg tool_resized = cv2.resize(tool_img, (new_w, new_h)) if mask is not None: mask_resized = cv2.resize(mask, (new_w, new_h)) else: mask_resized = np.ones((new_h, new_w), dtype=np.uint8) * 255 # Random position max_x = bw - new_w max_y = bh - new_h if max_x <= 0 or max_y <= 0: return bg x = random.randint(0, max_x) y = random.randint(0, max_y) result = bg.copy() # Sometimes draw a white "box/card" under the tool if random.random() < 0.6: pad = random.randint(5, 15) bx1 = max(0, x - pad) by1 = max(0, y - pad) bx2 = min(bw, x + new_w + pad) by2 = min(bh, y + new_h + pad) box_color = random.choice([(255, 255, 255), (245, 245, 245), (240, 240, 240)]) cv2.rectangle(result, (bx1, by1), (bx2, by2), box_color, -1) # Composite tool mask_3ch = cv2.merge([mask_resized, mask_resized, mask_resized]) mask_f = mask_3ch.astype(np.float32) / 255.0 roi = result[y:y+new_h, x:x+new_w].astype(np.float32) tool_f = tool_resized.astype(np.float32) blended = tool_f * mask_f + roi * (1 - mask_f) result[y:y+new_h, x:x+new_w] = blended.astype(np.uint8) return result # Augmentation on the final composite train_aug = A.Compose([ A.Rotate(limit=45, p=0.7, border_mode=cv2.BORDER_REFLECT_101), A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.3), A.Perspective(scale=(0.02, 0.06), p=0.3), A.OneOf([ A.GaussianBlur(blur_limit=(3, 5)), A.MotionBlur(blur_limit=(3, 5)), ], p=0.3), A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5), A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=25, val_shift_limit=25, p=0.4), A.GaussNoise(p=0.2), A.ImageCompression(quality_range=(60, 95), p=0.2), A.Resize(IMG_SIZE, IMG_SIZE), ]) val_aug = A.Compose([ A.Rotate(limit=30, p=0.5, border_mode=cv2.BORDER_REFLECT_101), A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.3), A.Resize(IMG_SIZE, IMG_SIZE), ]) # Simple augmentation - just the tool on white bg (like original) simple_aug = A.Compose([ A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.5, 1.0), ratio=(0.75, 1.33)), A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.3), A.Rotate(limit=180, p=0.8, border_mode=cv2.BORDER_CONSTANT, fill=255), A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.4), A.GaussianBlur(blur_limit=(3, 5), p=0.2), ]) def generate_images(img_path, output_dir, count, is_train=True): os.makedirs(output_dir, exist_ok=True) img = cv2.imread(img_path) if img is None: raise FileNotFoundError(f"Cannot read {img_path}") mask = extract_tool_mask(img) # Pad to square h, w = img.shape[:2] max_side = max(h, w) padded = np.full((max_side, max_side, 3), 255, dtype=np.uint8) padded_mask = np.zeros((max_side, max_side), dtype=np.uint8) y_off = (max_side - h) // 2 x_off = (max_side - w) // 2 padded[y_off:y_off+h, x_off:x_off+w] = img padded_mask[y_off:y_off+h, x_off:x_off+w] = mask aug = train_aug if is_train else val_aug for i in range(count): if is_train and random.random() < 0.6: # Composite on background (simulates box on shelf) bg = create_background((IMG_SIZE + 60, IMG_SIZE + 60)) composite = composite_on_background(padded, bg, padded_mask) result = aug(image=composite)["image"] else: # Simple augmentation (tool on white/clean bg) result = simple_aug(image=padded)["image"] out_path = os.path.join(output_dir, f"img_{i:04d}.jpg") cv2.imwrite(out_path, result) def main(): import shutil # Clean old dataset if os.path.exists(DATASET_DIR): shutil.rmtree(DATASET_DIR) print("Generating improved dataset with background compositing...") for class_name, filename in CLASSES.items(): img_path = os.path.join(IMAGES_DIR, filename) train_dir = os.path.join(DATASET_DIR, "train", class_name) generate_images(img_path, train_dir, TRAIN_COUNT, is_train=True) print(f" {class_name}: {TRAIN_COUNT} train images") val_dir = os.path.join(DATASET_DIR, "val", class_name) generate_images(img_path, val_dir, VAL_COUNT, is_train=False) print(f" {class_name}: {VAL_COUNT} val images") print(f"\nDataset: {DATASET_DIR}") print(f" Train: {TRAIN_COUNT * len(CLASSES)} images") print(f" Val: {VAL_COUNT * len(CLASSES)} images") if __name__ == "__main__": main()