first commit
This commit is contained in:
239
generate_dataset.py
Normal file
239
generate_dataset.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""
|
||||
Generate augmented classification dataset from 3 source images.
|
||||
Classes: hammer (0), wrench (1), pliers (2)
|
||||
|
||||
Improved: composites tools onto various backgrounds simulating
|
||||
camera view of boxes on a shelf (brown, gray, dark surfaces).
|
||||
"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import albumentations as A
|
||||
import random
|
||||
|
||||
# Fixed seed so repeated runs regenerate an identical dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# All paths are resolved relative to this script's own directory,
# so the script works regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
IMAGES_DIR = os.path.join(BASE_DIR, "images")
DATASET_DIR = os.path.join(BASE_DIR, "dataset")

# class name -> source image filename under IMAGES_DIR
CLASSES = {
    "hammer": "Hammer.jpg",
    "wrench": "Wrench.jpg",
    "pliers": "Pliers.jpg",
}

TRAIN_COUNT = 300  # per class
VAL_COUNT = 60  # per class
IMG_SIZE = 224  # final square output size (pixels)

# Background colors to simulate shelf/box surfaces.
# NOTE(review): images are processed with OpenCV, i.e. in BGR channel order,
# but these triples read naturally as RGB ("brown", "beige"). The grayscale
# entries are unaffected, but the brown/beige ones render with inverted hue
# under BGR — confirm intended channel order.
BG_COLORS = [
    (255, 255, 255),  # white box
    (240, 240, 240),  # light gray
    (220, 220, 220),  # gray
    (200, 190, 170),  # beige/cardboard
    (180, 160, 130),  # brown shelf
    (160, 140, 110),  # dark brown
    (140, 130, 120),  # dark shelf
    (100, 100, 100),  # dark gray
    (80, 80, 80),  # very dark
    (245, 240, 230),  # off-white
    (230, 225, 210),  # cream
]
|
||||
|
||||
|
||||
def create_background(size, bg_type="random"):
    """Create a synthetic background simulating a shelf/box surface.

    Args:
        size: (height, width) of the background to generate.
        bg_type: background style. "random" picks among solid (40%),
            gradient (30%) and textured (30%) variants. Any other value
            falls back to a plain solid color. (Previously a non-"random"
            value left ``bg`` unbound and raised UnboundLocalError.)

    Returns:
        np.uint8 array of shape (h, w, 3).
    """
    h, w = size
    if bg_type == "random":
        choice = random.random()
        if choice < 0.4:
            # Solid color with slight per-pixel noise so it isn't perfectly flat.
            color = random.choice(BG_COLORS)
            bg = np.full((h, w, 3), color, dtype=np.uint8)
            noise = np.random.randint(-10, 10, (h, w, 3), dtype=np.int16)
            bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8)
        elif choice < 0.7:
            # Vertical gradient blending two random surface colors.
            color1 = np.array(random.choice(BG_COLORS), dtype=np.float32)
            color2 = np.array(random.choice(BG_COLORS), dtype=np.float32)
            gradient = np.linspace(0, 1, h).reshape(-1, 1, 1)
            bg = (color1 * (1 - gradient) + color2 * gradient).astype(np.uint8)
            bg = np.broadcast_to(bg, (h, w, 3)).copy()
        else:
            # Textured (simulating wood/metal surface): heavier noise, then blur.
            base_color = random.choice(BG_COLORS)
            bg = np.full((h, w, 3), base_color, dtype=np.uint8)
            noise = np.random.randint(-20, 20, (h, w, 3), dtype=np.int16)
            bg = np.clip(bg.astype(np.int16) + noise, 0, 255).astype(np.uint8)
            bg = cv2.GaussianBlur(bg, (3, 3), 0)
    else:
        # Fix: the original had no fallback here, so any bg_type other than
        # "random" crashed with UnboundLocalError. Produce a plain solid color.
        bg = np.full((h, w, 3), random.choice(BG_COLORS), dtype=np.uint8)
    return bg
|
||||
|
||||
|
||||
def extract_tool_mask(img):
    """Build a binary mask isolating the tool from its white backdrop.

    Pixels darker than the near-white background threshold (240) become
    foreground (255); the mask is then dilated one step so soft tool edges
    are not clipped during compositing.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Invert-threshold: everything below near-white counts as tool.
    mask = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY_INV)[1]
    # Grow the mask slightly with a small elliptical kernel.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    return cv2.dilate(mask, ellipse, iterations=1)
|
||||
|
||||
|
||||
def composite_on_background(tool_img, bg, mask=None):
    """Place tool image on background, simulating box on shelf.

    Args:
        tool_img: BGR uint8 image of the tool (white backdrop).
        bg: BGR uint8 background to composite onto; returned unchanged
            when the tool would be degenerate or would not fit.
        mask: optional uint8 alpha mask (255 = tool) matching tool_img;
            when None the whole tool image is pasted opaquely.

    Returns:
        A new BGR uint8 image; ``bg`` itself is never mutated.

    Note: the sequence of `random` calls (scale, position, box gate, pad,
    color) is fixed — reordering them changes seeded reproducibility.
    """
    bh, bw = bg.shape[:2]
    th, tw = tool_img.shape[:2]

    # Random scale for the tool on the "box"
    scale = random.uniform(0.3, 0.8)
    # Dividing by max(tw, th) normalizes by the tool's longer side so the
    # aspect ratio is roughly preserved while the tool occupies about
    # `scale` of the background, then capped at 85% of each dimension.
    new_w = int(tw * scale * bw / max(tw, th))
    new_h = int(th * scale * bh / max(tw, th))
    new_w = min(new_w, int(bw * 0.85))
    new_h = min(new_h, int(bh * 0.85))

    # Degenerate size: skip compositing entirely.
    if new_w < 10 or new_h < 10:
        return bg

    tool_resized = cv2.resize(tool_img, (new_w, new_h))

    if mask is not None:
        mask_resized = cv2.resize(mask, (new_w, new_h))
    else:
        # No mask supplied: treat the whole tool rectangle as opaque.
        mask_resized = np.ones((new_h, new_w), dtype=np.uint8) * 255

    # Random position
    max_x = bw - new_w
    max_y = bh - new_h
    if max_x <= 0 or max_y <= 0:
        return bg

    x = random.randint(0, max_x)
    y = random.randint(0, max_y)

    result = bg.copy()

    # Sometimes draw a white "box/card" under the tool
    if random.random() < 0.6:
        pad = random.randint(5, 15)
        # Clamp the card rectangle to the background bounds.
        bx1 = max(0, x - pad)
        by1 = max(0, y - pad)
        bx2 = min(bw, x + new_w + pad)
        by2 = min(bh, y + new_h + pad)
        box_color = random.choice([(255, 255, 255), (245, 245, 245), (240, 240, 240)])
        cv2.rectangle(result, (bx1, by1), (bx2, by2), box_color, -1)

    # Composite tool: alpha-blend tool over the ROI using the mask as alpha.
    mask_3ch = cv2.merge([mask_resized, mask_resized, mask_resized])
    mask_f = mask_3ch.astype(np.float32) / 255.0

    roi = result[y:y+new_h, x:x+new_w].astype(np.float32)
    tool_f = tool_resized.astype(np.float32)
    blended = tool_f * mask_f + roi * (1 - mask_f)
    result[y:y+new_h, x:x+new_w] = blended.astype(np.uint8)

    return result
|
||||
|
||||
|
||||
# Augmentation on the final composite (tool already placed on a background).
# Heavy geometric + photometric jitter; always ends by resizing to IMG_SIZE.
train_aug = A.Compose([
    A.Rotate(limit=45, p=0.7, border_mode=cv2.BORDER_REFLECT_101),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Perspective(scale=(0.02, 0.06), p=0.3),
    # At most one blur type per sample.
    A.OneOf([
        A.GaussianBlur(blur_limit=(3, 5)),
        A.MotionBlur(blur_limit=(3, 5)),
    ], p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=25, val_shift_limit=25, p=0.4),
    A.GaussNoise(p=0.2),
    # Simulates camera JPEG artifacts.
    A.ImageCompression(quality_range=(60, 95), p=0.2),
    A.Resize(IMG_SIZE, IMG_SIZE),
])

# Milder pipeline for validation images.
val_aug = A.Compose([
    A.Rotate(limit=30, p=0.5, border_mode=cv2.BORDER_REFLECT_101),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.3),
    A.Resize(IMG_SIZE, IMG_SIZE),
])

# Simple augmentation - just the tool on white bg (like original).
# Rotation fills exposed borders with white (fill=255) to match the backdrop.
simple_aug = A.Compose([
    A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.5, 1.0), ratio=(0.75, 1.33)),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Rotate(limit=180, p=0.8, border_mode=cv2.BORDER_CONSTANT, fill=255),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.4),
    A.GaussianBlur(blur_limit=(3, 5), p=0.2),
])
|
||||
|
||||
|
||||
def generate_images(img_path, output_dir, count, is_train=True):
    """Write `count` augmented JPEGs derived from a single source image.

    Args:
        img_path: path to the source tool photo (tool on white backdrop).
        output_dir: destination directory, created if missing.
        count: number of images to emit (img_0000.jpg .. ).
        is_train: training split uses the heavy pipeline and, 60% of the
            time, composites the tool onto a synthetic background first.

    Raises:
        FileNotFoundError: if the source image cannot be read.
    """
    os.makedirs(output_dir, exist_ok=True)
    source = cv2.imread(img_path)
    if source is None:
        raise FileNotFoundError(f"Cannot read {img_path}")

    tool_mask = extract_tool_mask(source)

    # Center the source (and its mask) on a square canvas: white for the
    # image, zero (transparent) for the mask.
    height, width = source.shape[:2]
    side = max(height, width)
    canvas = np.full((side, side, 3), 255, dtype=np.uint8)
    canvas_mask = np.zeros((side, side), dtype=np.uint8)
    top = (side - height) // 2
    left = (side - width) // 2
    canvas[top:top + height, left:left + width] = source
    canvas_mask[top:top + height, left:left + width] = tool_mask

    pipeline = train_aug if is_train else val_aug

    for idx in range(count):
        # Short-circuit keeps the random() draw train-only, matching the
        # original seeded call sequence.
        if is_train and random.random() < 0.6:
            # Composite on a synthetic background (simulates box on shelf).
            backdrop = create_background((IMG_SIZE + 60, IMG_SIZE + 60))
            scene = composite_on_background(canvas, backdrop, canvas_mask)
            augmented = pipeline(image=scene)["image"]
        else:
            # Simple augmentation: tool stays on its clean white canvas.
            augmented = simple_aug(image=canvas)["image"]

        cv2.imwrite(os.path.join(output_dir, f"img_{idx:04d}.jpg"), augmented)
|
||||
|
||||
|
||||
def main():
    """Regenerate the dataset: wipe old output, then emit train/val per class."""
    import shutil

    # Start from a clean slate so stale images never mix with fresh ones.
    if os.path.exists(DATASET_DIR):
        shutil.rmtree(DATASET_DIR)

    print("Generating improved dataset with background compositing...")
    for class_name, filename in CLASSES.items():
        source_path = os.path.join(IMAGES_DIR, filename)

        # Training split: heavy augmentation + background compositing.
        train_dir = os.path.join(DATASET_DIR, "train", class_name)
        generate_images(source_path, train_dir, TRAIN_COUNT, is_train=True)
        print(f" {class_name}: {TRAIN_COUNT} train images")

        # Validation split: milder augmentation only.
        val_dir = os.path.join(DATASET_DIR, "val", class_name)
        generate_images(source_path, val_dir, VAL_COUNT, is_train=False)
        print(f" {class_name}: {VAL_COUNT} val images")

    print(f"\nDataset: {DATASET_DIR}")
    print(f" Train: {TRAIN_COUNT * len(CLASSES)} images")
    print(f" Val: {VAL_COUNT * len(CLASSES)} images")
|
||||
Reference in New Issue
Block a user