Tiny ImageNet #

Tiny ImageNet can be used as a substitute for the full ImageNet dataset. It has 200 classes of 64x64-pixel images, with 500 training and 50 validation images per class. The test set is not labeled, so the validation set has to serve as the test set. The validation set also needs some preprocessing before it can be used to evaluate a model: all 10,000 validation images sit in a single images/ folder, with labels listed in val_annotations.txt, so passing them directly to the application results in incorrect validation/test accuracy.

Download dataset #

wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
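
Then extract the archive; it unpacks into a tiny-imagenet-200/ directory:

unzip tiny-imagenet-200.zip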

Create Dataloader #

There are two ways to create data loaders for Tiny ImageNet: one uses torchvision.datasets.ImageFolder, and the other builds custom Dataset classes around a Python dictionary that maps class IDs to integer labels.

Using torchvision.datasets.ImageFolder #

# Modified from https://github.com/DennisHanyuanXu/Tiny-ImageNet/blob/master/src/data_prep.py

import os
import torch
import torchvision.transforms as transforms
from torchvision import datasets

def create_val_img_folder(configs):
    '''
    Separate validation images into per-class subfolders so that
    torchvision.datasets.ImageFolder can infer their labels.
    '''
    val_dir = os.path.join(configs.data_path, 'val')
    img_dir = os.path.join(val_dir, 'images')

    # Map each validation image file name to its class ID
    val_img_dict = {}
    with open(os.path.join(val_dir, 'val_annotations.txt'), 'r') as fp:
        for line in fp:
            words = line.split('\t')
            val_img_dict[words[0]] = words[1]

    # Create folder if not present and move images into proper folders
    for img, folder in val_img_dict.items():
        newpath = (os.path.join(img_dir, folder))
        if not os.path.exists(newpath):
            os.makedirs(newpath)
        if os.path.exists(os.path.join(img_dir, img)):
            os.rename(os.path.join(img_dir, img), os.path.join(newpath, img))
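
For reference, this turns the flat validation layout shipped in the archive into the per-class layout ImageFolder expects (the wnids shown are illustrative):

# Before                          # After
val/images/val_0.JPEG             val/images/n01443537/val_0.JPEG
val/images/val_1.JPEG             val/images/n01629819/val_1.JPEG
...                               ...
val/val_annotations.txt           val/val_annotations.txt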

# Use either load_tiny_imagenet1() or load_tiny_imagenet2()
def load_tiny_imagenet1(configs):
    create_val_img_folder(configs)

    train_dir = os.path.join(configs.data_path, 'train')
    val_dir = os.path.join(configs.data_path, 'val', 'images')

    # Pre-calculated mean & std on imagenet:
    # norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # For other datasets, we could just simply use 0.5:
    # norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    # Normalization
    norm = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

    train_trans = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(), norm])
    val_trans = transforms.Compose([transforms.ToTensor(), norm])

    train_data = datasets.ImageFolder(train_dir, transform=train_trans)
    val_data = datasets.ImageFolder(val_dir, transform=val_trans)
    
    print(train_data)
    print(val_data)
    
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=configs.batch_size, 
                                                    num_workers=configs.num_workers, shuffle=True)
    
    # No need to shuffle the validation/test set
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=configs.batch_size, 
                                                    num_workers=configs.num_workers, shuffle=False)

    print('Number of iterations required to get through training data of length {}: {}'.format(
        len(train_data), len(train_loader)))
    
    return {'train': train_loader, 'test': val_loader}

def load_tiny_imagenet2(configs):
    create_val_img_folder(configs)

    # Upsample 64x64 images to 224x224 (see 'Why upsample images?' below)
    # and normalize with pre-computed ImageNet statistics
    train_transforms = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    val_transforms = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_data_path = configs.data_path + "/train/"
    val_data_path = configs.data_path + "/val/images/"
    train_set = datasets.ImageFolder(train_data_path, transform=train_transforms)
    val_set = datasets.ImageFolder(val_data_path, transform=val_transforms)


    train_loader = torch.utils.data.DataLoader(train_set, batch_size=configs.batch_size, 
                                        num_workers=configs.num_workers, shuffle=True)
    # No need to shuffle the validation/test set
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=configs.batch_size, 
                                        num_workers=configs.num_workers, shuffle=False)

    print('Number of iterations required to get through training data of length {}: {}'.format(
        len(train_set), len(train_loader)))


    return {'train': train_loader, 'test': val_loader}
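
Both functions expect a configs object carrying data_path, batch_size and num_workers. A minimal sketch of how they might be called, with SimpleNamespace standing in for whatever config mechanism you use:

from types import SimpleNamespace

configs = SimpleNamespace(data_path='tiny-imagenet-200',
                          batch_size=128, num_workers=4)
loaders = load_tiny_imagenet1(configs)  # or load_tiny_imagenet2(configs)

for images, labels in loaders['train']:
    pass  # forward/backward pass goes here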

Using torchvision.io.read_image and Python dictionary #

# https://github.com/pranavphoenix/TinyImageNetLoader/blob/main/tinyimagenetloader.py

import glob
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torchvision.io import read_image, ImageReadMode

# Maps WordNet class IDs (wnids) to integer labels; filled in load_tiny_imagenet_dict()
id_dict = {}

class TrainTinyImageNetDataset(Dataset):
    def __init__(self, data_path, id, transform=None):
        self.data_path = data_path + "/*/*/*.JPEG"
        self.filenames = glob.glob(self.data_path)
        self.transform = transform
        self.id_dict = id

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = self.filenames[idx]
        image = read_image(img_path)
        if image.shape[0] == 1:
            # Some Tiny ImageNet images are grayscale; decode them as RGB
            image = read_image(img_path, ImageReadMode.RGB)
        # The class ID is the grandparent directory: train/<wnid>/images/<file>.JPEG
        label = self.id_dict[img_path.split('/')[-3]]
        if self.transform:
            # read_image returns a uint8 CHW tensor; scale to [0, 1] before normalizing
            image = self.transform(image.float() / 255.0)
        return image, label

class TestTinyImageNetDataset(Dataset):
    def __init__(self, data_path, id, transform=None):
        self.data_path = data_path + "/images/*.JPEG"
        self.filenames = glob.glob(self.data_path)
        self.transform = transform
        self.id_dict = id
        self.cls_dic = {}
        self.val_annotation = data_path + "/val_annotations.txt"
        for i, line in enumerate(open(self.val_annotation, 'r')):
            a = line.split('\t')
            img, cls_id = a[0],a[1]
            self.cls_dic[img] = self.id_dict[cls_id]
 

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = self.filenames[idx]
        image = read_image(img_path)
        if image.shape[0] == 1:
            # Some Tiny ImageNet images are grayscale; decode them as RGB
            image = read_image(img_path, ImageReadMode.RGB)
        # Labels come from val_annotations.txt, keyed by file name
        label = self.cls_dic[img_path.split('/')[-1]]
        if self.transform:
            # read_image returns a uint8 CHW tensor; scale to [0, 1] before normalizing
            image = self.transform(image.float() / 255.0)
        return image, label

def load_tiny_imagenet_dict(configs):
    # Build the mapping from WordNet class IDs to integer labels
    data_path = configs.data_path + "/wnids.txt"
    for i, line in enumerate(open(data_path, 'r')):
        id_dict[line.replace('\n', '')] = i

    # read_image already returns a tensor, so ToTensor() must not be used here;
    # the datasets above scale images to [0, 1] themselves, leaving only normalization
    train_transforms = transforms.Compose([
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_data_path = configs.data_path + "/train/"
    # TestTinyImageNetDataset expects the original flat val layout (images/ plus
    # val_annotations.txt). Since create_val_img_folder() above reorganizes val/,
    # a separate untouched copy is assumed here under val_dict/
    val_data_path = configs.data_path + "/val_dict/"

    train_set = TrainTinyImageNetDataset(data_path=train_data_path, id=id_dict, transform=train_transforms)
    val_set = TestTinyImageNetDataset(data_path=val_data_path, id=id_dict, transform=train_transforms)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=configs.batch_size, 
                                        num_workers=configs.num_workers, shuffle=True)
    # No need to shuffle the validation/test set
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=configs.batch_size, 
                                        num_workers=configs.num_workers, shuffle=False)

    print('Number of iterations required to get through training data of length {}: {}'.format(
        len(train_set), len(train_loader)))

    return {'train': train_loader, 'test': val_loader}
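
A quick sanity check that the dictionary-based loaders produce what a model expects, reusing the hypothetical configs from the sketch above:

loaders = load_tiny_imagenet_dict(configs)
images, labels = next(iter(loaders['train']))
# Expect torch.Size([128, 3, 64, 64]) and labels in [0, 199]
print(images.shape, labels.min().item(), labels.max().item())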

Set num_classes=200 #

Do not forget to set num_classes=200 in your model. Otherwise, as soon as a label outside the model's output range reaches the loss function, PyTorch may throw RuntimeError: CUDA error: device-side assert triggered. Running the application with CUDA_LAUNCH_BLOCKING=1 makes the failing operation easier to locate:

CUDA_LAUNCH_BLOCKING=1 python3 <application_file_name>.py
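
For torchvision models, num_classes is a constructor argument when training from scratch; with pretrained weights, the usual approach is to replace the final classification layer instead. A sketch:

import torch.nn as nn
import torchvision

# From scratch: the classifier head is created with 200 outputs
model = torchvision.models.resnet50(num_classes=200)

# Pretrained: load the 1000-class ImageNet weights, then swap the head
model = torchvision.models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, 200)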

Remove <label>_boxes.txt files #

Each training class ships with a <label>_boxes.txt bounding-box file and keeps its images in an extra images/ sub-directory. The following bash script removes the .txt files and flattens the images/ folders. Note that flattening changes the directory depth, so the glob pattern and the split('/')[-3] label lookup in TrainTinyImageNetDataset above would need to be adjusted accordingly.

#!/bin/bash
parent="train"
cd "$parent"

for DIR in */
do 
    echo "$DIR"
    cd "$DIR"
    rm -f *.txt
    if test -d ./images
    then
        cd ./images
        mv * ../
        cd ..
        rm -r ./images
    fi
    cd ..
done
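
A quick check that no annotation files are left behind (prints nothing if the script worked):

find train -name '*_boxes.txt'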

Why upsample images? #

Tiny ImageNet samples are 64x64 pixels, whereas models like ResNet-50 or ResNet-152 expect 224x224 inputs. When training from scratch, it is possible to adapt such models (e.g. VGG-16, ResNet-50, ResNet-152) to 64x64 inputs, eliminating the need for the dataloader to resize images. This may not be possible with pre-trained models, however, since modifying the early layers discards part of the learned weights. Interestingly, some people have reported that adapting these models to 64x64 inputs lowered their accuracy in their experiments. In my experience, reducing the input size cuts computation and significantly speeds up training. Take a look at this discussion thread.
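
One common way to adapt a ResNet to 64x64 inputs when training from scratch is to shrink its stem, as is often done for CIFAR. A sketch (the conv1 and maxpool attribute names match torchvision's ResNet):

import torch.nn as nn
import torchvision

model = torchvision.models.resnet50(num_classes=200)
# Replace the 7x7/stride-2 stem convolution with 3x3/stride-1 and drop the
# max-pool, so 64x64 inputs are not downsampled too aggressively early on
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
model.maxpool = nn.Identity()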