Introduction
Training the model
Note: The following section is adapted directly from the PyTorch "TorchVision Object Detection Finetuning Tutorial" (the hyperlink to it was lost when this notebook was exported to text).
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import glob
import json
import matplotlib.pyplot as plt
# Use a larger default figure size for every plot in this notebook.
plt.rcParams['figure.figsize'] = [10, 10]

# Open one training image. As a bare expression this only has a visible
# effect in a notebook, where the cell's value is presumably rendered inline.
Image.open('Training_Data/TestImage1.png')

# Load one annotation file; the context manager closes the handle promptly
# instead of leaving it to the garbage collector.
with open('Training_Data/image1.json', 'r') as annotation_file:
    JSON = json.load(annotation_file)

# Print every annotated shape, then just its corner points, to inspect the
# structure that TrainingDataset parses.
for shape in JSON['shapes']:
    print(shape)
    print(shape['points'])
class TrainingDataset(torch.utils.data.Dataset):
    """Object-detection dataset built from PNG images with JSON annotations.

    Every file matched by ``root + '*.png'`` is one sample. Its annotation is
    read from the file with the same path but a ``.json`` extension, which
    must contain a ``'shapes'`` list whose entries each hold a two-point
    ``'points'`` list (two opposite corners of a box).

    Args:
        root: Path prefix concatenated directly with the glob patterns, so a
            directory must include its trailing separator
            (e.g. ``'Training_Data/'``).
        transforms: Optional callable invoked as ``transforms(img, target)``
            that returns the transformed ``(img, target)`` pair.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # Sort both listings so their order is deterministic.
        self.imgs = sorted(glob.glob(root + '*.png'))
        # NOTE(review): this list is collected but never read by
        # __getitem__, which derives the annotation path from the image path.
        self.annotations = sorted(glob.glob(root + '*.json'))

    def __getitem__(self, idx):
        """Return the ``(img, target)`` pair for sample ``idx``.

        ``img`` is the RGB PIL image (or whatever ``transforms`` returns for
        it). ``target`` is a dict with the keys ``"boxes"`` (float32, one
        ``[xmin, ymin, xmax, ymax]`` row per box), ``"labels"`` (int64),
        ``"image_id"``, ``"area"`` and ``"iscrowd"`` (int64 zeros).

        Raises:
            IndexError: If ``idx`` is outside the image list.
            FileNotFoundError: If the image has no matching ``.json`` file.
            ValueError: If a shape does not have exactly two 2-D points.
        """
        img_path = self.imgs[idx]
        img = Image.open(img_path).convert("RGB")

        # NOTE(review): every sample starts with a full-frame box labelled 0,
        # so the target is never empty. This presumably assumes 512x512
        # images and an intentional background box -- confirm.
        boxes = [[0.0, 0.0, 512.0, 512.0]]
        labels = [0]

        # The annotation sits next to the image with a .json extension.
        annotation_path = os.path.splitext(img_path)[0] + '.json'
        with open(annotation_path, 'r') as annotation_file:
            annotation = json.load(annotation_file)

        for shape in annotation['shapes']:
            # The two points are opposite corners in arbitrary order.
            (x1, y1), (x2, y2) = shape['points']
            xmin, xmax = min(x1, x2), max(x1, x2)
            ymin, ymax = min(y1, y2), max(y1, y2)

            # Skip degenerate (zero-area) boxes.
            if (xmax - xmin) * (ymax - ymin) > 0:
                boxes.append([xmin, ymin, xmax, ymax])
                # there is only one (non-background) class
                labels.append(1)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            # suppose all instances are not crowd
            "iscrowd": torch.zeros((len(labels),), dtype=torch.int64),
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Return the number of PNG images found under ``root``."""
        return len(self.imgs)
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_instance_object_detection_model(num_classes):
    """Build a Faster R-CNN detector whose box head predicts ``num_classes``.

    Args:
        num_classes: Number of output classes for the new box predictor.

    Returns:
        The torchvision ``fasterrcnn_resnet50_fpn`` model with its
        ``roi_heads.box_predictor`` replaced by a new ``FastRCNNPredictor``.
    """
    # load an object detection model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one sized for our classes
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
This will prepare the model to be trained and evaluated on our custom dataset.
Training and evaluation functions
In references/detection/, we have several helper functions to simplify training and evaluating detection models. Here, we will use references/detection/engine.py, references/detection/utils.py and references/detection/transforms.py.
Let’s copy those files (and their dependencies) here so they are available in the notebook.
%%shell

# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
# Pin the release so the helper files match the API used in this notebook.
git checkout v0.3.0

# Copy the helper modules next to the notebook so they can be imported.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../
Let’s write some helper functions for data augmentation/transformation, which leverage the functions in references/detection
that we have just copied:
from engine import train_one_epoch, evaluate
import utils
import transforms as T
def get_transform(train):
    """Compose the image/target transforms for training or evaluation.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # converts the image, a PIL image, into a PyTorch Tensor
    steps = [T.ToTensor()]
    if train:
        # during training, randomly flip the training images
        # and ground truth for data augmentation
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)
Putting everything together
We now have the dataset class, the models and the data transforms. Let’s instantiate them.
# use our dataset and defined transformations; two instances over the same
# files so that only the training copy gets the random-flip augmentation
dataset = TrainingDataset('Training_Data/', get_transform(train=True))
dataset_test = TrainingDataset('Training_Data/', get_transform(train=False))

# split the dataset in train and test set (fixed seed => reproducible split)
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
# NOTE(review): the last 50 shuffled indices are held out for testing; with
# 50 or fewer images the training subset would be empty -- confirm the
# dataset size.
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)
Now let’s instantiate the model and the optimizer.
# train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and dragon
num_classes = 2

# get the model using our helper function
model = get_instance_object_detection_model(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
And now, let’s train the model for ten epochs, evaluating at the end of every epoch.
# let's train it for 10 epochs
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)
Now that training has finished, let’s look at what the model predicts on a few test images.
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [10, 10]

# put the model in evaluation mode (once; it does not change between images)
model.eval()

# Show up to five held-out images; the cap avoids an IndexError when the
# test split holds fewer than five samples.
for i in range(min(5, len(dataset_test))):
    # pick one image from the test set
    img, _ = dataset_test[i]

    with torch.no_grad():
        prediction = model([img.to(device)])

    # Outline every detection scoring above 0.5 in red.
    detections = prediction[0]
    for box, score in zip(detections['boxes'], detections['scores']):
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        if score.cpu().numpy() > 0.5:
            plt.plot([xmin, xmax, xmax, xmin, xmin],
                     [ymin, ymin, ymax, ymax, ymin],
                     color='r', linewidth=2)

    # Convert the CHW float tensor back to an HWC uint8 image for display.
    test_image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
    plt.imshow(test_image)
    plt.show()

# Persist the trained weights.
torch.save(model.state_dict(), 'Dragon_Model.tar')
Results
# put the model in evaluation mode (once; it does not change between images)
model.eval()

for file_str in ['dragon1.jpeg', 'dragon2.jpeg', 'dragon3.jpeg', 'dragon4.jpeg', 'dragon5.jpeg']:
    # Force three channels so grayscale/RGBA/CMYK files still yield an
    # (H, W, 3) array for the permute below, as the training dataset does.
    img = Image.open(file_str).convert("RGB")
    # Scale pixel values to [0, 1] and reorder HWC -> CHW for the model.
    img = np.array(img) / 255.0
    img = torch.tensor(img, dtype=torch.float).permute(2, 0, 1)

    with torch.no_grad():
        prediction = model([img.to(device)])

    # Outline only high-confidence detections (score above 0.95) in red.
    detections = prediction[0]
    for box, score in zip(detections['boxes'], detections['scores']):
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        if score.cpu().numpy() > 0.95:
            plt.plot([xmin, xmax, xmax, xmin, xmin],
                     [ymin, ymin, ymax, ymax, ymin],
                     color='r', linewidth=2)

    # Convert the CHW float tensor back to an HWC uint8 image for display.
    test_image = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
    plt.imshow(test_image)
    plt.show()