381 lines
13 KiB
Python
381 lines
13 KiB
Python
|
|
from __future__ import division
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
from torch.autograd import Variable
|
|
import numpy as np
|
|
import cv2
|
|
import matplotlib.pyplot as plt
|
|
from bbox import bbox_iou
|
|
|
|
def count_parameters(model):
    """Return the total number of scalar elements over all parameters of ``model``.

    Every tensor yielded by ``model.parameters()`` is counted, regardless of
    whether it requires gradients.
    """
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total
|
|
|
|
def count_learnable_parameters(model):
    """Return the number of scalar elements in the trainable parameters of ``model``.

    Only tensors from ``model.parameters()`` whose ``requires_grad`` flag is set
    contribute to the count.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
|
|
|
|
def convert2cpu(matrix):
    """Return ``matrix`` as a CPU tensor.

    A tensor that is not on a CUDA device is returned unchanged (same object).
    A CUDA tensor is copied into a newly allocated ``torch.FloatTensor`` of the
    same size.
    """
    if not matrix.is_cuda:
        return matrix

    host_copy = torch.FloatTensor(matrix.size())
    host_copy.copy_(matrix)
    return host_copy
|
|
|
|
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
    """Turn a raw detection feature map into a flat table of box predictions.

    Args:
        prediction: tensor viewable as
            ``(batch, num_anchors * (5 + num_classes), grid, grid)``.
        inp_dim: side length of the network input; the stride is
            ``inp_dim // grid``.
        anchors: sequence of ``(width, height)`` anchor sizes, in the same
            units as ``inp_dim``.
        num_classes: number of class scores stored per box.
        CUDA: retained for backward compatibility. Helper tensors are always
            created on the device ``prediction`` already lives on, so a CPU
            tensor no longer fails when this flag is left at its default.

    Returns:
        Tensor of shape ``(batch, grid * grid * num_anchors, 5 + num_classes)``.
        The last axis holds centre x, centre y, width and height (all scaled
        by the stride), the sigmoid objectness, and the sigmoid class scores.
    """
    batch_size = prediction.size(0)
    # Read the grid size from the feature map itself; re-deriving it from
    # inp_dim and the floored stride can disagree with the real map size.
    grid_size = prediction.size(2)
    stride = inp_dim // grid_size
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    device = prediction.device

    # Express the anchors in grid cells; everything is scaled back by the
    # stride at the end.
    anchors = [(anchor[0] / stride, anchor[1] / stride) for anchor in anchors]

    # (batch, attrs * anchors, g, g) -> (batch, g * g * anchors, attrs)
    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Sigmoid the centre x, centre y and the objectness confidence.
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the top-left coordinate of each grid cell to the centre offsets.
    grid_len = np.arange(grid_size)
    grid_x, grid_y = np.meshgrid(grid_len, grid_len)
    x_offset = torch.from_numpy(grid_x).float().reshape(-1, 1).to(device)
    y_offset = torch.from_numpy(grid_y).float().reshape(-1, 1).to(device)

    # One (x, y) pair per anchor of every cell, in the same row order as
    # the flattened prediction.
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # Width and height are predicted in log space relative to the anchors.
    anchors = torch.tensor(anchors, dtype=torch.float32, device=device)
    anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

    # Sigmoid (not softmax) the class scores: each class is scored independently.
    prediction[:, :, 5:5 + num_classes] = torch.sigmoid(prediction[:, :, 5:5 + num_classes])

    # Scale the box coordinates from grid cells back to input units.
    prediction[:, :, :4] *= stride

    return prediction
|
|
|
|
def load_classes(namesfile):
    """Read class names, one per line, from the text file ``namesfile``.

    Args:
        namesfile: path of the file to read.

    Returns:
        List of the lines of the file in order. Blank lines inside the file
        are kept as empty strings; only the empty element produced by a
        final newline is removed.
    """
    # The context manager closes the handle even if reading fails.
    with open(namesfile, "r") as fp:
        names = fp.read().split("\n")

    # Drop the last element only when it is the empty string left by a
    # trailing newline; a file without one keeps its final class name.
    if names and names[-1] == "":
        names.pop()
    return names
|
|
|
|
def get_im_dim(im):
    """Return ``(width, height)`` of the image that ``cv2.imread`` loads from ``im``."""
    image = cv2.imread(im)
    # The array shape starts with (rows, columns), i.e. (height, width).
    height, width = image.shape[:2]
    return width, height
|
|
|
|
def unique(tensor):
    """Return the distinct values of ``tensor`` as computed by ``np.unique``.

    The values are computed on the CPU through NumPy and then copied into a
    new tensor allocated with ``tensor.new``, so the result shares the type
    of the input.
    """
    distinct = torch.from_numpy(np.unique(tensor.cpu().numpy()))

    # Allocate from the input tensor, then fill it with the distinct values.
    result = tensor.new(distinct.shape)
    return result.copy_(distinct)
|
|
|
|
def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    """Filter detections by objectness and apply class-wise non-maximum suppression.

    Args:
        prediction: tensor of shape ``(batch, boxes, 5 + num_classes)`` whose
            last axis holds centre x, centre y, width, height, objectness and
            the per-class scores.
        confidence: objectness threshold; rows whose objectness does not
            exceed it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: when true, suppress overlapping boxes of the same class.
        nms_conf: IoU threshold; a box is suppressed when its IoU with a
            higher-objectness box of the same class is not below this value.

    Returns:
        Tensor of shape ``(detections, 8)`` with columns batch index, x1, y1,
        x2, y2, objectness, best class score and best class index. Rows are
        grouped by image, then by class, and sorted by descending objectness
        within a class. The integer ``0`` is returned when no detection
        survives, instead of an uninitialised tensor.
    """
    # Zero every row whose objectness does not exceed the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing left in the whole batch.
    if not bool((prediction[:, :, 4] != 0).any()):
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    # Both corners are computed before either is written back.
    half_extent = prediction[:, :, 2:4] / 2
    top_left = prediction[:, :, :2] - half_extent
    bottom_right = prediction[:, :, :2] + half_extent
    prediction[:, :, :2] = top_left
    prediction[:, :, 2:4] = bottom_right

    # Detections of the whole batch are collected here and concatenated once;
    # the batch index column identifies the image each row belongs to.
    detections = []

    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the num_classes score columns by the best class score and
        # the index of that class.
        class_score, class_index = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             class_score.float().unsqueeze(1),
             class_index.float().unsqueeze(1)),
            1,
        )

        # Get rid of the rows zeroed by the confidence mask.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # NMS is done class-wise.
        for cls in torch.unique(image_pred_[:, -1]):
            # Rows of this class that also carry a non-zero class score.
            in_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[in_class]
            if image_pred_class.size(0) == 0:
                continue

            # Highest objectness first.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            if nms:
                i = 0
                # The row count shrinks as boxes are suppressed, so the bound
                # is re-evaluated on every iteration.
                while i < image_pred_class.size(0) - 1:
                    # IoU of box i with every box ranked after it.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])

                    # Zero out the later boxes whose IoU reaches the
                    # threshold, then drop the zeroed rows.
                    keep = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= keep
                    image_pred_class = image_pred_class[image_pred_class[:, 4] != 0]
                    i += 1

            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
|
|
|
|
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Created on Sat Mar 24 00:12:16 2018
|
|
|
|
@author: ayooshmac
|
|
"""
|
|
|
|
def predict_transform_half(prediction, inp_dim, anchors, num_classes, CUDA = True):
    """Half-precision variant of the feature-map to box-table transform.

    Args:
        prediction: tensor viewable as
            ``(batch, num_anchors * (5 + num_classes), grid, grid)``;
            intended to be a half-precision tensor.
        inp_dim: side length of the network input; the stride is
            ``inp_dim // grid``.
        anchors: sequence of ``(width, height)`` anchor sizes, in the same
            units as ``inp_dim``.
        num_classes: number of class scores stored per box.
        CUDA: retained for backward compatibility. Helper tensors are always
            created with the device and dtype of ``prediction``.

    Returns:
        Tensor of shape ``(batch, grid * grid * num_anchors, 5 + num_classes)``.
        The last axis holds centre x, centre y, width and height (all scaled
        by the stride), the sigmoid objectness, and the softmax class scores.
    """
    batch_size = prediction.size(0)
    # Read the grid size from the feature map itself; re-deriving it from
    # inp_dim and the floored stride can disagree with the real map size.
    grid_size = prediction.size(2)
    stride = inp_dim // grid_size
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)
    device = prediction.device
    dtype = prediction.dtype

    # Express the anchors in grid cells. Width and height are multiplied by
    # the stride at the end, so skipping this step makes every box stride
    # times too large.
    anchors = [(anchor[0] / stride, anchor[1] / stride) for anchor in anchors]

    # (batch, attrs * anchors, g, g) -> (batch, g * g * anchors, attrs)
    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Sigmoid the centre x, centre y and the objectness confidence.
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the top-left coordinate of each grid cell to the centre offsets.
    grid_len = np.arange(grid_size)
    grid_x, grid_y = np.meshgrid(grid_len, grid_len)
    x_offset = torch.from_numpy(grid_x).reshape(-1, 1).to(device=device, dtype=dtype)
    y_offset = torch.from_numpy(grid_y).reshape(-1, 1).to(device=device, dtype=dtype)

    # One (x, y) pair per anchor of every cell, in the same row order as
    # the flattened prediction.
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # Width and height are predicted in log space relative to the anchors.
    anchors = torch.tensor(anchors, dtype=dtype, device=device)
    anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

    # Softmax the class scores over the class axis.
    # NOTE(review): predict_transform applies a per-class sigmoid here
    # instead; confirm which activation the half-precision path should use.
    class_scores = prediction[:, :, 5:5 + num_classes]
    prediction[:, :, 5:5 + num_classes] = torch.softmax(class_scores, dim=-1).detach()

    # Scale the box coordinates from grid cells back to input units.
    prediction[:, :, :4] *= stride

    return prediction
|
|
|
|
|
|
def write_results_half(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    """Half-precision variant of detection filtering and class-wise NMS.

    Args:
        prediction: tensor of shape ``(batch, boxes, 5 + num_classes)`` whose
            last axis holds centre x, centre y, width, height, objectness and
            the per-class scores; intended to be a half-precision tensor.
            Masks and derived columns are created in the dtype of this tensor.
        confidence: objectness threshold; rows whose objectness does not
            exceed it are discarded.
        num_classes: number of class-score columns following the objectness.
        nms: when true, suppress overlapping boxes of the same class.
        nms_conf: IoU threshold; a box is suppressed when its IoU with a
            higher-objectness box of the same class is not below this value.

    Returns:
        Tensor of shape ``(detections, 8)`` with columns batch index, x1, y1,
        x2, y2, objectness, best class score and best class index. Rows are
        grouped by image, then by class, and sorted by descending objectness
        within a class. The integer ``0`` is returned when no detection
        survives, instead of an uninitialised tensor.
    """
    dtype = prediction.dtype

    # Zero every row whose objectness does not exceed the threshold.
    conf_mask = (prediction[:, :, 4] > confidence).to(dtype).unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing left in the whole batch.
    if not bool((prediction[:, :, 4] != 0).any()):
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    # Both corners are computed before either is written back.
    half_extent = prediction[:, :, 2:4] / 2
    top_left = prediction[:, :, :2] - half_extent
    bottom_right = prediction[:, :, :2] + half_extent
    prediction[:, :, :2] = top_left
    prediction[:, :, 2:4] = bottom_right

    # Detections of the whole batch are collected here and concatenated once;
    # the batch index column identifies the image each row belongs to.
    detections = []

    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the num_classes score columns by the best class score and
        # the index of that class.
        class_score, class_index = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             class_score.to(dtype).unsqueeze(1),
             class_index.to(dtype).unsqueeze(1)),
            1,
        )

        # Boolean-mask indexing keeps the result two-dimensional even when a
        # single row survives; squeezing an index tensor would collapse it
        # and break the column indexing below.
        image_pred_ = image_pred[image_pred[:, 4] != 0]
        if image_pred_.size(0) == 0:
            continue

        # Class indices are made integral before de-duplication, then cast
        # back so they compare against the class column.
        img_classes = torch.unique(image_pred_[:, -1].long()).to(dtype)

        # NMS is done class-wise.
        for cls in img_classes:
            # Rows of this class that also carry a non-zero class score.
            in_class = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[in_class]
            if image_pred_class.size(0) == 0:
                continue

            # Highest objectness first.
            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[order]

            if nms:
                i = 0
                # The row count shrinks as boxes are suppressed, so the bound
                # is re-evaluated on every iteration.
                while i < image_pred_class.size(0) - 1:
                    # IoU of box i with every box ranked after it.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])

                    # Zero out the later boxes whose IoU reaches the
                    # threshold, then drop the zeroed rows.
                    keep = (ious < nms_conf).to(image_pred_class.dtype).unsqueeze(1)
                    image_pred_class[i + 1:] *= keep
                    image_pred_class = image_pred_class[image_pred_class[:, 4] != 0]
                    i += 1

            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
|