from __future__ import division import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable import numpy as np import cv2 import matplotlib.pyplot as plt from bbox import bbox_iou def count_parameters(model): return sum(p.numel() for p in model.parameters()) def count_learnable_parameters(model): return sum(p.numel() for p in model.parameters() if p.requires_grad) def convert2cpu(matrix): if matrix.is_cuda: return torch.FloatTensor(matrix.size()).copy_(matrix) else: return matrix def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True): batch_size = prediction.size(0) stride = inp_dim // prediction.size(2) grid_size = inp_dim // stride bbox_attrs = 5 + num_classes num_anchors = len(anchors) anchors = [(a[0]/stride, a[1]/stride) for a in anchors] prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size) prediction = prediction.transpose(1,2).contiguous() prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs) #Sigmoid the centre_X, centre_Y. and object confidencce prediction[:,:,0] = torch.sigmoid(prediction[:,:,0]) prediction[:,:,1] = torch.sigmoid(prediction[:,:,1]) prediction[:,:,4] = torch.sigmoid(prediction[:,:,4]) #Add the center offsets grid_len = np.arange(grid_size) a,b = np.meshgrid(grid_len, grid_len) x_offset = torch.FloatTensor(a).view(-1,1) y_offset = torch.FloatTensor(b).view(-1,1) if CUDA: x_offset = x_offset.cuda() y_offset = y_offset.cuda() x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0) prediction[:,:,:2] += x_y_offset #log space transform height and the width anchors = torch.FloatTensor(anchors) if CUDA: anchors = anchors.cuda() anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors #Softmax the class scores prediction[:,:,5: 5 + num_classes] = torch.sigmoid((prediction[:,:, 5 : 5 + num_classes])) prediction[:,:,:4] *= stride return prediction def load_classes(namesfile): fp = open(namesfile, "r") names = fp.read().split("\n")[:-1] return names def get_im_dim(im): im = cv2.imread(im) w,h = im.shape[1], im.shape[0] return w,h def unique(tensor): tensor_np = tensor.cpu().numpy() unique_np = np.unique(tensor_np) unique_tensor = torch.from_numpy(unique_np) tensor_res = tensor.new(unique_tensor.shape) tensor_res.copy_(unique_tensor) return tensor_res def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4): conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2) prediction = prediction*conf_mask try: ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() except: return 0 box_a = prediction.new(prediction.shape) box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2) box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2) box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2) prediction[:,:,:4] = box_a[:,:,:4] batch_size = prediction.size(0) output = prediction.new(1, prediction.size(2) + 1) write = False for ind in range(batch_size): #select the image from the batch image_pred = prediction[ind] #Get the class having maximum score, and the index of that class #Get rid of num_classes softmax scores #Add the class index and the class score of class having maximum score max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1) max_conf = max_conf.float().unsqueeze(1) max_conf_score = max_conf_score.float().unsqueeze(1) seq = (image_pred[:,:5], max_conf, max_conf_score) image_pred = torch.cat(seq, 1) #Get rid of the zero entries non_zero_ind = (torch.nonzero(image_pred[:,4])) image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7) #Get the various classes detected in the image try: img_classes = unique(image_pred_[:,-1]) except: continue #WE will do NMS classwise for cls in img_classes: #get the detections with one particular class cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1) class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze() image_pred_class = image_pred_[class_mask_ind].view(-1,7) #sort the detections such that the entry with the maximum objectness #confidence is at the top conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1] image_pred_class = image_pred_class[conf_sort_index] idx = image_pred_class.size(0) #if nms has to be done if nms: #For each detection for i in range(idx): #Get the IOUs of all boxes that come after the one we are looking at #in the loop try: ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:]) except ValueError: break except IndexError: break #Zero out all the detections that have IoU > treshhold iou_mask = (ious < nms_conf).float().unsqueeze(1) image_pred_class[i+1:] *= iou_mask #Remove the non-zero entries non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze() image_pred_class = image_pred_class[non_zero_ind].view(-1,7) #Concatenate the batch_id of the image to the detection #this helps us identify which image does the detection correspond to #We use a linear straucture to hold ALL the detections from the batch #the batch_dim is flattened #batch is identified by extra batch column batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind) seq = batch_ind, image_pred_class if not write: output = torch.cat(seq,1) write = True else: out = torch.cat(seq,1) output = torch.cat((output,out)) return output #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sat Mar 24 00:12:16 2018 @author: ayooshmac """ def predict_transform_half(prediction, inp_dim, anchors, num_classes, CUDA = True): batch_size = prediction.size(0) stride = inp_dim // prediction.size(2) bbox_attrs = 5 + num_classes num_anchors = len(anchors) grid_size = inp_dim // stride prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size) prediction = prediction.transpose(1,2).contiguous() prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors, bbox_attrs) #Sigmoid the centre_X, centre_Y. and object confidencce prediction[:,:,0] = torch.sigmoid(prediction[:,:,0]) prediction[:,:,1] = torch.sigmoid(prediction[:,:,1]) prediction[:,:,4] = torch.sigmoid(prediction[:,:,4]) #Add the center offsets grid_len = np.arange(grid_size) a,b = np.meshgrid(grid_len, grid_len) x_offset = torch.FloatTensor(a).view(-1,1) y_offset = torch.FloatTensor(b).view(-1,1) if CUDA: x_offset = x_offset.cuda().half() y_offset = y_offset.cuda().half() x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1,num_anchors).view(-1,2).unsqueeze(0) prediction[:,:,:2] += x_y_offset #log space transform height and the width anchors = torch.HalfTensor(anchors) if CUDA: anchors = anchors.cuda() anchors = anchors.repeat(grid_size*grid_size, 1).unsqueeze(0) prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors #Softmax the class scores prediction[:,:,5: 5 + num_classes] = nn.Softmax(-1)(Variable(prediction[:,:, 5 : 5 + num_classes])).data prediction[:,:,:4] *= stride return prediction def write_results_half(prediction, confidence, num_classes, nms = True, nms_conf = 0.4): conf_mask = (prediction[:,:,4] > confidence).half().unsqueeze(2) prediction = prediction*conf_mask try: ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() except: return 0 box_a = prediction.new(prediction.shape) box_a[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2) box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2) box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2) prediction[:,:,:4] = box_a[:,:,:4] batch_size = prediction.size(0) output = prediction.new(1, prediction.size(2) + 1) write = False for ind in range(batch_size): #select the image from the batch image_pred = prediction[ind] #Get the class having maximum score, and the index of that class #Get rid of num_classes softmax scores #Add the class index and the class score of class having maximum score max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1) max_conf = max_conf.half().unsqueeze(1) max_conf_score = max_conf_score.half().unsqueeze(1) seq = (image_pred[:,:5], max_conf, max_conf_score) image_pred = torch.cat(seq, 1) #Get rid of the zero entries non_zero_ind = (torch.nonzero(image_pred[:,4])) try: image_pred_ = image_pred[non_zero_ind.squeeze(),:] except: continue #Get the various classes detected in the image img_classes = unique(image_pred_[:,-1].long()).half() #WE will do NMS classwise for cls in img_classes: #get the detections with one particular class cls_mask = image_pred_*(image_pred_[:,-1] == cls).half().unsqueeze(1) class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze() image_pred_class = image_pred_[class_mask_ind] #sort the detections such that the entry with the maximum objectness #confidence is at the top conf_sort_index = torch.sort(image_pred_class[:,4], descending = True )[1] image_pred_class = image_pred_class[conf_sort_index] idx = image_pred_class.size(0) #if nms has to be done if nms: #For each detection for i in range(idx): #Get the IOUs of all boxes that come after the one we are looking at #in the loop try: ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:]) except ValueError: break except IndexError: break #Zero out all the detections that have IoU > treshhold iou_mask = (ious < nms_conf).half().unsqueeze(1) image_pred_class[i+1:] *= iou_mask #Remove the non-zero entries non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze() image_pred_class = image_pred_class[non_zero_ind] #Concatenate the batch_id of the image to the detection #this helps us identify which image does the detection correspond to #We use a linear straucture to hold ALL the detections from the batch #the batch_dim is flattened #batch is identified by extra batch column batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind) seq = batch_ind, image_pred_class if not write: output = torch.cat(seq,1) write = True else: out = torch.cat(seq,1) output = torch.cat((output,out)) return output