261 lines
8.4 KiB
Python
261 lines
8.4 KiB
Python
|
from __future__ import division
|
||
|
import time
|
||
|
# import torch
|
||
|
# import torch.nn as nn
|
||
|
# from torch.autograd import Variable
|
||
|
import numpy as np
|
||
|
import cv2
|
||
|
from matplotlib import pyplot as plt
|
||
|
from util import *
|
||
|
from darknet import Darknet
|
||
|
from preprocess import prep_image, inp_to_image, letterbox_image
|
||
|
# import pandas as pd
|
||
|
# import random
|
||
|
import pickle as pkl
|
||
|
import argparse
|
||
|
# import dlib
|
||
|
|
||
|
|
||
|
def get_test_input(input_dim, CUDA):
    """Load the bundled sample image as a network-ready test tensor.

    Reads ``data/dog-cycle-car.png``, resizes it to a square of side
    ``input_dim``, reverses the channel axis, moves the channel axis first,
    adds a leading batch axis and divides the pixel values by 255.

    Args:
        input_dim: Side length, in pixels, of the square network input.
        CUDA: When truthy, the tensor is moved to the GPU before returning.

    Returns:
        A float tensor wrapped in ``Variable`` with a leading batch axis of
        size 1, followed by the channel axis and the two spatial axes.

    Raises:
        IOError: If the sample image cannot be read.
    """
    image_path = "data/dog-cycle-car.png"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; without this check the failure only shows up
        # later as an opaque error inside cv2.resize.
        raise IOError("Cannot read test image: {0}".format(image_path))

    img = cv2.resize(img, (input_dim, input_dim))

    # Reverse the channel axis and reorder H x W x C -> C x H x W.
    img_ = img[:, :, ::-1].transpose((2, 0, 1))
    # The division allocates a fresh array, so the reversed (negative-stride)
    # view above never reaches torch.from_numpy directly.
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)

    if CUDA:
        img_ = img_.cuda()

    return img_
|
||
|
|
||
|
def prep_image(img, inp_dim):
    """Convert an image array into a batched float tensor for the network.

    The image is passed through ``letterbox_image`` to obtain an
    ``inp_dim`` x ``inp_dim`` canvas, its channel axis is reversed, the
    channel axis is moved first, and the values are divided by 255.

    Note that this definition replaces the ``prep_image`` name imported from
    ``preprocess`` at the top of the file.

    Args:
        img: Source image as an array whose first two axes are rows and
            columns.
        inp_dim: Side length of the square network input.

    Returns:
        A 3-tuple ``(tensor, original, size)``: the float tensor with a
        leading batch axis of size 1, the untouched input image, and
        ``(shape[1], shape[0])`` of the input image.
    """
    original = img
    size = original.shape[1], original.shape[0]

    boxed = letterbox_image(original, (inp_dim, inp_dim))

    # Reverse the channel axis, then go H x W x C -> C x H x W. The copy
    # makes the reversed view contiguous so torch.from_numpy accepts it.
    channels_first = boxed[:, :, ::-1].transpose((2, 0, 1)).copy()

    tensor = torch.from_numpy(channels_first).float()
    tensor = tensor.div(255.0)
    tensor = tensor.unsqueeze(0)

    return tensor, original, size
|
||
|
|
||
|
def write(x, img):
    """Draw one detection on ``img`` and plot colour statistics of its crop.

    Args:
        x: One detection row. Elements 1-2 are read as the top-left corner,
            elements 3-4 as the bottom-right corner, and the last element
            as the class index into the module-level ``classes`` list.
        img: Image array to draw on; it is modified in place.

    Returns:
        The same ``img`` object with the bounding box drawn on it.
    """
    # Convert the corners to plain Python ints: OpenCV drawing functions and
    # NumPy slicing both take them reliably, whereas tuples of 0-dim tensors
    # are rejected by recent OpenCV releases.
    c1 = tuple(int(v) for v in x[1:3])
    c2 = tuple(int(v) for v in x[3:5])
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = (0, 0, 255)

    # NOTE(review): the copy of this file under review had lost its
    # indentation. The nesting below (statistics only for 'person', box drawn
    # for every detection) is the most plausible reading -- please confirm.
    if label == 'person':
        print((c2[1] - c1[1]) / 2)
        # Crop before drawing so the box outline does not end up in the
        # histogram.
        cropped_img = img[c1[1]:c2[1], c1[0]:c2[0]]
        # ``size`` is zero when either the height or the width is zero;
        # ``len`` would only look at the number of rows.
        if cropped_img.size:
            get_rgb_Hist(cropped_img)

    cv2.rectangle(img, c1, c2, color, 5)
    # Text-label rendering (filled background + cv2.putText) is disabled.
    return img
|
||
|
|
||
|
def get_rgb_Hist(nose_img):
    """Show a cropped image and plot per-channel colour statistics for it.

    Opens/updates the OpenCV window "Nose" with the crop and redraws the
    current matplotlib figure with:

    * subplots 1-3: 256-bin histograms of channels 0, 1 and 2, plotted in
      blue, green and red (the channel-1 plot is zoomed to bins 0-20);
    * subplot 4: a bar chart of the mean value of each channel;
    * subplot 5: the mean count of the first 20 bins of the channel-1
      histogram.

    Args:
        nose_img: Three-channel image array (rows x columns x channels).
            ``None`` or an empty array is ignored.

    Returns:
        None.
    """
    if nose_img is None or nose_img.size == 0:
        # Nothing to display; cv2.imshow/calcHist fail on empty images.
        return

    cv2.imshow("Nose", nose_img)

    # (channel index, line colour, upper x-axis limit) for subplots 1-3.
    channel_plots = ((0, 'b', 256), (1, 'g', 20), (2, 'r', 256))
    histograms = []
    for position, (channel, line_color, x_max) in enumerate(channel_plots, start=1):
        plt.subplot(2, 3, position)
        hist = cv2.calcHist([nose_img], [channel], None, [256], [0, 256])
        plt.plot(hist, color=line_color)
        plt.xlim([0, x_max])
        plt.ylim([0, 80])
        histograms.append(hist)
    histr_g = histograms[1]

    plt.subplot(234)
    color = ['blue', 'green', 'red']
    # Average over the channel axis. Indexing ``nose_img[0]`` would select
    # image *row* 0, not channel 0, and fail on crops shorter than 3 rows.
    bgr_mean = [np.average(nose_img[:, :, channel]) for channel in range(3)]
    plt.bar(color, bgr_mean)
    plt.ylim([100, 170])

    plt.subplot(235)
    title = ['Green Hist avg']
    plt.bar(title, np.average(histr_g[0:20]))
    plt.ylim([0, 6])

    # Render the figure, give the GUI event loop a moment to process, then
    # clear it so the next call starts from an empty figure.
    plt.draw()
    plt.pause(0.0011)
    plt.clf()
|
||
|
|
||
|
# detector = dlib.get_frontal_face_detector()
|
||
|
# predictor = dlib.shape_predictor("data/shape_predictor_68_face_landmarks.dat")
|
||
|
# def get_nose_position(face):
|
||
|
# ########################
|
||
|
# # Facial LandMark Detector
|
||
|
#
|
||
|
# face_img = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
|
||
|
# face_img = cv2.equalizeHist(face_img)
|
||
|
# faces = detector(face_img)
|
||
|
# for face in faces:
|
||
|
# x1 = face.left()
|
||
|
# y1 = face.top()
|
||
|
# x2 = face.right()
|
||
|
# y2 = face.bottom()
|
||
|
# # Then we can also do cv2.rectangle function (frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
|
||
|
# landmarks = predictor(face_img, face)
|
||
|
# # We are then accessing the landmark points
|
||
|
# for n in range(0, 68):
|
||
|
# x = landmarks.part(n).x
|
||
|
# y = landmarks.part(n).y
|
||
|
# cv2.circle(face_img, (x, y), 2, (255, 255, 0), -1)
|
||
|
# #################
|
||
|
# # fgMask = backSub.apply(img[c1[1]:c2[1],c1[0]:c2[0]])
|
||
|
# if len(face_img):
|
||
|
# cv2.imshow("Face", face_img)
|
||
|
|
||
|
|
||
|
|
||
|
def arg_parse():
    """Parse the command-line options of the video detection script.

    Returns:
        An ``argparse.Namespace`` with the attributes ``video``, ``dataset``,
        ``confidence``, ``nms_thresh``, ``cfgfile``, ``weightsfile`` and
        ``reso``. ``confidence`` and ``nms_thresh`` are floats; ``reso`` is
        kept as a string.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')

    parser.add_argument("--video", dest='video',
                        help="Video to run detection upon",
                        default="video.avi", type=str)
    parser.add_argument("--dataset", dest="dataset",
                        help="Dataset on which the network has been trained",
                        default="pascal")
    # type=float makes argparse reject malformed thresholds with a usage
    # message and hands callers a float whether or not the flag was given.
    parser.add_argument("--confidence", dest="confidence",
                        help="Object Confidence to filter predictions",
                        default=0.5, type=float)
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS Threshhold",
                        default=0.4, type=float)
    parser.add_argument("--cfg", dest='cfgfile',
                        help="Config file",
                        default="cfg/yolov3_pysource_testing.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile',
                        help="weightsfile",
                        default="data/yolov3_training_nose_2100_13_10_1530.weights",
                        type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)

    return parser.parse_args()
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: load the network, open the capture device, then
    # detect, annotate and display frames until the stream ends or the user
    # presses 'q'. Everything stays at module level on purpose: ``write``
    # reads ``classes`` as a module global.
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)

    CUDA = torch.cuda.is_available()
    num_classes = 80

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    print(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if inp_dim % 32 != 0:
        raise ValueError("--reso must be a multiple of 32, got {0}".format(inp_dim))
    if inp_dim <= 32:
        raise ValueError("--reso must be greater than 32, got {0}".format(inp_dim))

    if CUDA:
        model.cuda()

    # Switch to inference mode *before* the warm-up pass so the pass cannot
    # alter any train-time state (e.g. batch-norm running statistics, if the
    # network has such layers), and skip autograd bookkeeping for it.
    model.eval()
    with torch.no_grad():
        model(get_test_input(inp_dim, CUDA), CUDA)

    # Loop-invariant resources: load once instead of once per frame, and
    # close the palette file deterministically.
    classes = load_classes('data/coco.names')
    # NOTE: pickle can execute arbitrary code on load -- only use a trusted
    # palette file. ``colors`` is currently not read by ``write``.
    with open("data/pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    # NOTE(review): ``args.video`` is parsed but not used; the capture source
    # is hard-coded to device index 1.
    # cap = cv2.VideoCapture("vids/breathing_cap.mov")#'http://192.168.21.200:81/')
    cap = cv2.VideoCapture(1)
    if not cap.isOpened():
        raise RuntimeError('Cannot capture source')

    frames = 0
    start = time.time()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            # An int result is treated as "nothing to draw"; the frame is
            # then shown unannotated.
            if not isinstance(output, int):
                im_dim = im_dim.repeat(output.size(0), 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                # Map box corners from the square network input back to the
                # original frame: remove the centring offset, then undo the
                # resize factor.
                output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
                output[:, 1:5] /= scaling_factor

                # Keep every corner inside the frame.
                for i in range(output.shape[0]):
                    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

                for detection in output:
                    write(detection, orig_im)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
    finally:
        # Always free the capture device and close the preview windows, even
        # when the loop exits through an exception.
        cap.release()
        cv2.destroyAllWindows()
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|