reality_exploration/backup/video_demo14_10_2020.py

from __future__ import division
import time
# import torch
# import torch.nn as nn
# from torch.autograd import Variable
import numpy as np
import cv2
from matplotlib import pyplot as plt
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image, letterbox_image
# import pandas as pd
# import random
import pickle as pkl
import argparse
# import dlib


def get_test_input(input_dim, CUDA):
    """
    Build a one-image batch from the sample picture for a warm-up forward pass.

    Reads "data/dog-cycle-car.png", resizes it to ``input_dim`` x ``input_dim``,
    reverses the channel order, moves channels first, adds a batch axis and
    scales the values by 1/255.

    Args:
        input_dim: side length (pixels) of the square network input.
        CUDA: when truthy, the returned tensor is moved to the GPU.

    Returns:
        A float tensor of shape (1, channels, input_dim, input_dim).

    Raises:
        FileNotFoundError: if the sample image cannot be read.

    NOTE(review): ``torch`` and ``Variable`` are not imported explicitly in
    this file; presumably they arrive through ``from util import *`` - confirm.
    """
    image_path = "data/dog-cycle-car.png"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with a clear message instead of inside cv2.resize.
        raise FileNotFoundError("Could not read test image: " + image_path)

    img = cv2.resize(img, (input_dim, input_dim))
    # Reverse the channel axis and go from HxWxC to CxHxW.
    img_ = img[:, :, ::-1].transpose((2, 0, 1))
    # The division allocates a fresh array, so torch.from_numpy gets a
    # regular (non negatively-strided) buffer.
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = Variable(torch.from_numpy(img_).float())

    if CUDA:
        img_ = img_.cuda()

    return img_

def prep_image(img, inp_dim):
    """
    Turn an image array into a network input tensor.

    The image is passed through ``letterbox_image`` with a square target of
    ``inp_dim``, its channel axis is reversed, channels are moved first, and
    the values are scaled by 1/255 with a leading batch axis added.

    Returns a 3-tuple: (input tensor, the untouched input image,
    (width, height) of the input image).
    """
    height, width = img.shape[0], img.shape[1]
    boxed = letterbox_image(img, (inp_dim, inp_dim))

    # Reverse channels and reorder HxWxC -> CxHxW; copy() materialises the
    # reversed view so torch.from_numpy receives a plain array.
    chw = boxed[:, :, ::-1].transpose((2, 0, 1)).copy()

    tensor = torch.from_numpy(chw).float()
    tensor = tensor.div(255.0).unsqueeze(0)
    return tensor, img, (width, height)

def write(x, img):
    """
    Draw one detection on ``img`` if its class label is 'person'.

    For a 'person' detection the boxed region is cropped and handed to
    ``get_rgb_Hist`` (skipped when the crop is empty), then a red rectangle
    is drawn on ``img`` in place. Other classes are ignored.

    Args:
        x: one detection row; elements 1-2 are read as the top-left corner,
           elements 3-4 as the bottom-right corner and the last element as
           the class index.
        img: image array that is cropped from and drawn on.

    Returns:
        ``img`` (the same object that was passed in).

    Relies on the module-level ``classes`` list being set before the call.
    """
    # Convert to plain Python ints: tuples of 0-dim tensors are fragile as
    # slice bounds and are rejected as points by newer OpenCV builds.
    x1, y1 = (int(v) for v in x[1:3])
    x2, y2 = (int(v) for v in x[3:5])
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = (0, 0, 255)

    if label == 'person':
        print((y2 - y1) / 2)
        cropped_img = img[y1:y2, x1:x2]
        # .size is zero when either dimension is empty; len() would only
        # look at the number of rows and let zero-width crops through.
        if cropped_img.size:
            get_rgb_Hist(cropped_img)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 5)
    return img

def get_rgb_Hist(nose_img):
    """
    Show a crop and plot per-channel statistics for it.

    Displays ``nose_img`` in an OpenCV window named "Nose" and fills a 2x3
    matplotlib grid with:
      * 256-bin histograms of channels 0, 1 and 2 (plotted in blue, green
        and red),
      * a bar chart of the mean value of each channel,
      * a bar with the mean of the first 20 bins of the channel-1 histogram.
    The figure is drawn, given a short pause to refresh, then cleared so the
    next call starts from an empty figure.

    Args:
        nose_img: image array with at least three channels on its last axis
            (channel order is assumed to be B, G, R as delivered by OpenCV -
            confirm against the caller).
    """
    cv2.imshow("Nose", nose_img)

    # (subplot position, channel index, line colour, x-axis upper limit).
    # The channel-1 plot is limited to x in [0, 20], the same bin range that
    # is averaged for the last bar below.
    panels = (
        (231, 0, 'b', 256),
        (232, 1, 'g', 20),
        (233, 2, 'r', 256),
    )
    histograms = {}
    for position, channel, line_color, x_max in panels:
        plt.subplot(position)
        hist = cv2.calcHist([nose_img], [channel], None, [256], [0, 256])
        histograms[channel] = hist
        plt.plot(hist, color=line_color)
        plt.xlim([0, x_max])
        plt.ylim([0, 80])

    plt.subplot(234)
    color = ['blue', 'green', 'red']
    # Average over the channel axis. Indexing nose_img[0], [1], [2] would
    # select image rows instead of colour channels.
    bgr_mean = [np.average(nose_img[:, :, channel]) for channel in range(3)]
    plt.bar(color, bgr_mean)
    plt.ylim([100, 170])

    plt.subplot(235)
    title = ['Green Hist avg']
    plt.bar(title, np.average(histograms[1][0:20]))
    plt.ylim([0, 6])

    plt.draw()
    plt.pause(0.0011)
    plt.clf()

# detector = dlib.get_frontal_face_detector()
# predictor = dlib.shape_predictor("data/shape_predictor_68_face_landmarks.dat")
# def get_nose_position(face):
#     ########################
#     # Facial LandMark Detector
#
#     face_img = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
#     face_img = cv2.equalizeHist(face_img)
#     faces = detector(face_img)
#     for face in faces:
#         x1 = face.left()
#         y1 = face.top()
#         x2 = face.right()
#         y2 = face.bottom()
#         # Then we can also do cv2.rectangle function (frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
#         landmarks = predictor(face_img, face)
#         # We are then accessing the landmark points
#         for n in range(0, 68):
#             x = landmarks.part(n).x
#             y = landmarks.part(n).y
#             cv2.circle(face_img, (x, y), 2, (255, 255, 0), -1)
#     #################
#     # fgMask = backSub.apply(img[c1[1]:c2[1],c1[0]:c2[0]])
#     if len(face_img):
#         cv2.imshow("Face", face_img)


def arg_parse():
    """
    Parse the command-line options of the detection demo.

    Returns:
        argparse.Namespace with the attributes ``video``, ``dataset``,
        ``confidence``, ``nms_thresh``, ``cfgfile``, ``weightsfile`` and
        ``reso``. ``confidence`` and ``nms_thresh`` are floats; ``reso`` is
        kept as a string.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')

    parser.add_argument("--video", dest='video',
                        help="Video to run detection upon",
                        default="video.avi", type=str)
    parser.add_argument("--dataset", dest="dataset",
                        help="Dataset on which the network has been trained",
                        default="pascal")
    # type=float makes argparse reject non-numeric values at parse time
    # instead of handing unvalidated strings to the caller.
    parser.add_argument("--confidence", dest="confidence",
                        help="Object Confidence to filter predictions",
                        default=0.5, type=float)
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS Threshhold",
                        default=0.4, type=float)
    parser.add_argument("--cfg", dest='cfgfile',
                        help="Config file",
                        default="cfg/yolov3_pysource_testing.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile',
                        help="weightsfile",
                        default="data/yolov3_training_nose_2100_13_10_1530.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: load the network, open capture device 1, run
    # detection on every frame and display the annotated frames until the
    # stream ends or 'q' is pressed.
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)

    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    print(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # Explicit checks instead of assert, which is stripped under `python -O`.
    if inp_dim % 32 != 0 or inp_dim <= 32:
        raise ValueError(
            "Input resolution must be a multiple of 32 and greater than 32, got {}".format(inp_dim))

    if CUDA:
        model.cuda()

    # Switch to inference mode BEFORE the warm-up pass so that the pass cannot
    # update any training-time statistics, and skip gradient tracking.
    # NOTE(review): assumes Darknet follows standard nn.Module train/eval
    # semantics - confirm.
    model.eval()

    print("x")
    with torch.no_grad():
        model(get_test_input(inp_dim, CUDA), CUDA)

    # NOTE(review): --video is parsed but the capture source below is the
    # hard-coded device index 1; kept as in the original.
    videofile = args.video
    # cap = cv2.VideoCapture("vids/breathing_cap.mov")#'http://192.168.21.200:81/')
    cap = cv2.VideoCapture(1)

    if not cap.isOpened():
        raise IOError('Cannot capture source')

    # Loop-invariant resources: load once instead of on every frame.
    # `classes` must stay a module-level name because write() reads it.
    classes = load_classes('data/coco.names')
    # NOTE: pickle must only be used on trusted files; this one is local data.
    with open("data/pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    frames = 0
    start = time.time()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

            # write_results hands back an int when nothing was detected; in
            # that case the frame is shown without any drawing.
            if not isinstance(output, int):
                im_dim = im_dim.repeat(output.size(0), 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                # Undo the letterbox: remove the padding offset, then rescale
                # the box corners from network-input to original-frame pixels.
                output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

                output[:, 1:5] /= scaling_factor

                # Keep every corner inside the frame.
                for i in range(output.shape[0]):
                    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

                for detection in output:
                    write(detection, orig_im)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
    finally:
        # Always free the capture device and close the display windows,
        # including on 'q', end of stream or an exception.
        cap.release()
        cv2.destroyAllWindows()
first commit 2020-11-11 17:03:00 +00:00			`from __future__ import division`
			`import time`
			`# import torch`
			`# import torch.nn as nn`
			`# from torch.autograd import Variable`
			`import numpy as np`
			`import cv2`
			`from matplotlib import pyplot as plt`
			`from util import *`
			`from darknet import Darknet`
			`from preprocess import prep_image, inp_to_image, letterbox_image`
			`# import pandas as pd`
			`# import random`
			`import pickle as pkl`
			`import argparse`
			`# import dlib`


			`def get_test_input(input_dim, CUDA):`
			`img = cv2.imread("data/dog-cycle-car.png")`
			`img = cv2.resize(img, (input_dim, input_dim))`
			`img_ = img[:,:,::-1].transpose((2,0,1))`
			`img_ = img_[np.newaxis,:,:,:]/255.0`
			`img_ = torch.from_numpy(img_).float()`
			`img_ = Variable(img_)`

			`if CUDA:`
			`img_ = img_.cuda()`

			`return img_`

			`def prep_image(img, inp_dim):`
			`"""`
			`Prepare image for inputting to the neural network.Returns a Variable`
			`"""`

			`orig_im = img`
			`dim = orig_im.shape[1], orig_im.shape[0]`
			`img = (letterbox_image(orig_im, (inp_dim, inp_dim)))`
			`img_ = img[:,:,::-1].transpose((2,0,1)).copy()`
			`img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)`
			`return img_, orig_im, dim`

			`def write(x, img):`
			`c1 = tuple(x[1:3].int())`
			`c2 = tuple(x[3:5].int())`
			`cls = int(x[-1])`
			`label = "{0}".format(classes[cls])`
			`color = (0, 0, 255)#random.choice(colors)`
			`if (label=='person'):`
			`# cv2.line(img, tuple((c1[0]+(c2[0]-c1[0])/2, c1[1] + (c2[1]-c1[1])/2)), c2, (255, 0, 0), 5)`
			`print((c2[1]-c1[1])/2)`
			`# cv2.rectangle(img, (c1[0], c1[1] + int((c2[1]-c1[1])/2.5)-5), (c2[0],c2[1]), (225,0,0), 3)`
			`cropped_img=img[c1[1]:c2[1],c1[0]:c2[0]]`
			`if len(cropped_img):`
			`get_rgb_Hist(cropped_img)`
			`# get_nose_position(face_img)`
			`cv2.rectangle(img, c1, c2, color, 5)`
			`t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]`
			`c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4`
			`# cv2.rectangle(img, c1, c2,color, -1)`
			`# cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1);`
			`return img`

			`def get_rgb_Hist(nose_img):`

			`# gray = cv2.cvtColor(nose_img, cv2.COLOR_BGR2GRAY)`
			`# cv2.imshow("Nose",gray)`
			`# hist = cv2.calcHist([gray], [0], None, [256], [0, 256])`
			`# plt.plot(hist)`
			`# plt.xlim([0, 256])`
			`cv2.imshow("Nose", nose_img)`

			`plt.subplot(231)`
			`histr_b = cv2.calcHist([nose_img], [0], None, [256], [0, 256])`
			`plt.plot(histr_b, color='b')`
			`plt.xlim([0, 256])`
			`plt.ylim([0, 80])`

			`plt.subplot(232)`
			`histr_g = cv2.calcHist([nose_img], [1], None, [256], [0, 256])`
			`plt.plot(histr_g, color='g')`
			`plt.xlim([0, 20])`
			`plt.ylim([0, 80])`

			`plt.subplot(233)`
			`histr_r = cv2.calcHist([nose_img], [2], None, [256], [0, 256])`
			`plt.plot(histr_r, color='r')`
			`plt.xlim([0, 256])`
			`plt.ylim([0, 80])`

			`plt.subplot(234)`
			`color = ['blue', 'green', 'red']`
			`bgr_mean = [np.average(nose_img[0]), np.average(nose_img[1]), np.average(nose_img[2])]`
			`plt.bar(color, bgr_mean)`
			`plt.ylim([100, 170])`

			`plt.subplot(235)`
			`title = ['Green Hist avg']`
			`plt.bar(title, np.average(histr_g[0:20]))`
			`plt.ylim([0, 6])`

			`plt.draw()`
			`plt.pause(0.0011)`
			`plt.clf()`

			`# detector = dlib.get_frontal_face_detector()`
			`# predictor = dlib.shape_predictor("data/shape_predictor_68_face_landmarks.dat")`
			`# def get_nose_position(face):`
			`# ########################`
			`# # Facial LandMark Detector`
			`#`
			`# face_img = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)`
			`# face_img = cv2.equalizeHist(face_img)`
			`# faces = detector(face_img)`
			`# for face in faces:`
			`# x1 = face.left()`
			`# y1 = face.top()`
			`# x2 = face.right()`
			`# y2 = face.bottom()`
			`# # Then we can also do cv2.rectangle function (frame, (x1, y1), (x2, y2), (0, 255, 0), 3)`
			`# landmarks = predictor(face_img, face)`
			`# # We are then accesing the landmark points`
			`# for n in range(0, 68):`
			`# x = landmarks.part(n).x`
			`# y = landmarks.part(n).y`
			`# cv2.circle(face_img, (x, y), 2, (255, 255, 0), -1)`
			`# #################`
			`# # fgMask = backSub.apply(img[c1[1]:c2[1],c1[0]:c2[0]])`
			`# if len(face_img):`
			`# cv2.imshow("Face", face_img)`



			`def arg_parse():`
			`parser = argparse.ArgumentParser(description='YOLO v3 Video Detection Module')`

			`parser.add_argument("--video", dest = 'video', help =`
			`"Video to run detection upon",`
			`default = "video.avi", type = str)`
			`parser.add_argument("--dataset", dest = "dataset", help = "Dataset on which the network has been trained", default = "pascal")`
			`parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.5)`
			`parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)`
			`parser.add_argument("--cfg", dest = 'cfgfile', help =`
			`"Config file",`
			`default = "cfg/yolov3_pysource_testing.cfg", type = str)#cfg/yolov3.cfg`
			`parser.add_argument("--weights", dest = 'weightsfile', help =`
			`"weightsfile",`
			`default = "data/yolov3_training_nose_2100_13_10_1530.weights", type = str)#yolov3.weights`
			`parser.add_argument("--reso", dest = 'reso', help =`
			`"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",`
			`default = "416", type = str)`
			`return parser.parse_args()`


			`if __name__ == '__main__':`
			`args = arg_parse()`
			`confidence = float(args.confidence)`
			`nms_thesh = float(args.nms_thresh)`
			`start = 0`

			`CUDA = torch.cuda.is_available()`

			`num_classes = 80`

			`CUDA = torch.cuda.is_available()`

			`bbox_attrs = 5 + num_classes`
			`# print("Loading Face Landmark detector.....")`

			`print("Loading network.....")`
			`model = Darknet(args.cfgfile)`
			`print(args.cfgfile)`
			`model.load_weights(args.weightsfile)`
			`print("Network successfully loaded")`

			`model.net_info["height"] = args.reso`
			`inp_dim = int(model.net_info["height"])`
			`assert inp_dim % 32 == 0`
			`assert inp_dim > 32`

			`if CUDA:`
			`model.cuda()`

			`print("x")`
			`model(get_test_input(inp_dim, CUDA), CUDA)`

			`model.eval()`

			`videofile = args.video`
			`# cap = cv2.VideoCapture("vids/breathing_cap.mov")#'http://192.168.21.200:81/')`
			`cap = cv2.VideoCapture(1)`

			`assert cap.isOpened(), 'Cannot capture source'`

			`frames = 0`
			`start = time.time()`
			`while cap.isOpened():`

			`ret, frame = cap.read()`
			`if ret:`


			`img, orig_im, dim = prep_image(frame, inp_dim)`

			`im_dim = torch.FloatTensor(dim).repeat(1,2)`


			`if CUDA:`
			`im_dim = im_dim.cuda()`
			`img = img.cuda()`

			`with torch.no_grad():`
			`output = model(Variable(img), CUDA)`
			`output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)`

			`if type(output) == int:`
			`frames += 1`
			`print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))`
			`cv2.imshow("frame", orig_im)`
			`key = cv2.waitKey(1)`
			`if key & 0xFF == ord('q'):`
			`break`
			`continue`




			`im_dim = im_dim.repeat(output.size(0), 1)`
			`scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1)`

			`output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2`
			`output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2`

			`output[:,1:5] /= scaling_factor`

			`for i in range(output.shape[0]):`
			`output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0])`
			`output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1])`

			`classes = load_classes('data/coco.names')`
			`colors = pkl.load(open("data/pallete", "rb"))`
			`list(map(lambda x: write(x, orig_im), output))`


			`cv2.imshow("frame", orig_im)`
			`key = cv2.waitKey(1)`
			`if key & 0xFF == ord('q'):`
			`break`
			`frames += 1`
			`print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))`


			`else:`
			`break`