I managed to train the network on my new custom data set. I read the video frame by frame using open-cv and process it through the network to get bounding box coordinates. I am saving the output using "fig= plt.gcf()" and "fig.savefig" for static image dataset.
I want to do this on a live video, where I can show the detected frames of the video one by one in real time without saving them.
This is something I did for one of my projects. Are you familiar with OpenCV? I replaced all of the matplotlib imshow with OpenCV. This allows for better refresh abilities of the window. Below is my reimplementation of the demo function. It is called with the following. Hope this helps.
# Open the capture once, outside the loop: constructing it inside the call
# reopens the file on every iteration, so only the first frame is ever read.
videoFile = cv2.VideoCapture(videoFilePath)
while True:
    demo_video(net, videoFile)
def demo_video(net, videoFile):
    """Run detection on the next frame of a video and display the result.

    Reads one frame from ``videoFile``, runs ``im_detect`` on it, applies
    per-class NMS, draws every detection scoring at least ``CONF_THRESH``
    together with an FPS overlay, and shows the frame in an OpenCV window.

    Args:
        net: Network object handed straight through to ``im_detect``.
        videoFile: Capture object whose ``read()`` yields ``(ret, frame)``,
            e.g. a ``cv2.VideoCapture``.

    Raises:
        IOError: If no frame could be read (for example at end of video).

    Side effects:
        Assigns the module-level ``frameRate`` and reads the module-level
        ``videoFilePath`` to title the display window.
    """
    global frameRate

    ret, im = videoFile.read()
    if not ret or im is None:
        # Fail with a clear message instead of an obscure error deep inside
        # im_detect when the stream is exhausted or could not be opened.
        raise IOError('Could not read a frame from the video source')

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Guard against a zero elapsed time reported by a coarse clock.
    frameRate = 1.0 / timer.total_time if timer.total_time > 0 else 0.0
    print("fps: " + str(frameRate))

    # Visualize detections for each class
    CONF_THRESH = 0.65
    NMS_THRESH = 0.2
    # Start at 1 because index 0 of CLASSES is the background class.
    for cls_ind, cls in enumerate(CLASSES[1:], 1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        im = vis_detections_video(im, cls, dets, thresh=CONF_THRESH)

    # Anchor the FPS text to the right edge so it stays visible on any frame
    # width (this evaluates to the previous fixed x=1750 on 1920-wide video).
    fps_x = max(im.shape[1] - 170, 0)
    cv2.putText(im, '{:s} {:.2f}'.format("FPS:", frameRate), (fps_x, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
    # Window title is the last '/'-separated component of the video path.
    cv2.imshow(videoFilePath.split('/')[-1], im)
    cv2.waitKey(20)
Hi, I want to know where the function 'vis_detections_video(im, cls, dets, thresh=CONF_THRESH)' is defined. I can't find it in demo.py. Please tell me, thank you!
That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib functions replaced by cv2 calls, plus the capability to draw multiple detections per image.
EDIT: Took out the extra, not relevant parts.
def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw every detection of one class that scores at least ``thresh``.

    Args:
        im: Image to draw on; it is modified in place by the cv2 calls.
        class_name: Label text written next to each box.
        dets: 2-D array with one detection per row; the first four columns
            are used as the box corners (x1, y1, x2, y2) and the last column
            as the score.
        thresh: Minimum score a detection needs in order to be drawn.

    Returns:
        The same ``im`` object, with boxes and labels drawn on it.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # Convert to plain ints like the label calls below already do;
        # float coordinates are not accepted by every OpenCV build.
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        cv2.rectangle(im, top_left, bottom_right, (0, 0, 255), 2)
        # Filled dark strip (thickness -1) above the box as label background.
        cv2.rectangle(im,
                      (int(bbox[0]), int(bbox[1] - 20)),
                      (int(bbox[0] + 200), int(bbox[1])),
                      (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1] - 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .75, (255, 255, 255))
    return im
Thank u so much!
发自网易邮箱大师
On 08/13/2017 13:28, rtgoring wrote:
That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib functions replaced by cv2 calls, plus the capability to draw multiple detections per image.
# Variant of vis_detections_video that, besides drawing boxes and labels,
# records colour-named classes in `colorOrder` / `lastColor` and overlays the
# most recent colour name on the frame.
# NOTE(review): indentation was lost in this quoted copy, so which statements
# belong to the `for` loop and to each `if` body cannot be determined from
# here -- confirm against the author's original before reusing this version.
def vis_detections_video(im, class_name, dets, thresh=0.5):
"""Draw detected bounding boxes."""
# lastColor is assigned below; frameRate is declared global but is not used
# in this function. colorOrder is read and appended to without a declaration;
# presumably it is a module-level list that already holds at least one string
# (colorOrder[-1] is indexed) -- TODO confirm.
global lastColor,frameRate
# Indices of the rows whose last column (the score) reaches the threshold.
inds = np.where(dets[:, -1] >= thresh)[0]
# Nothing to draw: hand the image back untouched.
if len(inds) == 0:
return im
for i in inds:
# First four columns are the box corners, last column is the score.
bbox = dets[i, :4]
score = dets[i, -1]
#print 'ahahahah'
# Box outline, colour (0,0,255), thickness 2.
# NOTE(review): these coordinates are passed without int(), unlike the two
# calls that follow -- confirm the installed OpenCV accepts float coordinates.
cv2.rectangle(im,(bbox[0],bbox[1]),(bbox[2],bbox[3]),(0,0,255),2)
# Filled (thickness -1) dark strip, 200 wide and 20 high, above the box's
# top-left corner; used as background for the label text.
cv2.rectangle(im,(int(bbox[0]),int(bbox[1]-20)),(int(bbox[0]+200),int(bbox[1])),(10,10,10),-1)
# Label text: class name followed by the score with three decimals.
cv2.putText(im,'{:s} {:.3f}'.format(class_name, score),(int(bbox[0]),int(bbox[1]-2)),cv2.FONT_HERSHEY_SIMPLEX,.75,(255,255,255))#,cv2.CV_AA)
# Only classes whose name contains one of these colour words are tracked.
if 'red' in class_name or 'green' in class_name or 'yellow' in class_name or 'black' in class_name:
#print class_name
# Substring test against the most recently recorded entry, so the same class
# is not appended twice in a row.
if class_name not in colorOrder[-1]:
colorOrder.append(class_name)
print class_name
lastColor = class_name
# Fixed caption in the top-left area of the frame.
cv2.putText(im,'Current Color',(10,50),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,0))
# Text colour defaults to (0,0,0) and is overridden when lastColor contains
# 'red', 'green' or 'yellow'.
txtColor = (0,0,0)
if 'red' in lastColor:
txtColor = (0,0,255)
if 'green' in lastColor:
txtColor = (0,255,0)
if 'yellow' in lastColor:
txtColor = (0,255,255)
# Write the last recorded colour name below the caption in that colour.
cv2.putText(im,'{:s}'.format(lastColor),(10,100),cv2.FONT_HERSHEY_SIMPLEX,1,txtColor)
return im
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub, or mute the thread.
In case anyone is looking at this later, here is how to use OpenCV to load either a video file, or a directory of images. The Video File method could easily be adapted to use a webcam.
#!/usr/bin/env python
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""
Demo script showing detections in sample images.
See README.md for installation instructions before running.
"""
import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from fast_rcnn.nms_wrapper import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, sys, cv2
import argparse
import time
# Class labels in score-column order; index 0 is the background class, which
# the detection loop skips.
CLASSES = (
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# --net choice -> (model directory name, caffemodel file name).
NETS = {
    'vgg16': ('VGG16', 'VGG16_faster_rcnn_final.caffemodel'),
    'zf': ('ZF', 'ZF_faster_rcnn_final.caffemodel'),
}
def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw every detection of one class that scores at least ``thresh``.

    Args:
        im: Image to draw on; it is modified in place by the cv2 calls.
        class_name: Label text written next to each box.
        dets: 2-D array with one detection per row; the first four columns
            are used as the box corners (x1, y1, x2, y2) and the last column
            as the score.
        thresh: Minimum score a detection needs in order to be drawn.

    Returns:
        The same ``im`` object, with boxes and labels drawn on it.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # Convert to plain ints like the label calls below already do;
        # float coordinates are not accepted by every OpenCV build.
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        cv2.rectangle(im, top_left, bottom_right, (0, 0, 255), 2)
        # Filled dark strip (thickness -1) straddling the top edge of the
        # box, used as background for the label text.
        cv2.rectangle(im,
                      (int(bbox[0]), int(bbox[1]) - 10),
                      (int(bbox[0] + 200), int(bbox[1]) + 10),
                      (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1] - 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .45, (255, 255, 255))
    return im
def demo_video(net, im):
    """Detect objects in one image, draw them, save and display the result.

    Runs ``im_detect`` on ``im``, applies per-class NMS, draws every
    detection scoring at least ``CONF_THRESH``, writes the annotated image
    to ``output/<timestamp>.jpg`` and shows it in the ``'ret'`` window.

    Args:
        net: Network object handed straight through to ``im_detect``.
        im: Image to process, e.g. a frame from ``cv2.VideoCapture.read()``
            or ``cv2.imread()``.

    Raises:
        ValueError: If ``im`` is ``None`` (a failed read or an unreadable
            image file).
    """
    if im is None:
        # Fail with a clear message instead of an obscure error deep inside
        # im_detect when the caller passes on a failed read.
        raise ValueError('demo_video received no image (im is None)')

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.75
    NMS_THRESH = 0.2
    # Start at 1 because index 0 of CLASSES is the background class.
    for cls_ind, cls in enumerate(CLASSES[1:], 1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        im = vis_detections_video(im, cls, dets, thresh=CONF_THRESH)

    # cv2.imwrite does not create missing directories, so make sure the
    # target folder exists before saving the annotated frame.
    if not os.path.isdir('output'):
        os.makedirs('output')
    cv2.imwrite(os.path.join('output', str(time.time()) + '.jpg'), im)
    cv2.imshow('ret', im)
    cv2.waitKey(20)
def parse_args():
    """Build the demo's command-line parser and return the parsed options.

    Returns:
        The ``argparse`` namespace with ``gpu_id`` (int, default 0),
        ``cpu_mode`` (flag) and ``demo_net`` (one of the ``NETS`` keys,
        default ``'vgg16'``).
    """
    cli = argparse.ArgumentParser(description='Faster R-CNN demo')
    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        ('--gpu', dict(dest='gpu_id',
                       help='GPU device id to use [0]',
                       default=0,
                       type=int)),
        ('--cpu', dict(dest='cpu_mode',
                       help='Use CPU mode (overrides --gpu)',
                       action='store_true')),
        ('--net', dict(dest='demo_net',
                       help='Network to use [vgg16]',
                       choices=NETS.keys(),
                       default='vgg16')),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
if __name__ == '__main__':
    # Script entry point: load the selected Faster R-CNN model, then run the
    # detector on every frame of a video file and display the results.
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    args = parse_args()
    cv2.namedWindow('ret', 0)

    prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                            'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
    caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('\n\nLoaded network {:s}'.format(caffemodel))

    ### Load directory of images
    # Alternative input source, kept for reference. Raw strings are used for
    # Windows paths because "\U" is an invalid escape on Python 3.
    #
    # imagePath = r'C:\Users\RTGORING\Documents\Code\py-faster-rcnn\data\demo'
    # imageFiles = []
    # for f in os.listdir(imagePath):
    #     if f.endswith('jpg') or f.endswith('.jpeg'):
    #         imageFiles.append(f)
    # imageFiles = sorted(imageFiles)
    # for imageName in imageFiles:
    #     image = cv2.imread(os.path.join(imagePath, imageName))
    #     demo_video(net, image)

    ### Load Video File
    videoFilePath = r"C:\Users\RTGORING\Documents\Data\Vid.avi"
    videoFile = cv2.VideoCapture(videoFilePath)
    if not videoFile.isOpened():
        raise IOError('Could not open video {:s}'.format(videoFilePath))
    try:
        while True:
            ret, image = videoFile.read()
            if not ret:
                # End of the video (or a read failure): stop cleanly instead
                # of passing an empty frame to the detector.
                break
            demo_video(net, image)
    finally:
        # Always free the capture handle and close the display window.
        videoFile.release()
        cv2.destroyAllWindows()
Most helpful comment
Thank u so much!
发自网易邮箱大师
On 08/13/2017 13:28, rtgoring wrote:
That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib functions replaced by cv2 calls, plus the capability to draw multiple detections per image.
def vis_detections_video(im, class_name, dets, thresh=0.5):
"""Draw detected bounding boxes."""
global lastColor,frameRate
inds = np.where(dets[:, -1] >= thresh)[0]
if len(inds) == 0:
return im
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub, or mute the thread.