Py-faster-rcnn: How to use py-faster rcnn to detect objects on a live video

Created on 15 May 2017  ·  5Comments  ·  Source: rbgirshick/py-faster-rcnn

I managed to train the network on my new custom data set. I read the video frame by frame using OpenCV and pass each frame through the network to get the bounding-box coordinates. For a static image dataset, I save the output using "fig = plt.gcf()" and "fig.savefig".
I want to do this on a live video, where I can show the detected frames one by one in real time without saving them.

Most helpful comment

Thank u so much!

发自网易邮箱大师
On 08/13/2017 13:28, rtgoring wrote:

That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib calls replaced by cv2, plus support for multiple detections per image.

def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes and show the most recent colour class.

    Relies on two module-level names defined outside this snippet:
    ``colorOrder`` (a list of class names seen so far) and ``lastColor``
    (the class name currently shown as the "current colour").

    Args:
        im: Image array that is drawn on in place with cv2.
        class_name: Label written above each box; also checked for the
            substrings 'red', 'green', 'yellow' and 'black'.
        dets: 2-D array with one row per detection. The first four columns
            are used as two opposite box corners and the last as the score.
        thresh: Minimum score a detection needs to be drawn.

    Returns:
        The same ``im`` object, annotated if any detection passed ``thresh``.
    """
    global lastColor
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return im

    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # cv2 drawing calls expect integer pixel coordinates, so the box
        # corners are cast the same way the label coordinates already were.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
        # Filled strip behind the label text, directly above the box.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1]-20)),
                      (int(bbox[0]+200), int(bbox[1])), (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1]-2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .75, (255, 255, 255))
        if any(color in class_name
               for color in ('red', 'green', 'yellow', 'black')):
            # An empty colorOrder would make colorOrder[-1] raise IndexError;
            # treat that as "no previous colour".
            # NOTE(review): the check below is a substring test against the
            # previous entry, kept as in the original - confirm it is intended.
            if not colorOrder or class_name not in colorOrder[-1]:
                colorOrder.append(class_name)
                print(class_name)
                lastColor = class_name
                cv2.putText(im, 'Current Color', (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
        # Text colour follows the last colour class; later checks override
        # earlier ones, exactly as in the original sequence of ifs.
        txtColor = (0, 0, 0)
        if 'red' in lastColor:
            txtColor = (0, 0, 255)
        if 'green' in lastColor:
            txtColor = (0, 255, 0)
        if 'yellow' in lastColor:
            txtColor = (0, 255, 255)
        cv2.putText(im, '{:s}'.format(lastColor), (10, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, txtColor)

    return im


You are receiving this because you commented.
Reply to this email directly, view it on GitHub, or mute the thread.

All 5 comments

This is something I did for one of my projects. Are you familiar with OpenCV? I replaced all of the matplotlib imshow with OpenCV. This allows for better refresh abilities of the window. Below is my reimplementation of the demo function. It is called with the following. Hope this helps.

# Open the capture once. Building a new cv2.VideoCapture inside the loop
# would start reading the file from the beginning on every iteration.
videoFile = cv2.VideoCapture(videoFilePath)
while True:
    demo_video(net, videoFile)

def demo_video(net, videoFile):
    """Read one frame from a capture, run detection on it and display it.

    Updates the module-level ``frameRate`` with the speed of this detection
    and reads the module-level ``videoFilePath`` to name the display window.

    Args:
        net: Network object handed to ``im_detect``.
        videoFile: Object whose ``read()`` returns ``(ret, image)``, such as
            the ``cv2.VideoCapture`` used by the caller.

    Raises:
        IOError: If ``read()`` reports that no frame could be read.
    """
    global frameRate
    ret, im = videoFile.read()
    if not ret:
        # Fail with a clear message instead of crashing inside im_detect.
        raise IOError('Could not read a frame from the video source')

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # Formatting happens inside print(...) so the line behaves the same on
    # Python 2 and Python 3.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))
    frameRate = 1.0/timer.total_time
    print("fps: " + str(frameRate))

    # Visualize detections for each class
    CONF_THRESH = 0.65
    NMS_THRESH = 0.2
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        im = vis_detections_video(im, cls, dets, thresh=CONF_THRESH)

    # Anchor the FPS overlay to the right edge of the frame. For a frame
    # 1920 pixels wide this is the original x=1750; narrower frames no
    # longer get the text drawn outside the image.
    fps_x = max(10, im.shape[1] - 170)
    cv2.putText(im, '{:s} {:.2f}'.format("FPS:", frameRate), (fps_x, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
    # The window is titled with the last '/'-separated part of the path.
    cv2.imshow(videoFilePath.split('/')[-1], im)
    cv2.waitKey(20)

Hi, I want to know where the function 'vis_detections_video(im, cls, dets, thresh=CONF_THRESH)' is defined. I can't find it in demo.py. Please tell me, thank you!

That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib calls replaced by cv2, plus support for multiple detections per image.

EDIT: Took out the extra, not relevant parts.

def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw every detection of one class onto a frame using OpenCV.

    Args:
        im: Image array that is drawn on in place with cv2.
        class_name: Label written above each box.
        dets: 2-D array with one row per detection. The first four columns
            are used as two opposite box corners and the last as the score.
        thresh: Minimum score a detection needs to be drawn.

    Returns:
        The same ``im`` object, annotated if any detection passed ``thresh``.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return im

    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # cv2 drawing calls expect integer pixel coordinates.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
        # The label strip and text are drawn inside the loop so that every
        # detection gets its own label, not only the last one.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1]-20)),
                      (int(bbox[0]+200), int(bbox[1])), (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1]-2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .75, (255, 255, 255))

    return im

Thank u so much!

发自网易邮箱大师
On 08/13/2017 13:28, rtgoring wrote:

That was my reimplementation of rbgirshick's vis_detections function. It has the same functionality, with the matplotlib calls replaced by cv2, plus support for multiple detections per image.

def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw detected bounding boxes and show the most recent colour class.

    Relies on two module-level names defined outside this snippet:
    ``colorOrder`` (a list of class names seen so far) and ``lastColor``
    (the class name currently shown as the "current colour").

    Args:
        im: Image array that is drawn on in place with cv2.
        class_name: Label written above each box; also checked for the
            substrings 'red', 'green', 'yellow' and 'black'.
        dets: 2-D array with one row per detection. The first four columns
            are used as two opposite box corners and the last as the score.
        thresh: Minimum score a detection needs to be drawn.

    Returns:
        The same ``im`` object, annotated if any detection passed ``thresh``.
    """
    global lastColor
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return im

    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # cv2 drawing calls expect integer pixel coordinates, so the box
        # corners are cast the same way the label coordinates already were.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
        # Filled strip behind the label text, directly above the box.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1]-20)),
                      (int(bbox[0]+200), int(bbox[1])), (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1]-2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .75, (255, 255, 255))
        if any(color in class_name
               for color in ('red', 'green', 'yellow', 'black')):
            # An empty colorOrder would make colorOrder[-1] raise IndexError;
            # treat that as "no previous colour".
            # NOTE(review): the check below is a substring test against the
            # previous entry, kept as in the original - confirm it is intended.
            if not colorOrder or class_name not in colorOrder[-1]:
                colorOrder.append(class_name)
                print(class_name)
                lastColor = class_name
                cv2.putText(im, 'Current Color', (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))
        # Text colour follows the last colour class; later checks override
        # earlier ones, exactly as in the original sequence of ifs.
        txtColor = (0, 0, 0)
        if 'red' in lastColor:
            txtColor = (0, 0, 255)
        if 'green' in lastColor:
            txtColor = (0, 255, 0)
        if 'yellow' in lastColor:
            txtColor = (0, 255, 255)
        cv2.putText(im, '{:s}'.format(lastColor), (10, 100),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, txtColor)

    return im


You are receiving this because you commented.
Reply to this email directly, view it on GitHub, or mute the thread.

In case anyone is looking at this later, here is how to use OpenCV to load either a video file, or a directory of images. The Video File method could easily be adapted to use a webcam.

#!/usr/bin/env python

# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

"""
Demo script showing detections in sample images.

See README.md for installation instructions before running.
"""

import _init_paths
from fast_rcnn.config import cfg
from fast_rcnn.test import im_detect
from fast_rcnn.nms_wrapper import nms
from utils.timer import Timer
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import caffe, os, sys, cv2
import argparse
import time

# Class labels in score-column order; index 0 is the background entry that
# demo_video skips when it loops over CLASSES[1:].
CLASSES = (
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# Maps each --net choice to (model directory name, weights file name); the
# main block joins these into the prototxt and caffemodel paths.
NETS = {
    'vgg16': ('VGG16', 'VGG16_faster_rcnn_final.caffemodel'),
    'zf': ('ZF', 'ZF_faster_rcnn_final.caffemodel'),
}


def vis_detections_video(im, class_name, dets, thresh=0.5):
    """Draw every detection of one class onto a frame using OpenCV.

    Args:
        im: Image array that is drawn on in place with cv2.
        class_name: Label written next to each box.
        dets: 2-D array with one row per detection. The first four columns
            are used as two opposite box corners and the last as the score.
        thresh: Minimum score a detection needs to be drawn.

    Returns:
        The same ``im`` object, annotated if any detection passed ``thresh``.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return im

    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        # cv2 drawing calls expect integer pixel coordinates; the box outline
        # is now cast like the label coordinates below already were.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                      (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
        # Filled strip behind the label, centred on the top edge of the box.
        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])-10),
                      (int(bbox[0]+200), int(bbox[1])+10), (10, 10, 10), -1)
        cv2.putText(im, '{:s} {:.3f}'.format(class_name, score),
                    (int(bbox[0]), int(bbox[1]-2)),
                    cv2.FONT_HERSHEY_SIMPLEX, .45, (255, 255, 255))
    return im

def demo_video(net, im):
    """Run detection on one frame, draw the results, then save and show it.

    The annotated frame is written to the ``output`` directory under a
    timestamp-based ``.jpg`` name and shown in the window named ``'ret'``.

    Args:
        net: Network object handed to ``im_detect``.
        im: Image array for one frame, as returned by cv2.

    Raises:
        ValueError: If ``im`` is None, for example when the caller's capture
            or ``cv2.imread`` produced no image.
    """
    if im is None:
        # Fail here with a clear message instead of deep inside im_detect.
        raise ValueError('demo_video received no image (im is None)')

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    # Formatting happens inside print(...) so the line behaves the same on
    # Python 2 and Python 3.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.75
    NMS_THRESH = 0.2
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1 # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        im = vis_detections_video(im, cls, dets, thresh=CONF_THRESH)

    cv2.imwrite(os.path.join('output', str(time.time()) + '.jpg'), im)
    cv2.imshow('ret', im)

    cv2.waitKey(20)

def parse_args():
    """Build the demo's command-line parser and return the parsed options.

    The result carries ``gpu_id`` (int, default 0), ``cpu_mode`` (flag) and
    ``demo_net`` (one of the keys of ``NETS``, default 'vgg16').
    """
    cli = argparse.ArgumentParser(description='Faster R-CNN demo')
    cli.add_argument('--gpu', type=int, default=0, dest='gpu_id',
                     help='GPU device id to use [0]')
    cli.add_argument('--cpu', action='store_true', dest='cpu_mode',
                     help='Use CPU mode (overrides --gpu)')
    cli.add_argument('--net', default='vgg16', choices=NETS.keys(),
                     dest='demo_net', help='Network to use [vgg16]')
    return cli.parse_args()

if __name__ == '__main__':
    # Script entry point: load the selected network, then run the detector
    # on every frame of a video file until the stream ends.
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    args = parse_args()
    cv2.namedWindow('ret', 0)

    prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                            'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
    caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print('\n\nLoaded network {:s}'.format(caffemodel))

    ### Load directory of images (alternative input, disabled)
    # imagePath = r'C:\Users\RTGORING\Documents\Code\py-faster-rcnn\data\demo'
    # imageFiles = []
    # for f in os.listdir(imagePath):
    #     if f.endswith('jpg') or f.endswith('.jpeg'):
    #         imageFiles.append(f)
    # imageFiles = sorted(imageFiles)
    # for imageName in imageFiles:
    #     image = cv2.imread(os.path.join(imagePath,imageName))
    #     demo_video(net,image)

    ### Load Video File

    # Raw string: the path value is unchanged, but the backslashes (notably
    # "\U") can no longer be read as escape sequences.
    videoFilePath = r"C:\Users\RTGORING\Documents\Data\Vid.avi"
    videoFile = cv2.VideoCapture(videoFilePath)
    if not videoFile.isOpened():
        raise IOError('Could not open video file {:s}'.format(videoFilePath))
    try:
        while True:
            ret, image = videoFile.read()
            if not ret:
                # No frame could be read (e.g. end of the video): stop
                # cleanly instead of passing an empty frame to the detector.
                break
            demo_video(net, image)
    finally:
        # Always free the capture handle and close the display window.
        videoFile.release()
        cv2.destroyAllWindows()



Was this page helpful?
0 / 5 - 0 ratings