My PC Spec:
i5 7500
GTX 1070
ram 8gb
cuda 9.0
python 3.6.2
I got very low fps when runing the programe. Like 1 fram per 5-10 sec. and my gpu usage is 0-1%
it use cpu instead. (i'm install tensorflow gpu version only.)
`
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import cv2
cap = cv2.VideoCapture("http://192.168.1.26:81/videostream.cgi?user=admin&pwd=12345678")
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if tf.__version__ < '1.4.0':
raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')
from utils import label_map_util
from utils import visualization_utils as vis_util
export_inference_graph.py
tool can be loaded here simply by changing PATH_TO_CKPT
to point to a new .pb file.MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
file_name = os.path.basename(file.name)
if 'frozen_inference_graph.pb' in file_name:
tar_file.extract(file, os.getcwd())
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
5
, we know that this corresponds to airplane
. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be finelabel_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
with graph.as_default():
with tf.Session() as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
]:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
tensor_name)
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# Follow the convention by adding back the batch dimension
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
# Run inference
output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})
# all outputs are float32 numpy arrays, so convert types as appropriate
output_dict['num_detections'] = int(output_dict['num_detections'][0])
output_dict['detection_classes'] = output_dict[
'detection_classes'][0].astype(np.uint8)
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
return output_dict
while True:
ret, image_np = cap.read()
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks'),
use_normalized_coordinates=True,
line_thickness=8)
cv2.imshow('Obj detection', cv2.resize(image_np, (1280,720)))
if cv2.waitKey(25) & 0xFF == ord('q'):
cv2.destroyAllWindows()
break
`
I think it should use your GPU to do the actual detection.
One of the concern I have is your camera setting, since it seems to be a network device, which could be the cause of slowness. Did you experience any slowness when you test with local images?
Its heppend even if i use local images or video file
I found the problem, every time the program send calculate it open a new session on GPU and this is slow.
In my case in the function run_inference_for_single_image I move
with detection_graph.as_default():
with tf.Session() as sess:
to the main function to only call it once time and now i have a good framerate
Thanks for this shortcut tip, I was moved the
``
with detection_graph.as_default():
with tf.Session() as sess:
`````
to main loop and then it work perfect
I put my modifyed (from image1.jpg to video.avi input) code here
https://github.com/ollewelin/Tensorflow-Object-Detection
Also Need OpenCV installed with python:
$ sudo apt-get install python-opencv`
Thanks for this shortcut tip, I was moved the
with detection_graph.as_default(): with tf.Session() as sess:
to main loop and then it work perfect
What do you mean by moving it to the main loop? Sorry i'm new to this and have finally got the code to work, but it is very slow.
Thanks for this shortcut tip, I was moved the
with detection_graph.as_default(): with tf.Session() as sess:
to main loop and then it work perfect
What do you mean by moving it to the main loop? Sorry i'm new to this and have finally got the code to work, but it is very slow.
The Mentioned code is for loading the tensorflow into the memory, you need to call this only once before starting the object detection. So just place the following codes before starting the loop.
MODEL_NAME = 'inference_graph'
CWD_PATH = os.getcwd()
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
NUM_CLASSES = 2
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')`
In case anyone wants to work with tensorflow 2.0:
Run the below code
from collections import deque
import numpy as np
import argparse
from sklearn.metrics import pairwise
from imutils.video import FPS
import os
import sys
import tensorflow as tf
from imutils.video import VideoStream
import six.moves.urllib as urllib
import cv2
import imutils
import time
from imutils.video import FPS
from sklearn.metrics import pairwise
import copy
import pathlib
from collections import defaultdict
from utils import ops as utils_ops
from utils import label_map_util
from utils import visualization_utils as vis_util
utils_ops.tf = tf.compat.v1
tf.gfile = tf.io.gfile
PATH_TO_LABELS = '../../bigdata/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
model_name = 'ssdlite_mobilenet_v2_coco_2018_05_09'
model_dir = "../../bigdata/models/" + model_name + "/saved_model"
detection_model = tf.saved_model.load(str(model_dir))
detection_model = detection_model.signatures['serving_default']
def run_inference_for_single_image(model, image):
image = np.asarray(image)
input_tensor = tf.convert_to_tensor(image)
input_tensor = input_tensor[tf.newaxis,...]
# output_dict is a dict with keys detection_classes , num_detections , detection_boxes(4 coordinates of each box) , detection_scores for 100 boxes
output_dict = model(input_tensor)
# print(1,output_dict)
# num_detections gives number of objects in current frame
num_detections = int(output_dict.pop('num_detections'))
# print(2,num_detections)
# output_dict is a dict with keys detection_classes , detection_boxes(4 coordinates of each box) , detection_scores for num_detections boxes
output_dict = {key:value[0, :num_detections].numpy()
for key,value in output_dict.items()}
# adding num_detections that was earlier popped out
output_dict['num_detections'] = num_detections
# converting all values in detection_classes as ints.
output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
print(5,output_dict)
# Handle models with masks:
if 'detection_masks' in output_dict:
# Reframe the the bbox mask to the image size.
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
output_dict['detection_masks'], output_dict['detection_boxes'],
image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
tf.uint8)
output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
print(5,detection_masks_reframed)
return output_dict
def show_inference(model, image_path):
# the array based representation of the image will be used later in order to prepare the
# result image with boxes and labels on it.
# image_np = np.array(Image.open(image_path))
image_np = np.array(image_path)
print(image_np.shape)
# Actual detection.
output_dict = run_inference_for_single_image(model, image_np)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks_reframed', None),
use_normalized_coordinates=True,
line_thickness=8)
return image_np
cap=cv2.VideoCapture('../../videos/a.mp4')
time.sleep(2.0)
while True:
(grabbed, frame) = cap.read()
frame=show_inference(detection_model, frame)
cv2.imshow("version", frame)
key=cv2.waitKey(1)
if key & 0xFF == ord("q"):
break
cap.release()
cv2.destroyAllWindows()
Make sure the files from utils folder are imported correctly, else use sys module to append the location of utils folder.
Most helpful comment
I found the problem, every time the program send calculate it open a new session on GPU and this is slow.
In my case in the function run_inference_for_single_image I move
to the main function to only call it once time and now i have a good framerate