Hi,
A brief explanation of what I'm doing:
I'm trying to use yolo to detect a video with cv2 where cv2 will read each frame and pass it into the detect function https://github.com/pjreddie/darknet/blob/master/python/darknet.py#L125
I've modified it to accept the ndarray from cv2.VideoCapture.read directly rather than opening an image file by adding functions from https://github.com/pjreddie/darknet/blob/master/examples/detector-scipy-opencv.py
And so it works but at a very slow rate.
The major factor was the array_to_image function found in https://github.com/pjreddie/darknet/blob/master/examples/detector-scipy-opencv.py#L9
array_to_image time for one frame: 0.280557155609
Total detection time for one frame: 0.310970067978
Does anyone have a solution to speed this up?
The intent is to be able to directly open and read from a video source rather than slice up the video into images beforehand.
My code:
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
# im = load_image(image, 0, 0)
t = time.time()
im = array_to_image(image)
print('array_to_image time: {}').format(time.time() - t)
rgbgr_image(im)
num = c_int(0)
pnum = pointer(num)
predict_image(net, im)
dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
num = pnum[0]
if (nms): do_nms_obj(dets, num, meta.classes, nms);
t = time.time()
res = []
for j in range(num):
for i in range(meta.classes):
if dets[j].prob[i] > 0:
b = dets[j].bbox
res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))
res = sorted(res, key=lambda x: -x[1])
# free_image(im)
# free_detections(dets, num)
return res
You may want to check the image get_image_from_stream code (image.c) he uses in demo.c (for videos); The python implementation for mat to Image structure does a lot more operations than whats done there.
Hi @eyebies , thanks for the suggestion.
I'm not too familiar with C but it doesn't seem to be very different though.
Python implementation:
def array_to_image(arr):
arr = arr.transpose(2,0,1)
c = arr.shape[0]
h = arr.shape[1]
w = arr.shape[2]
arr = (arr/255.0).flatten()
data = dn.c_array(dn.c_float, arr)
im = dn.IMAGE(w,h,c,data)
return im
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
# im = load_image(image, 0, 0)
t = time.time()
im = array_to_image(image)
print('array_to_image time: {}').format(time.time() - t)
rgbgr_image(im)
num = c_int(0)
pnum = pointer(num)
predict_image(net, im)
dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum)
num = pnum[0]
if (nms): do_nms_obj(dets, num, meta.classes, nms);
t = time.time()
res = []
for j in range(num):
for i in range(meta.classes):
if dets[j].prob[i] > 0:
b = dets[j].bbox
res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h)))
res = sorted(res, key=lambda x: -x[1])
# free_image(im)
# free_detections(dets, num)
return res
C implementation:
void ipl_into_image(IplImage* src, image im)
{
unsigned char *data = (unsigned char *)src->imageData;
int h = src->height;
int w = src->width;
int c = src->nChannels;
int step = src->widthStep;
int i, j, k;
for(i = 0; i < h; ++i){
for(k= 0; k < c; ++k){
for(j = 0; j < w; ++j){
im.data[k*w*h + i*w + j] = data[i*step + j*c + k]/255.;
}
}
}
}
image ipl_to_image(IplImage* src)
{
int h = src->height;
int w = src->width;
int c = src->nChannels;
image out = make_image(w, h, c);
ipl_into_image(src, out);
return out;
}
image get_image_from_stream(CvCapture *cap)
{
IplImage* src = cvQueryFrame(cap);
if (!src) return make_empty_image(0,0,0);
image im = ipl_to_image(src);
rgbgr_image(im);
return im;
}
c_array function might be the culprit. Pls profile the function.
Yup that is the case:
arr = arr.transpose(2, 0, 1) time: 5.96046447754e-06
arr.shape time: 3.09944152832e-06
arr = (arr/255.0).flatten() time: 0.00742697715759
data = c_array(c_float, arr) time: 0.0640590190887
im = IMAGE(w, h, c, data) time: 1.4066696167e-05array_to_image time: 0.0716328620911
YOLOv3 detection time: 0.0984659194946
Is this what you mean by profiling it?
Found a solution in https://github.com/pjreddie/darknet/issues/289#issuecomment-342448358
Shoutout to @TheMikeyR
Most helpful comment
Found a solution in https://github.com/pjreddie/darknet/issues/289#issuecomment-342448358
Shoutout to @TheMikeyR