I use only the 'person' class in the VOC2007 dataset, and I also use the difficult samples to keep the training process working fine. The final model, however, can't detect anything. Does anyone know what's going on here?
Hi wait1988,
How was the loss during training?
Can we see your train prototxt?
train.prototxt:
name: "ZF"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 2"
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 96
kernel_size: 7
pad: 3
stride: 2
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 3
alpha: 0.00005
beta: 0.75
norm_region: WITHIN_CHANNEL
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
kernel_size: 3
stride: 2
pad: 1
pool: MAX
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 5
pad: 2
stride: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "norm2"
type: "LRN"
bottom: "conv2"
top: "norm2"
lrn_param {
local_size: 3
alpha: 0.00005
beta: 0.75
norm_region: WITHIN_CHANNEL
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "norm2"
top: "pool2"
pooling_param {
kernel_size: 3
stride: 2
pad: 1
pool: MAX
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 384
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 384
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "conv5"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 256
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}
layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}
layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}
layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}
layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}
layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}
layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'rpn.proposal_target_layer'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 2"
}
}
layer {
name: "roi_pool_conv5"
type: "ROIPooling"
bottom: "conv5"
bottom: "rois"
top: "roi_pool_conv5"
roi_pooling_param {
pooled_w: 6
pooled_h: 6
spatial_scale: 0.0625 # 1/16
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "roi_pool_conv5"
top: "fc6"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
scale_train: false
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
scale_train: false
}
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 8
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred"
bottom: "bbox_targets"
bottom: 'bbox_inside_weights'
bottom: 'bbox_outside_weights'
top: "bbox_loss"
loss_weight: 1
A piece of the log:
I0420 14:35:15.158990 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0030837 (* 1 = 0.0030837 loss)
I0420 14:35:15.158994 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00924739 (* 1 = 0.00924739 loss)
I0420 14:35:15.158998 16768 solver.cpp:571] Iteration 69880, lr = 0.0001
I0420 14:35:20.064649 16768 solver.cpp:242] Iteration 69900, loss = 0.328746
I0420 14:35:20.064685 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0186957 (* 1 = 0.0186957 loss)
I0420 14:35:20.064692 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0638745 (* 1 = 0.0638745 loss)
I0420 14:35:20.064695 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0173537 (* 1 = 0.0173537 loss)
I0420 14:35:20.064699 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.0119759 (* 1 = 0.0119759 loss)
I0420 14:35:20.064703 16768 solver.cpp:571] Iteration 69900, lr = 0.0001
I0420 14:35:25.063719 16768 solver.cpp:242] Iteration 69920, loss = 0.245719
I0420 14:35:25.063753 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0571181 (* 1 = 0.0571181 loss)
I0420 14:35:25.063760 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0622113 (* 1 = 0.0622113 loss)
I0420 14:35:25.063765 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.00133641 (* 1 = 0.00133641 loss)
I0420 14:35:25.063768 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00168988 (* 1 = 0.00168988 loss)
I0420 14:35:25.063772 16768 solver.cpp:571] Iteration 69920, lr = 0.0001
I0420 14:35:29.993849 16768 solver.cpp:242] Iteration 69940, loss = 0.190783
I0420 14:35:29.993882 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0851794 (* 1 = 0.0851794 loss)
I0420 14:35:29.993888 16768 solver.cpp:258] Train net output #1: cls_loss = 0.0448895 (* 1 = 0.0448895 loss)
I0420 14:35:29.993893 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.00884468 (* 1 = 0.00884468 loss)
I0420 14:35:29.993897 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.0815758 (* 1 = 0.0815758 loss)
I0420 14:35:29.993901 16768 solver.cpp:571] Iteration 69940, lr = 0.0001
I0420 14:35:34.859596 16768 solver.cpp:242] Iteration 69960, loss = 0.241285
I0420 14:35:34.859629 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.0548535 (* 1 = 0.0548535 loss)
I0420 14:35:34.859635 16768 solver.cpp:258] Train net output #1: cls_loss = 0.114608 (* 1 = 0.114608 loss)
I0420 14:35:34.859640 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0819508 (* 1 = 0.0819508 loss)
I0420 14:35:34.859644 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.00594436 (* 1 = 0.00594436 loss)
I0420 14:35:34.859648 16768 solver.cpp:571] Iteration 69960, lr = 0.0001
I0420 14:35:39.879679 16768 solver.cpp:242] Iteration 69980, loss = 0.326018
I0420 14:35:39.879714 16768 solver.cpp:258] Train net output #0: bbox_loss = 0.12343 (* 1 = 0.12343 loss)
I0420 14:35:39.879720 16768 solver.cpp:258] Train net output #1: cls_loss = 0.110242 (* 1 = 0.110242 loss)
I0420 14:35:39.879724 16768 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.0487792 (* 1 = 0.0487792 loss)
I0420 14:35:39.879729 16768 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.12166 (* 1 = 0.12166 loss)
I0420 14:35:39.879732 16768 solver.cpp:571] Iteration 69980, lr = 0.0001
speed: 0.246s / iter
Have you built your own parser in lib/datasets to assign class 0 to background and class 1 to person?
To debug I would try to use weights from a pre-trained model first.
Yes, I modified pascal_voc.py to change the original 21 classes to 2 classes, that is, background and person. All the other configurations are kept the same as when training the original 21 classes.
Did you train the same model for the 21 classes before? Did it work well?
Yes, I did. It works well. So it's weird for the one-class case.
Maybe you have the classic cache issue?
$ rm data/cache/train_gt_roidb.pkl
Hmm, I also deleted the cache folder. I really don't know what's going on. By the way, how do you prepare the training samples? I just use the person sample list to replace the original trainval.txt.
Solved. Be careful with the test.prototxt.
How was it solved? Which changes have you made in test.prototxt?
@wait1988
Have you successfully solved this problem? What performance did you achieve?
The official version can detect pedestrians with 76.7 AP (PASCAL 07 test) when the training data is PASCAL VOC07+12 trainval. I am quite satisfied with the detection results, but have you tried comparing the performance of a single-class pedestrian detector with that of a multi-class detector on the "pedestrian" class?
Is there any difference in terms of AP?