Py-faster-rcnn: ResNet Implementation for Faster-rcnn

Created on 14 Jan 2016  ·  108Comments  ·  Source: rbgirshick/py-faster-rcnn

I have recently been trying to combine a ResNet network with Faster R-CNN. As a first step, I tried to train a model with the 34-layer ResNet (the variant without the bottleneck architecture). No errors occur during training; however, the detection results are very bad. I believe something is wrong in my implementation. Here is the prototxt I used for training; can anybody suggest how I should modify it?

# Net name as reported by Caffe.
name: "ResNet34"

# Python data layer with no bottoms; it produces three tops:
# 'data', 'im_info' and 'gt_boxes'.
layer {
  name: 'input-data'
  type: 'Python'
  top: 'data'  top: 'im_info'  top: 'gt_boxes'
  python_param {
    module: 'roi_data_layer.layer'  layer: 'RoIDataLayer'
    param_str: "'num_classes': 2"
  }
}

#conv1 7x7 64 /2
# Stem: 7x7/64 stride-2 conv -> BatchNorm -> Scale -> ReLU -> 3x3 stride-2 max pool.
# pad is 3 (= (7 - 1) / 2) so the 7x7 window stays centred on the stride-2 sampling
# grid; the previous pad: 1 cropped the image border and shifted the features.
# BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 7  pad: 3  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv1_bn" type: "BatchNorm" bottom: "conv1" top: "conv1_bn" batch_norm_param { } }
layer { name: "conv1_scale" type: "Scale" bottom: "conv1_bn" top: "conv1_bn" scale_param { bias_term: true } }
layer { name: "conv1_relu" type: "ReLU" bottom: "conv1_bn" top: "conv1_bn" }
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_bn"
  top: "pool1"
  pooling_param { pool: MAX  kernel_size: 3  stride: 2 }
}

#conv2_1 3x3 64
# Identity residual unit on pool1: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with pool1, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv2_1_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_1_1_bn" type: "BatchNorm" bottom: "conv2_1_1" top: "conv2_1_1_bn" batch_norm_param { } }
layer { name: "conv2_1_1_scale" type: "Scale" bottom: "conv2_1_1_bn" top: "conv2_1_1_bn" scale_param { bias_term: true } }
layer { name: "conv2_1_1_relu" type: "ReLU" bottom: "conv2_1_1_bn" top: "conv2_1_1_bn" }
layer {
  name: "conv2_1_2"
  type: "Convolution"
  bottom: "conv2_1_1_bn"
  top: "conv2_1_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_1_2_bn" type: "BatchNorm" bottom: "conv2_1_2" top: "conv2_1_2_bn" batch_norm_param { } }
layer { name: "conv2_1_2_scale" type: "Scale" bottom: "conv2_1_2_bn" top: "conv2_1_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv2_1_sum"
  type: "Eltwise"
  bottom: "pool1"
  bottom: "conv2_1_2_bn"
  top: "conv2_1_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv2_1_sum_relu" type: "ReLU" bottom: "conv2_1_sum" top: "conv2_1_sum" }

#conv2_2 3x3 64
# Identity residual unit on conv2_1_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv2_1_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv2_2_1"
  type: "Convolution"
  bottom: "conv2_1_sum"
  top: "conv2_2_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_2_1_bn" type: "BatchNorm" bottom: "conv2_2_1" top: "conv2_2_1_bn" batch_norm_param { } }
layer { name: "conv2_2_1_scale" type: "Scale" bottom: "conv2_2_1_bn" top: "conv2_2_1_bn" scale_param { bias_term: true } }
layer { name: "conv2_2_1_relu" type: "ReLU" bottom: "conv2_2_1_bn" top: "conv2_2_1_bn" }
layer {
  name: "conv2_2_2"
  type: "Convolution"
  bottom: "conv2_2_1_bn"
  top: "conv2_2_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_2_2_bn" type: "BatchNorm" bottom: "conv2_2_2" top: "conv2_2_2_bn" batch_norm_param { } }
layer { name: "conv2_2_2_scale" type: "Scale" bottom: "conv2_2_2_bn" top: "conv2_2_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv2_2_sum"
  type: "Eltwise"
  bottom: "conv2_1_sum"
  bottom: "conv2_2_2_bn"
  top: "conv2_2_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv2_2_sum_relu" type: "ReLU" bottom: "conv2_2_sum" top: "conv2_2_sum" }

#conv2_3 3x3 64
# Identity residual unit on conv2_2_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv2_2_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv2_3_1"
  type: "Convolution"
  bottom: "conv2_2_sum"
  top: "conv2_3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_3_1_bn" type: "BatchNorm" bottom: "conv2_3_1" top: "conv2_3_1_bn" batch_norm_param { } }
layer { name: "conv2_3_1_scale" type: "Scale" bottom: "conv2_3_1_bn" top: "conv2_3_1_bn" scale_param { bias_term: true } }
layer { name: "conv2_3_1_relu" type: "ReLU" bottom: "conv2_3_1_bn" top: "conv2_3_1_bn" }
layer {
  name: "conv2_3_2"
  type: "Convolution"
  bottom: "conv2_3_1_bn"
  top: "conv2_3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_3_2_bn" type: "BatchNorm" bottom: "conv2_3_2" top: "conv2_3_2_bn" batch_norm_param { } }
layer { name: "conv2_3_2_scale" type: "Scale" bottom: "conv2_3_2_bn" top: "conv2_3_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv2_3_sum"
  type: "Eltwise"
  bottom: "conv2_2_sum"
  bottom: "conv2_3_2_bn"
  top: "conv2_3_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv2_3_sum_relu" type: "ReLU" bottom: "conv2_3_sum" top: "conv2_3_sum" }
# Projection shortcut for conv3_1: 1x1/128 stride-2 conv + BN/Scale on conv2_3_sum,
# so the shortcut matches the channel count and resolution of the residual branch.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv2_proj"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv2_proj"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 1  pad: 0  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv2_proj_bn" type: "BatchNorm" bottom: "conv2_proj" top: "conv2_proj_bn" batch_norm_param { } }
layer { name: "conv2_proj_scale" type: "Scale" bottom: "conv2_proj_bn" top: "conv2_proj_bn" scale_param { bias_term: true } }

#conv3_1 3x3 128
# Downsampling residual unit on conv2_3_sum: 3x3 stride-2 conv/BN/Scale/ReLU ->
# 3x3 conv/BN/Scale, summed with conv2_proj_bn, then ReLU.
layer {
  name: "conv3_1_1"
  type: "Convolution"
  bottom: "conv2_3_sum"
  top: "conv3_1_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_1_1_bn" type: "BatchNorm" bottom: "conv3_1_1" top: "conv3_1_1_bn" batch_norm_param { } }
layer { name: "conv3_1_1_scale" type: "Scale" bottom: "conv3_1_1_bn" top: "conv3_1_1_bn" scale_param { bias_term: true } }
layer { name: "conv3_1_1_relu" type: "ReLU" bottom: "conv3_1_1_bn" top: "conv3_1_1_bn" }
layer {
  name: "conv3_1_2"
  type: "Convolution"
  bottom: "conv3_1_1_bn"
  top: "conv3_1_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_1_2_bn" type: "BatchNorm" bottom: "conv3_1_2" top: "conv3_1_2_bn" batch_norm_param { } }
layer { name: "conv3_1_2_scale" type: "Scale" bottom: "conv3_1_2_bn" top: "conv3_1_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv3_1_sum"
  type: "Eltwise"
  bottom: "conv2_proj_bn"
  bottom: "conv3_1_2_bn"
  top: "conv3_1_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv3_1_sum_relu" type: "ReLU" bottom: "conv3_1_sum" top: "conv3_1_sum" }

#conv3_2 3x3 128
# Identity residual unit on conv3_1_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv3_1_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv3_2_1"
  type: "Convolution"
  bottom: "conv3_1_sum"
  top: "conv3_2_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_2_1_bn" type: "BatchNorm" bottom: "conv3_2_1" top: "conv3_2_1_bn" batch_norm_param { } }
layer { name: "conv3_2_1_scale" type: "Scale" bottom: "conv3_2_1_bn" top: "conv3_2_1_bn" scale_param { bias_term: true } }
layer { name: "conv3_2_1_relu" type: "ReLU" bottom: "conv3_2_1_bn" top: "conv3_2_1_bn" }
layer {
  name: "conv3_2_2"
  type: "Convolution"
  bottom: "conv3_2_1_bn"
  top: "conv3_2_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_2_2_bn" type: "BatchNorm" bottom: "conv3_2_2" top: "conv3_2_2_bn" batch_norm_param { } }
layer { name: "conv3_2_2_scale" type: "Scale" bottom: "conv3_2_2_bn" top: "conv3_2_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv3_2_sum"
  type: "Eltwise"
  bottom: "conv3_1_sum"
  bottom: "conv3_2_2_bn"
  top: "conv3_2_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv3_2_sum_relu" type: "ReLU" bottom: "conv3_2_sum" top: "conv3_2_sum" }

#conv3_3 3x3 128
# Identity residual unit on conv3_2_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv3_2_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv3_3_1"
  type: "Convolution"
  bottom: "conv3_2_sum"
  top: "conv3_3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_3_1_bn" type: "BatchNorm" bottom: "conv3_3_1" top: "conv3_3_1_bn" batch_norm_param { } }
layer { name: "conv3_3_1_scale" type: "Scale" bottom: "conv3_3_1_bn" top: "conv3_3_1_bn" scale_param { bias_term: true } }
layer { name: "conv3_3_1_relu" type: "ReLU" bottom: "conv3_3_1_bn" top: "conv3_3_1_bn" }
layer {
  name: "conv3_3_2"
  type: "Convolution"
  bottom: "conv3_3_1_bn"
  top: "conv3_3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_3_2_bn" type: "BatchNorm" bottom: "conv3_3_2" top: "conv3_3_2_bn" batch_norm_param { } }
layer { name: "conv3_3_2_scale" type: "Scale" bottom: "conv3_3_2_bn" top: "conv3_3_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv3_3_sum"
  type: "Eltwise"
  bottom: "conv3_2_sum"
  bottom: "conv3_3_2_bn"
  top: "conv3_3_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv3_3_sum_relu" type: "ReLU" bottom: "conv3_3_sum" top: "conv3_3_sum" }

#conv3_4 3x3 128
# Identity residual unit on conv3_3_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv3_3_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv3_4_1"
  type: "Convolution"
  bottom: "conv3_3_sum"
  top: "conv3_4_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_4_1_bn" type: "BatchNorm" bottom: "conv3_4_1" top: "conv3_4_1_bn" batch_norm_param { } }
layer { name: "conv3_4_1_scale" type: "Scale" bottom: "conv3_4_1_bn" top: "conv3_4_1_bn" scale_param { bias_term: true } }
layer { name: "conv3_4_1_relu" type: "ReLU" bottom: "conv3_4_1_bn" top: "conv3_4_1_bn" }
layer {
  name: "conv3_4_2"
  type: "Convolution"
  bottom: "conv3_4_1_bn"
  top: "conv3_4_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_4_2_bn" type: "BatchNorm" bottom: "conv3_4_2" top: "conv3_4_2_bn" batch_norm_param { } }
layer { name: "conv3_4_2_scale" type: "Scale" bottom: "conv3_4_2_bn" top: "conv3_4_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv3_4_sum"
  type: "Eltwise"
  bottom: "conv3_3_sum"
  bottom: "conv3_4_2_bn"
  top: "conv3_4_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv3_4_sum_relu" type: "ReLU" bottom: "conv3_4_sum" top: "conv3_4_sum" }
# Projection shortcut for conv4_1: 1x1/256 stride-2 conv + BN/Scale on conv3_4_sum,
# so the shortcut matches the channel count and resolution of the residual branch.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv3_proj"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv3_proj"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 1  pad: 0  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv3_proj_bn" type: "BatchNorm" bottom: "conv3_proj" top: "conv3_proj_bn" batch_norm_param { } }
layer { name: "conv3_proj_scale" type: "Scale" bottom: "conv3_proj_bn" top: "conv3_proj_bn" scale_param { bias_term: true } }

#conv4_1 3x3 256
# Downsampling residual unit on conv3_4_sum: 3x3 stride-2 conv/BN/Scale/ReLU ->
# 3x3 conv/BN/Scale, summed with conv3_proj_bn, then ReLU.
layer {
  name: "conv4_1_1"
  type: "Convolution"
  bottom: "conv3_4_sum"
  top: "conv4_1_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_1_1_bn" type: "BatchNorm" bottom: "conv4_1_1" top: "conv4_1_1_bn" batch_norm_param { } }
layer { name: "conv4_1_1_scale" type: "Scale" bottom: "conv4_1_1_bn" top: "conv4_1_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_1_1_relu" type: "ReLU" bottom: "conv4_1_1_bn" top: "conv4_1_1_bn" }
layer {
  name: "conv4_1_2"
  type: "Convolution"
  bottom: "conv4_1_1_bn"
  top: "conv4_1_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_1_2_bn" type: "BatchNorm" bottom: "conv4_1_2" top: "conv4_1_2_bn" batch_norm_param { } }
layer { name: "conv4_1_2_scale" type: "Scale" bottom: "conv4_1_2_bn" top: "conv4_1_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_1_sum"
  type: "Eltwise"
  bottom: "conv3_proj_bn"
  bottom: "conv4_1_2_bn"
  top: "conv4_1_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_1_sum_relu" type: "ReLU" bottom: "conv4_1_sum" top: "conv4_1_sum" }

#conv4_2 3x3 256
# Identity residual unit on conv4_1_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv4_1_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_2_1"
  type: "Convolution"
  bottom: "conv4_1_sum"
  top: "conv4_2_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_2_1_bn" type: "BatchNorm" bottom: "conv4_2_1" top: "conv4_2_1_bn" batch_norm_param { } }
layer { name: "conv4_2_1_scale" type: "Scale" bottom: "conv4_2_1_bn" top: "conv4_2_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_2_1_relu" type: "ReLU" bottom: "conv4_2_1_bn" top: "conv4_2_1_bn" }
layer {
  name: "conv4_2_2"
  type: "Convolution"
  bottom: "conv4_2_1_bn"
  top: "conv4_2_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_2_2_bn" type: "BatchNorm" bottom: "conv4_2_2" top: "conv4_2_2_bn" batch_norm_param { } }
layer { name: "conv4_2_2_scale" type: "Scale" bottom: "conv4_2_2_bn" top: "conv4_2_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_2_sum"
  type: "Eltwise"
  bottom: "conv4_1_sum"
  bottom: "conv4_2_2_bn"
  top: "conv4_2_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_2_sum_relu" type: "ReLU" bottom: "conv4_2_sum" top: "conv4_2_sum" }

#conv4_3 3x3 256
# Identity residual unit on conv4_2_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv4_2_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_3_1"
  type: "Convolution"
  bottom: "conv4_2_sum"
  top: "conv4_3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_3_1_bn" type: "BatchNorm" bottom: "conv4_3_1" top: "conv4_3_1_bn" batch_norm_param { } }
layer { name: "conv4_3_1_scale" type: "Scale" bottom: "conv4_3_1_bn" top: "conv4_3_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_3_1_relu" type: "ReLU" bottom: "conv4_3_1_bn" top: "conv4_3_1_bn" }
layer {
  name: "conv4_3_2"
  type: "Convolution"
  bottom: "conv4_3_1_bn"
  top: "conv4_3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_3_2_bn" type: "BatchNorm" bottom: "conv4_3_2" top: "conv4_3_2_bn" batch_norm_param { } }
layer { name: "conv4_3_2_scale" type: "Scale" bottom: "conv4_3_2_bn" top: "conv4_3_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_3_sum"
  type: "Eltwise"
  bottom: "conv4_2_sum"
  bottom: "conv4_3_2_bn"
  top: "conv4_3_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_3_sum_relu" type: "ReLU" bottom: "conv4_3_sum" top: "conv4_3_sum" }

#conv4_4 3x3 256
# Identity residual unit on conv4_3_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv4_3_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_4_1"
  type: "Convolution"
  bottom: "conv4_3_sum"
  top: "conv4_4_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_4_1_bn" type: "BatchNorm" bottom: "conv4_4_1" top: "conv4_4_1_bn" batch_norm_param { } }
layer { name: "conv4_4_1_scale" type: "Scale" bottom: "conv4_4_1_bn" top: "conv4_4_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_4_1_relu" type: "ReLU" bottom: "conv4_4_1_bn" top: "conv4_4_1_bn" }
layer {
  name: "conv4_4_2"
  type: "Convolution"
  bottom: "conv4_4_1_bn"
  top: "conv4_4_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_4_2_bn" type: "BatchNorm" bottom: "conv4_4_2" top: "conv4_4_2_bn" batch_norm_param { } }
layer { name: "conv4_4_2_scale" type: "Scale" bottom: "conv4_4_2_bn" top: "conv4_4_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_4_sum"
  type: "Eltwise"
  bottom: "conv4_3_sum"
  bottom: "conv4_4_2_bn"
  top: "conv4_4_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_4_sum_relu" type: "ReLU" bottom: "conv4_4_sum" top: "conv4_4_sum" }

#conv4_5 3x3 256
# Identity residual unit on conv4_4_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv4_4_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_5_1"
  type: "Convolution"
  bottom: "conv4_4_sum"
  top: "conv4_5_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_5_1_bn" type: "BatchNorm" bottom: "conv4_5_1" top: "conv4_5_1_bn" batch_norm_param { } }
layer { name: "conv4_5_1_scale" type: "Scale" bottom: "conv4_5_1_bn" top: "conv4_5_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_5_1_relu" type: "ReLU" bottom: "conv4_5_1_bn" top: "conv4_5_1_bn" }
layer {
  name: "conv4_5_2"
  type: "Convolution"
  bottom: "conv4_5_1_bn"
  top: "conv4_5_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_5_2_bn" type: "BatchNorm" bottom: "conv4_5_2" top: "conv4_5_2_bn" batch_norm_param { } }
layer { name: "conv4_5_2_scale" type: "Scale" bottom: "conv4_5_2_bn" top: "conv4_5_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_5_sum"
  type: "Eltwise"
  bottom: "conv4_4_sum"
  bottom: "conv4_5_2_bn"
  top: "conv4_5_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_5_sum_relu" type: "ReLU" bottom: "conv4_5_sum" top: "conv4_5_sum" }

#conv4_6 3x3 256
# Identity residual unit on conv4_5_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv4_5_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_6_1"
  type: "Convolution"
  bottom: "conv4_5_sum"
  top: "conv4_6_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_6_1_bn" type: "BatchNorm" bottom: "conv4_6_1" top: "conv4_6_1_bn" batch_norm_param { } }
layer { name: "conv4_6_1_scale" type: "Scale" bottom: "conv4_6_1_bn" top: "conv4_6_1_bn" scale_param { bias_term: true } }
layer { name: "conv4_6_1_relu" type: "ReLU" bottom: "conv4_6_1_bn" top: "conv4_6_1_bn" }
layer {
  name: "conv4_6_2"
  type: "Convolution"
  bottom: "conv4_6_1_bn"
  top: "conv4_6_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_6_2_bn" type: "BatchNorm" bottom: "conv4_6_2" top: "conv4_6_2_bn" batch_norm_param { } }
layer { name: "conv4_6_2_scale" type: "Scale" bottom: "conv4_6_2_bn" top: "conv4_6_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv4_6_sum"
  type: "Eltwise"
  bottom: "conv4_5_sum"
  bottom: "conv4_6_2_bn"
  top: "conv4_6_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv4_6_sum_relu" type: "ReLU" bottom: "conv4_6_sum" top: "conv4_6_sum" }
# Projection shortcut for conv5_1: 1x1/512 stride-2 conv + BN/Scale on conv4_6_sum,
# so the shortcut matches the channel count and resolution of the residual branch.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv4_proj"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv4_proj"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 1  pad: 0  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv4_proj_bn" type: "BatchNorm" bottom: "conv4_proj" top: "conv4_proj_bn" batch_norm_param { } }
layer { name: "conv4_proj_scale" type: "Scale" bottom: "conv4_proj_bn" top: "conv4_proj_bn" scale_param { bias_term: true } }

#conv5_1 3x3 512
# Downsampling residual unit on conv4_6_sum: 3x3 stride-2 conv/BN/Scale/ReLU ->
# 3x3 conv/BN/Scale, summed with conv4_proj_bn, then ReLU.
layer {
  name: "conv5_1_1"
  type: "Convolution"
  bottom: "conv4_6_sum"
  top: "conv5_1_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 2
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_1_1_bn" type: "BatchNorm" bottom: "conv5_1_1" top: "conv5_1_1_bn" batch_norm_param { } }
layer { name: "conv5_1_1_scale" type: "Scale" bottom: "conv5_1_1_bn" top: "conv5_1_1_bn" scale_param { bias_term: true } }
layer { name: "conv5_1_1_relu" type: "ReLU" bottom: "conv5_1_1_bn" top: "conv5_1_1_bn" }
layer {
  name: "conv5_1_2"
  type: "Convolution"
  bottom: "conv5_1_1_bn"
  top: "conv5_1_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_1_2_bn" type: "BatchNorm" bottom: "conv5_1_2" top: "conv5_1_2_bn" batch_norm_param { } }
layer { name: "conv5_1_2_scale" type: "Scale" bottom: "conv5_1_2_bn" top: "conv5_1_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv5_1_sum"
  type: "Eltwise"
  bottom: "conv4_proj_bn"
  bottom: "conv5_1_2_bn"
  top: "conv5_1_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv5_1_sum_relu" type: "ReLU" bottom: "conv5_1_sum" top: "conv5_1_sum" }

#conv5_2 3x3 512
# Identity residual unit on conv5_1_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv5_1_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv5_2_1"
  type: "Convolution"
  bottom: "conv5_1_sum"
  top: "conv5_2_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_2_1_bn" type: "BatchNorm" bottom: "conv5_2_1" top: "conv5_2_1_bn" batch_norm_param { } }
layer { name: "conv5_2_1_scale" type: "Scale" bottom: "conv5_2_1_bn" top: "conv5_2_1_bn" scale_param { bias_term: true } }
layer { name: "conv5_2_1_relu" type: "ReLU" bottom: "conv5_2_1_bn" top: "conv5_2_1_bn" }
layer {
  name: "conv5_2_2"
  type: "Convolution"
  bottom: "conv5_2_1_bn"
  top: "conv5_2_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_2_2_bn" type: "BatchNorm" bottom: "conv5_2_2" top: "conv5_2_2_bn" batch_norm_param { } }
layer { name: "conv5_2_2_scale" type: "Scale" bottom: "conv5_2_2_bn" top: "conv5_2_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv5_2_sum"
  type: "Eltwise"
  bottom: "conv5_1_sum"
  bottom: "conv5_2_2_bn"
  top: "conv5_2_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv5_2_sum_relu" type: "ReLU" bottom: "conv5_2_sum" top: "conv5_2_sum" }

#conv5_3 3x3 512
# Identity residual unit on conv5_2_sum: 3x3 conv/BN/Scale/ReLU -> 3x3 conv/BN/Scale,
# summed with conv5_2_sum, then ReLU.
# Each BatchNorm is followed by an in-place Scale (bias_term: true) because Caffe's
# BatchNorm only normalizes; the learnable per-channel gain and offset live in Scale.
layer {
  name: "conv5_3_1"
  type: "Convolution"
  bottom: "conv5_2_sum"
  top: "conv5_3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_3_1_bn" type: "BatchNorm" bottom: "conv5_3_1" top: "conv5_3_1_bn" batch_norm_param { } }
layer { name: "conv5_3_1_scale" type: "Scale" bottom: "conv5_3_1_bn" top: "conv5_3_1_bn" scale_param { bias_term: true } }
layer { name: "conv5_3_1_relu" type: "ReLU" bottom: "conv5_3_1_bn" top: "conv5_3_1_bn" }
layer {
  name: "conv5_3_2"
  type: "Convolution"
  bottom: "conv5_3_1_bn"
  top: "conv5_3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 512  kernel_size: 3  pad: 1  stride: 1
    weight_filler { type: "msra" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "conv5_3_2_bn" type: "BatchNorm" bottom: "conv5_3_2" top: "conv5_3_2_bn" batch_norm_param { } }
layer { name: "conv5_3_2_scale" type: "Scale" bottom: "conv5_3_2_bn" top: "conv5_3_2_bn" scale_param { bias_term: true } }
layer {
  name: "conv5_3_sum"
  type: "Eltwise"
  bottom: "conv5_2_sum"
  bottom: "conv5_3_2_bn"
  top: "conv5_3_sum"
  eltwise_param { operation: SUM }
}
layer { name: "conv5_3_sum_relu" type: "ReLU" bottom: "conv5_3_sum" top: "conv5_3_sum" }

#========= RPN ============

# 3x3/512 conv (+ in-place ReLU) over conv5_3_sum; its output "rpn/output"
# feeds both 1x1 RPN heads below.
layer {
  name: "rpn_conv/3x3"
  type: "Convolution"
  bottom: "conv5_3_sum"
  top: "rpn/output"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "rpn_relu/3x3"
  type: "ReLU"
  bottom: "rpn/output"
  top: "rpn/output"
}

# Objectness head: 18 channels = 2 (bg/fg) * 9 (anchors).
layer {
  name: "rpn_cls_score"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_cls_score"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 18
    kernel_size: 1
    pad: 0
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# Box-regression head: 36 channels = 4 * 9 (anchors).
layer {
  name: "rpn_bbox_pred"
  type: "Convolution"
  bottom: "rpn/output"
  top: "rpn_bbox_pred"
  param {
    lr_mult: 1.0
  }
  param {
    lr_mult: 2.0
  }
  convolution_param {
    num_output: 36
    kernel_size: 1
    pad: 0
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# Reshape the objectness scores so that axis 1 has size 2.
layer {
  name: "rpn_cls_score_reshape"
  type: "Reshape"
  bottom: "rpn_cls_score"
  top: "rpn_cls_score_reshape"
  reshape_param {
    shape {
      dim: 0
      dim: 2
      dim: -1
      dim: 0
    }
  }
}

# NOTE on stride: the RPN and RoI pooling both read conv5_3_sum, which is
# downsampled 32x relative to the input image (stride 2 in conv1, pool1,
# conv3_1, conv4_1 and conv5_1: 2^5 = 32). feat_stride in both Python layers
# and spatial_scale in roi_pool5 must therefore be 32 and 1/32. With the
# previous values (16 and 1/16) anchors and RoIs were mapped onto the wrong
# feature-map cells.
layer {
  name: 'rpn-data'
  type: 'Python'
  bottom: 'rpn_cls_score'
  bottom: 'gt_boxes'
  bottom: 'im_info'
  bottom: 'data'
  top: 'rpn_labels'
  top: 'rpn_bbox_targets'
  top: 'rpn_bbox_inside_weights'
  top: 'rpn_bbox_outside_weights'
  python_param {
    module: 'rpn.anchor_target_layer'
    layer: 'AnchorTargetLayer'
    # Must equal the total downsampling of conv5_3_sum (32).
    param_str: "'feat_stride': 32"
  }
}

# RPN objectness loss; labels equal to -1 are ignored.
layer {
  name: "rpn_loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "rpn_cls_score_reshape"
  bottom: "rpn_labels"
  propagate_down: 1
  propagate_down: 0
  top: "rpn_cls_loss"
  loss_weight: 1
  loss_param {
    ignore_label: -1
    normalize: true
  }
}

# RPN box-regression loss.
layer {
  name: "rpn_loss_bbox"
  type: "SmoothL1Loss"
  bottom: "rpn_bbox_pred"
  bottom: "rpn_bbox_targets"
  bottom: 'rpn_bbox_inside_weights'
  bottom: 'rpn_bbox_outside_weights'
  top: "rpn_loss_bbox"
  loss_weight: 1
  smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
  name: "rpn_cls_prob"
  type: "Softmax"
  bottom: "rpn_cls_score_reshape"
  top: "rpn_cls_prob"
}

# Restore the 18-channel layout after the softmax.
layer {
  name: 'rpn_cls_prob_reshape'
  type: 'Reshape'
  bottom: 'rpn_cls_prob'
  top: 'rpn_cls_prob_reshape'
  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
  name: 'proposal'
  type: 'Python'
  bottom: 'rpn_cls_prob_reshape'
  bottom: 'rpn_bbox_pred'
  bottom: 'im_info'
  top: 'rpn_rois'
#  top: 'rpn_scores'
  python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    # Must match the feat_stride given to 'rpn-data' (32).
    param_str: "'feat_stride': 32"
  }
}

#layer {
#  name: 'debug-data'
#  type: 'Python'
#  bottom: 'data'
#  bottom: 'rpn_rois'
#  bottom: 'rpn_scores'
#  python_param {
#    module: 'rpn.debug_layer'
#    layer: 'RPNDebugLayer'
#  }
#}

layer {
  name: 'roi-data'
  type: 'Python'
  bottom: 'rpn_rois'
  bottom: 'gt_boxes'
  top: 'rois'
  top: 'labels'
  top: 'bbox_targets'
  top: 'bbox_inside_weights'
  top: 'bbox_outside_weights'
  python_param {
    module: 'rpn.proposal_target_layer'
    layer: 'ProposalTargetLayer'
    param_str: "'num_classes': 2"
  }
}

#========= RCNN ============

layer {
  name: "roi_pool5"
  type: "ROIPooling"
  bottom: "conv5_3_sum"
  bottom: "rois"
  top: "pool5"
  roi_pooling_param {
    pooled_w: 7
    pooled_h: 7
    spatial_scale: 0.03125 # 1/32, the downsampling factor of conv5_3_sum
  }
}
# Classification head: InnerProduct on pool5 with 2 outputs.
layer {
  name: "cls_score"
  type: "InnerProduct"
  bottom: "pool5"
  top: "cls_score"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# Box-regression head: InnerProduct on pool5 with 8 outputs.
layer {
  name: "bbox_pred"
  type: "InnerProduct"
  bottom: "pool5"
  top: "bbox_pred"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 8
    weight_filler { type: "gaussian" std: 0.001 }
    bias_filler { type: "constant" value: 0 }
  }
}
# Detection losses: softmax loss on cls_score/labels and smooth-L1 loss on
# bbox_pred against the targets and weights produced by 'roi-data'.
layer {
  name: "loss_cls"
  type: "SoftmaxWithLoss"
  bottom: "cls_score"
  bottom: "labels"
  propagate_down: 1
  propagate_down: 0
  top: "loss_cls"
  loss_weight: 1
}
layer {
  name: "loss_bbox"
  type: "SmoothL1Loss"
  bottom: "bbox_pred"
  bottom: "bbox_targets"
  bottom: "bbox_inside_weights"
  bottom: "bbox_outside_weights"
  top: "loss_bbox"
  loss_weight: 1
}

Most helpful comment

Attaching my train.prototxt. Solver is the same as VGG-16.
train.txt

All 108 comments

Hi @twtygqyy, what training error and validation error are you obtaining for your ResNet?

Hi @victorhcm, I'm still working on it, and I have found that it is not easy to train a Faster R-CNN model from scratch with ResNet. Fortunately, the authors released the pre-trained models yesterday; you can download them from https://github.com/KaimingHe/deep-residual-networks to fine-tune a Faster R-CNN model.

Thank you, @twtygqyy!

@twtygqyy
Actually, I have two questions. First, is it straightforward to use BVLCNet instead of ZF or VGG to train Faster R-CNN, i.e. to fine-tune it from the earlier model weights?

The second question is: have you had any success in fine-tuning ResNet? Could you please share your experience?

@kshalini
For the first question: it is straightforward to change the network and use a pre-trained model for fine-tuning. The only modification needed is to insert the RPN layers before the fully connected layers.

For the second question: in order to use the ResNet model for fine-tuning, it is necessary to update the Caffe fork currently used by py-faster-rcnn, since it is an old version without the BN and scaling layers that ResNet requires.
I tried updating the Caffe version, and I'm currently training Faster R-CNN with the ResNet model. However, I found that training is very slow; maybe I made some mistakes. I will see whether I can get a good model soon.

I've just updated the caffe submodule to one rebased onto Caffe master as of this morning (commit 33f2445).

@rbgirshick Thanks!

@rbgirshick Thanks!

Has anyone been successful in training ResNet Faster R-CNN models? If so, could you upload the prototxt and solver files?
I'm unable to fine-tune for tasks on which I had success using the ZF model.

a naive question here. is there any comparative study on using plain resNet vs doing a pyfasterRCNN+resNet?

on the VOC datasets, the resNET-51 seems to give >80% accuracy already. in case we are dealing with lesser no. of object categories (say < 100), what would be better recommendation - just go with resNET (34, 51 etc.) or to go with resNET + pyFasterRCNN?

Hi @kshalini, do you need the localization of the objects? If that is the case, I think you should go with resNET + pyFasterRCNN, as resNET alone just provides the object class scores.

@victorhcm
yes localization is desirable. maybe not always, but in some cases yes. i get it now. thanks!

in order to train pyFasterRCNN with resNET, we also need the (resnet) train_val.prototxt which am not able to locate anywhere yet. only deploy.prototxt seems to be available. does someone have one? (maybe @twtygqyy ?)

@kshalini : Take a look at models/VGG/faster_rcnn_alt_opt/ . The solvers and train-val equivalents are there. For eg: in stage1_fast_rcnn_solver30k40k.pt the train prototxt is
train_net: "models/VGG16/faster_rcnn_alt_opt/stage1_fast_rcnn_train.pt"
for pyfasterRCNN with resNet, i think you have to change models and follow a similar format.

@kshalini Actually, MSRA used ResNet with the RPN layers from the Faster-rcnn paper to do object detection in the MSCOCO object detection challenge (80 categories). I don't quite understand your question about choosing between ResNet and ResNet Faster-rcnn to deal with a smaller number of object categories.

@kshalini There are no region proposal networks in the ResNet train_val prototxt; you have to insert RPN layers for object detection instead of using only ResNet for classification.
As for how to insert the RPN layers, you can take the prototxt I showed at the beginning of this issue as an example and modify the ResNet network according to https://github.com/KaimingHe/deep-residual-networks to fine-tune a faster-rcnn model

@twtygqyy @siddharthm83 @victorhcm

thanks! actually i had managed to get a pyFasterRCNN trained by finetuning the VGGnet using the end2end method, about a month ago and the results were quite decent (>85% classification for my dataset with about 50 categories).

i now want to try this using resNET for classification. just that I am struggling to locate a train_val.prototxt for resNet. The Kaiming He, Github link just has the deploy.prototxt. I don't know yet how to modify that and make it into a train_val (i think some edits needs to go in at the top and bottom)

that's where I am looking for some help.

@kshalini
Please check the following link for a three-class train_val example
https://github.com/twtygqyy/deep-residual-networks/tree/master/prototxt/ResNet-50-3classes-train_val

@twtygqyy awesome!!! thanks a lot. will try this out shortly.

as a first step - to train as a simple Caffe model and then step-2, in combination with fasterRCNN, just so that I know if am getting anything wrong along the path.

are there any additional instructions to keep in mind while training? (like flip or other data augmentations like skew, scale etc.)

@kshalini ; i dont completely understand your question. I thought you wanted to do detection and hence asking your question here (faster-rcnn). Can you explain what your use case is?
If you only want classification, training resnet from scratch for imagenet would possibly need a ton of GPU memory and multi-gpu. If you just need to train for a smaller subset for classification, i would recommend fine tuning. See eg here: http://caffe.berkeleyvision.org/gathered/examples/finetune_flickr_style.html

@siddharthm83 my use case is to classify types of cars.

initially i tried basic finetuning (no rcnn) and got up to a certain level of accuracy (~70%). later i tried out faster-rcnn (finetuned vgg) and i got around ~80% classification accuracy.

now am exploring if i can use resNet for the same. so i want to try this out in steps - first without rcnn and then with faster-rcnn. I am comparing accuracy and speed of these approaches.

though the primary objective is classification, the reason i am interested in localization also is to analyze the failure cases (false +ves etc.) to understand where it is going wrong.

Please advise if I am missing something. Thanks.

@kshalini , good problem to solve. Check Hinton's paper on dark knowledge where he talks about building specialist nets to classify datasets where the datasets can be easily confused (too similar).
http://arxiv.org/pdf/1503.02531v1.pdf

@kshalini How did you finetune the pyfasterrcnn with VGG16?

@arushk1
You can modify the 'num_classes' and 'num_output' in prototxt files from models/VGG16/faster_rcnn_end2end and use --weights data/imagenet_models/VGG16.v2.caffemodel option for finetuning

@arushk1, yes as @twtygqyy says - I did exactly the same. just make sure you get your path settings (to your data etc.) right. it should start showing signs of converging after sometime. good luck!

@twtygqyy

Are you able to replace VGG16 with ResNet in py-faster-rcnn and use Kaiming's pretrained models to finetune py-faster-rcnn successfully?

@kaishijeng Yes, I did, just by inserting RPN layers after conv5, and I added one additional conv layer for size normalization. I'm not sure whether I did it the right way, but it seems to work for me.

@twtygqyy

Which pretrained resnet model (50, 101 or 152) do you use? Do you mind sharing your trained model and train.prototxt? I like to compare performance with VGG16.

@kaishijeng
I fine-tuned ResNet50 on my own dataset with only 2 classes (1 for background) for test. Due to the memory issue, I deleted several blocks for each conv and only trained with 20,000 iterations to check whether it can work or not. Thus the performance cannot be compared with full ResNet + faster-rcnn.

@twtygqyy Why did you insert RPN layers after conv5 and not before conv5? See the quote from the paper below.

We compute the full-image shared conv feature maps using ... (conv1x, conv2x, conv3x, and conv4x ... These layers are shared by a region proposal network ... and a Fast R-CNN detection network. RoI pooling is performed before conv5_1. ... all layers of conv5x and up are adopted for each region.

@SilverWaveGL Thanks for pointing out that, it should be like the quote you mentioned for reproducing the result in the paper. In my case, just for a quick test, I removed several repeated blocks in early convs since I do not have enough memory on my machine for a full ResNet faster-rcnn training.

@twtygqyy So, for finetuning, I use the faster_rcnn_end2end.sh script? with --weights?

@twtygqyy @SilverWaveGL @kaishijeng, you have to insert the RPN and ROI pooling before Conv5x (I inserted it right after res4f). In addition, you need to make the pooled_w & pooled_h params in the ROIpooling layer to equal 14 (since in the Resnet architecture, the output size of conv4x is 14x14; in VGG-16 it was 7x7)

I just finished training with Resnet-50 and faster RCNN on VOC2007 trainval and here is my final loss. I can't run the test script since I don't have matlab installed on my ubuntubox yet. Will update when i find the mAP.
I0217 09:16:19.035871 2471 solver.cpp:229] Iteration 69980, loss = 0.616664
I0217 09:16:19.035902 2471 solver.cpp:245] Train net output #0: loss_bbox = 0.33329 (* 1 = 0.33329 loss)
I0217 09:16:19.035909 2471 solver.cpp:245] Train net output #1: loss_cls = 0.309969 (* 1 = 0.309969 loss)
I0217 09:16:19.035914 2471 solver.cpp:245] Train net output #2: rpn_cls_loss = 0.0435722 (* 1 = 0.0435722 loss)
I0217 09:16:19.035920 2471 solver.cpp:245] Train net output #3: rpn_loss_bbox = 0.0399682 (* 1 = 0.0399682 loss)

@siddharthm83

Can you share your solver.prototxt, train.prototxt and test.prototxt?

Attaching my train.prototxt. Solver is the same as VGG-16.
train.txt

@siddharthm83 Thanks, Super! How much memory you used for such a full network training? 4G of memory is enough for VGG+faster-rcnn (with CuDNN), but for ResNet50_faster-rcnn, it seems not enough.

@arushk1 yes, training can be started like this:
./tools/train_net.py --gpu 1 --weights data/imagenet_models/ResNet-50-model.caffemodel --imdb imagenet_train --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/ResNet-50/faster_rcnn_end2end/solver.prototxt

@twtygqyy @siddharthm83

i tried the link for resnet-50 (by @twtygqyy ), and did a finetuning on my 30-class dataset.

also i just tried plain resNET (no faster rcnn) as I wanted to try something quick first, and since I am new to resNet. and i ran only 10K iterations

everything ran smoothly. the Loss started from around 3.5 and went down gradually to .000xx levels. I had kept the initial learning rate at .001 and reduced by factor of 10, after 7.5K iterations.

but during classification, i get all equal values for all the classes. not sure what I am missing. can you pls help.

@janakipj Pure classification with ResNet, right? Not sure what's wrong with your testing. For fine-tuning, did you change the number of classes together with the name of the fully connected layer for training and testing? What's the validation accuracy during your training? The training loss seems correct according to your description.

yes @twtygqyy , pure classification experiment (1st steps) with ResNet.

yes I changed the num_output in the final layer to 30. In your train_val, the last layer is fc3 with 3 classes so i set it to 30 in my case. that's all.

and at the top, i set it to use the mean_image_file instead of the channel wise mean pixel values. from your train_val. And I also commented out the "mirror = true".

I tried testing the accuracy of my 10k iterations snapshot and it gave me 97%.

For testing, I use the net.forward (standard python code) and try to interpret the output of the last layer using the key "fc3" which is the last layer in your trainval. And I get all equal values.

should i do anything with the bias or use "xavier" for weight filler.. can you guide me here?

@janakipj
Try

image = caffe.io.load_image(image_path)                      
prediction = net.predict([image])
top_1 = net.blobs['prob'].data[0].flatten().argsort()[-1:-2:-1] 

with the deploy prototxt

@twtygqyy thanks. I can see one big gap in what I am doing but I would need your help / input.

I am using the deploy.prototxt of KaimingHe's ResNet-50 from here: https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt

Since you only shared the train_val and solver in your repo, I took the deploy from the other one. I had earlier deployed using Kaiming's prototxt along with his trained model and it worked fine. I assumed it would work for this ResNet-50 also

If you can please share your deploy.prototxt for your corresponding Train_val (3 class) - i think that would make it simpler?

thanks @twtygqyy !

just tried and it works!

am now going to be running the detailed test for my dataset to benchmark, get mAP etc. will report back here how that goes.

So, cleared first hurdle. next step would be to try this with pyFaster-rcNN.

Can you please suggest the right set of prototxts for those also (train and deploy) ?

To run ResNet with fasterrcnn, I need to train the resnet for classification as it is given in the KaimingHe's repo (with 21 classes) and then train the py-faster-rcnn on that model according to the instructions given above (also with 21 classes). Right?

What if I want to train py-faster-rcnn for classes that are not one of the original 21 classes? Do I have to retrain the resnet model for those classes? or can I directly finetune the rcnn-resnet model I got from the steps above?

@siddharthm83 Good catch with the 14x14!
Can you please attach your deploy.txt

@twtygqyy thanks once again!
i hit a pretty decent accuracy of close to 83% on my custom dataset. quite happy with the detection as such. now i want to definitely try out pyFaster-rCNN along with resNET.

@siddharthm83, i'd like to try your train.txt posted here for my training, but i am not sure about the deploy.prototxt to test it. since i ran into issues with my deploy.proto for the plain resNET, i certainly am not so good at handcrafting the deploy from the train_val.

if you can please share that it would be helpful. (either of you).

@arushk1 Just fine-tune the model is fine

@SilverWaveGL @janakipj
You can modify your own deploy like this
test.txt

@twtygqyy

I use siddharthm83 deploy.txt to train py-faster-rcnn with VOC2007 which has 20 classes.

Do I need to change your test.txt for test/deploy purpose?

@kaishijeng Yes, since mine is a 2-class deploy txt.

@twtygqyy https://github.com/twtygqyy

Which parameters need to change for 20 classes in your test.txt?

On Thu, Feb 18, 2016 at 3:49 PM, Jiu XU [email protected] wrote:

@kaishijeng https://github.com/kaishijeng Yes, since mine is a 2-class
deploy txt.


Reply to this email directly or view it on GitHub
https://github.com/rbgirshick/py-faster-rcnn/issues/62#issuecomment-185983057
.

@siddharthm83,

I have a trained model based on your train.txt and like to try it with test. Since I didn't see response from twtyggyy, do you mind sharing your test.txt?

Thanks

@kaishikeng. I'm on travel, will have to wait till Monday. Just remove the loss layers in the end. Check the vgg16 test prototxt and follow similar method if you want it before then.

@siddharthm83,

OK, I will try your suggestion to remove the loss layers from train.txt

Thanks

@siddharthm83

I am able to create test.txt and use it in tools/demo.py to test my trained
model. Seem to work properly for test images.

Thanks,

On Fri, Feb 19, 2016 at 10:44 AM, siddharthm83 [email protected]
wrote:

@kaishikeng. I'm on travel, will have to wait till Monday. Just remove the
loss layers in the end. Check the vgg16 test prototxt and follow similar
method if you want it before then.


Reply to this email directly or view it on GitHub
https://github.com/rbgirshick/py-faster-rcnn/issues/62#issuecomment-186356385
.

@siddharthm83 Sorry for the late reply, good to hear you solve the problem.

I think we can close this issue now, thanks all for joining and commenting on this issue.

@twtygqyy @siddharthm83 can anyone of you share mAP and loss after training. I am trying to train on COCO dataset but the loss is oscillating and doesn't seem to converge even after recommended number of iterations

@twtygqyy @siddharthm83 - I was able to successfully implement the ResNet-50 model in the Faster RCNN network. However, when I use the same principles to implement the ResNet-152 model, I get the error below in the res5a layer. I have attached the error log and my train.prototxt file. I'd really appreciate any help that I can get here.

ERROR:
I0314 18:29:47.374686 16592 net.cpp:397] scale5a_branch2c -> res5a_branch2c (in-place)
I0314 18:29:47.374725 16592 layer_factory.hpp:77] Creating layer scale5a_branch2c
I0314 18:29:47.374860 16592 net.cpp:150] Setting up scale5a_branch2c
I0314 18:29:47.374871 16592 net.cpp:157] Top shape: 1 2048 19 32 (1245184)
I0314 18:29:47.374876 16592 net.cpp:165] Memory required for data: 4223885008
I0314 18:29:47.374883 16592 layer_factory.hpp:77] Creating layer res5a
I0314 18:29:47.374891 16592 net.cpp:106] Creating Layer res5a
I0314 18:29:47.374897 16592 net.cpp:454] res5a <- res5a_branch1
I0314 18:29:47.374903 16592 net.cpp:454] res5a <- res5a_branch2c
I0314 18:29:47.374909 16592 net.cpp:411] res5a -> res5a
F0314 18:29:47.374917 16592 eltwise_layer.cpp:34] Check failed: bottom[i]->shape() == bottom[0]->shape()
* Check failure stack trace: *
Aborted (core dumped)

errlog.txt
trainprototxt.txt

@abhirevan It seems that you removed several resnet blocks in conv4, this should be fine. Apparently there is something wrong with the size, please check the bottom layer of layer "res5a_branch2a".

@kshitid20 I'm sorry I haven't trained full ResNet Faster-rcnn on MSCOCO due to memory problem. I think you can change your learning rate to see if it's gonna work.

@twtygqyy @siddharthm83- Thanks for the tip, I was able to fix it. Were you able to run faster-rcnn using the alt_opt algorithm using resnet?

I am trying to train my own dataset using resnet+py-faster-rcnn (using @siddharthm83 train.txt). I am getting the following error.

I0321 07:29:44.037149 1892 solver.cpp:60] Solver scaffolding done.
Loading pretrained model weights from data/imagenet_models/resnet.caffemodel
I0321 07:29:44.240974 1892 net.cpp:816] Ignoring source layer fc1000
I0321 07:29:44.241065 1892 net.cpp:816] Ignoring source layer prob
Solving...
F0321 07:29:45.412804 1892 syncedmem.cpp:56] Check failed: error == cudaSuccess (2 vs. 0) out of memory
* Check failure stack trace: *
Aborted (core dumped)

I am using AWS instance. I was able to train resnet-50 (without fast-rcnn) using the same instance with same dataset. But when I tried using py-faster-rcnn, I am getting this error. I know this error could possibly be due to insufficient memory. So I changed the batch size in deploy.prototxt (iter_size: 1). But still I am getting the error. Can someone help me out?

@abhirevan I haven't tried that with resnet (I tested with VGG and ZF before). Why do you want to use the alt_opt algorithm? For extracting the region proposals? I think the current end-to-end training could be much more efficient.

@hoticevijay The memory of Grid 520 GPU is not enough for faster-rcnn with resnet-50 since more memory is required for RPN block in faster-rcnn. If you still want to test resnet with py-faster-rcnn, remove several blocks in conv2~5 and have a try. (I was able to train by doing that, but the result is not as good as VGG16)

@twtygqyy Thanks for your insight. I will implement your suggestion.

@twtygqyy - As per the paper alt_opt algorithm gives better performance. I experienced that when testing with VGGNET where the numbers were pretty similar, but the result bounding boxes were much more tighter and better. I tried to implement a similar configuration to VGGNET for RESNET-50 , but found it difficult to implement the dummy layers at the end especially for stage(1/2)_fast_rcnn_train.pt configuration files.

@twtygqyy: I am now able to start the training using your resnet-34 implementation. Can you tell me the average number of iterations it took for the network to converge for you? My data set has 30 classes and around 80 images per class.

@abhirevan @siddharthm83 In your training prototxt you should set for each BatchNorm layer "lr_mult=0" depending on the number of the layer's blobs.

please read link

@SilverWaveGL Don't mean to revive this thread. But as per the paper:
For the usage of BN layers, after pre-training, we compute the BN statistics (means and variances) for each layer on the ImageNet training set. Then the BN layers are fixed during fine-tuning for object detection. As such, the BN layers become linear activations with constant offsets and scales, and BN statistics are not updated by fine-tuning. We fix the BN layers mainly for reducing memory consumption in Faster R-CNN training.
I am not sure if this is related to lr_mult. Perhaps that is why the prototxt file still works for Faster RCNN finetuning? Did you observe any changes after adding the lr_mult ?

@siddharthm83 @twtygqyy I modified the ResNet prototxt file to have the ROI proposals and was able to fine tune it successfully in 50K iterations on val set
Train net output #3: rpn_loss_bbox = 0.0179207 (* 1 = 0.0179207 loss)
Now I am trying to use the final caffe model along with the deploy.prototxt for ResNet modified as a test prototxt (removing lr params, top layer and appending the input data layer). However I keep getting this error.
File "./lib/rpn/anchor_target_layer.py", line 116, in forward (all_anchors[:, 2] < im_info[0][1] ) & # width ValueError: operands could not be broadcast together with shapes (17100,) (600,800)

I printed a few debug statements but was unsuccessful in trouble shooting. Any help would be appreciated.

@siddharthm83 Did your test go through for ResNet after the training step? I added the extra layers for test.prototxt but my testing does not go through.

Shouldn't the feat_stride and scale of ROI_pooling layer be changed since now that the receptive field of the layer before the RPN net is different from the VGG net?

@danfeiX What should it be ?

@nrajani never mind, the receptive field of res4f is exactly same as conv5_3 in VGG

So I followed this https://github.com/rbgirshick/py-faster-rcnn/issues/122 and changed pooling to 7 X 7 but I get this error now:
Check failed: shape[i] >= 0 (-2 vs. 0)

I am looking into using ResNet with Faster-RCNN as well, but can someone post the results they got with this net? Preferably mAP on VOC2007 and time necessary per image. I know for VGG16 it runs in approximately 200msec per image, how much time does Faster-RCNN with ResNet take?

Hi~ @twtygqyy
Thanks to @siddharthm83 's training prototxt, I am now attempting to implement a detection baseline on PASCAL VOC 2007 dataset for 20 categories, using ResNet-50 + py-fast-rcnn. I've successfully trained some *.caffemodel. However, when I run the test_net.py(referring to @twtygqyy 's prototxt, but I don't take 'im_info' as an input, instead, my input is 'rois' because I am not using RPN proposals), an error occurs:

I0505 08:44:00.312559 10669 net.cpp:270] This network produces output bbox_pred
I0505 08:44:00.312577 10669 net.cpp:270] This network produces output cls_prob
I0505 08:44:00.312784 10669 net.cpp:283] Network initialization done.
I0505 08:44:00.426910 10669 net.cpp:816] Ignoring source layer data
I0505 08:44:00.443357 10669 net.cpp:816] Ignoring source layer loss_cls
I0505 08:44:00.443397 10669 net.cpp:816] Ignoring source layer loss_bbox
voc_2007_test ss roidb loaded from /home/wuwenxi/wenxi/Resnet-in-faster-rcnn/data/cache/voc_2007_test_selective_search_roidb.pkl
F0505 08:44:01.302139 10669 syncedmem.cpp:56] Check failed: error == cudaSuccess (2 vs. 0) out of memory
* Check failure stack trace: *
Aborted (core dumped)

Could you please give me some advice? thx a lot.

@ngmanhei Are you using other objectness measures instead of RPN for bbox candidates? It should be fine, like original fast-rcnn.
Not sure about the error you mentioned. In my experience, "Check failed: error == cudaSuccess (2 vs. 0) out of memory" is usually caused by a lack of memory. Did you install caffe with CUDNN?

I'm running a ResNet Faster-RCNN training now, seems like it requires at least 7gb of memory (using cudnn). How much do you have available?

I used pretrained VGG16 model on imagenet and then I tested it on validation. When I evaluate the output using the imagenet demo script, I get all zeros for all classes as AP. I am not sure why that is happening I even tried increasing the threshold to 0.5 but I still get the same result. Any help would be really appreciated.

I created a train.prototxt for ResNet101 with Faster RCNN.
(https://gist.github.com/hgaiser/c6616ade16ff920bd55509520fb1386d) in which I also added DropOut layers near the end. However I am still experiencing the following:

I have a custom dataset which was used to train with VGG16. The training set is recorded in a controlled environment (black background) while the test set is more "in the wild". The resulting mAP on our testset is around 0.82 so I wanted to try ResNet101 to see if this improves result. After training ResNet101 using the above train.prototxt I get a score of roughly 0.3. However, my loss does decrease (to about 0.03) and mAP on a small piece of the training set on which wasn't trained was about 0.999. Did my network somehow overfit on my training? How do I get it to generalize better? As can be seen in the prototxt I tried adding DropOut layers, which appeared to little effect. Do I simply need more varying data? I am looking for any tips or hints :)

@siddharthm83 I used the train.prototxt file for training. After that I made a test.prototxt file by removing the 'loss' layers, just like you said, but I got an "Unknown bottom blob 'rois' (layer 'roi_pool5', bottom index 1)" error when I tried the command "net = caffe.Net(prototxt, caffemodel, caffe.TEST)".
Can you share your test.prototxt file?

@kaishijeng
Thank u in advance.

I'm training faster-rcnn with ResNet-50 layers on ms coco, train.prototxt provided by @siddharthm83. However, I'm getting loss_bbox = 0 and loss_cls = 0. Any insight would be appreciated.


I0531 13:21:11.898075 32392 solver.cpp:229] Iteration 1860, loss = 0.474211

I0531 13:21:11.898149 32392 solver.cpp:245] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0531 13:21:11.898160 32392 solver.cpp:245] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

I0531 13:21:11.898170 32392 solver.cpp:245] Train net output #2: rpn_cls_loss = 0.501028 (* 1 = 0.501028 loss)

I0531 13:21:11.898180 32392 solver.cpp:245] Train net output #3: rpn_loss_bbox = 0.286518 (* 1 = 0.286518 loss)

@liuchang8am , I am not sure what you are doing wrong. Hard to tell from the 4 lines you have pasted. Is your classifier loss 0 from the first iteration, or gradually converges to 0?
I have not trained it on mscoco, so I am not sure.

@siddharthm83 The loss_bbox is 0 since the first iteration. I test the trained model, and it can't detect any objects in the pre-defined categories. Full experiment log is attached, hope it shows my error somewhere.
log.txt

@siddharthm83 I also followed a similar approach when merging resNet and py faster rcnn, however can you please explain me the reason why you removed the ( fc -relu - dropout ) layers and added a conv layer between rpn - ROI and rcnn modules

@siddharthm83 I cannot think exactly why the output size of conv5_3 in VGG16 is 7 * 7. For example, if the size of input image is 600 * 1000,shouldn't it be 39*64 ?

In order to get a pretrained resnet model for PASCAL VOC dataset (since py-faster-rcnn needs a pretrained model), should I train the Images after cropping the images based on annotations or should I train using whole images?

@kristellmarisse You should use the ResNet model trained on Imagenet and train on whole images without cropping so as to generate negative samples.

@abhirevan, when you got resnet-50 working with faster-rcnn, which dataset did you train on? I want to train on VOC to begin with. I can find ImageNet models trained with resnet-50. Should I first fine-tune the ImageNet model for VOC (using plain resnet-50) and then use that model to train the rcnn? Or can I start the rcnn training with the ImageNet model itself?

@SilverWaveGL Should lr_mult and decay_mult be set to zero in the training stage or in the testing stage? Would you mind explaining it? Thanks.

@siddharthm83 ,can you help me solve this problem?Thank you

345

@janakipj you mentioned "For testing, I use the net.forward (standard python code) and try to interpret the output of the last layer using the key "fc3" which is the last layer in your trainval. And I get all equal values". Did you find out why the values are all equal? I ran into the same problem.

I am working on a ResNet model for fixation prediction. I deployed the ResNet model on one image, but after the input layer the data is not being fed to the model.
Can anyone guide me?

@twtygqyy
Just one quick question about the "param_str: "'feat_stride': 16" and "spatial_scale: 0.0675#1/16".
It is related to how many strides you totally made for final layer before rpn-layer. I thought it is 2**5=32 by searching "stride: 2" in the training file.
Is there anyone that can provide some clues about setting up "feat_stride" and "spatial_scale"? It should be modified from network to network, right?

Hi, everyone, I have released an implementation of ResNet-101 based Faster-RCNN, which also uses OHEM (Online Hard Example Mining) during training; you can find the code and model weights here

@abhirevan @kshalini @twtygqyy @siddharthm83 @rbgirshick

I'm trying to train the ResNet-50 model on PASCAL VOC 2007 trainval dataset. I'm using this command to start the training

./tools/train_net.py --gpu 1 --weights data/imagenet_models/ResNet-50-model.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/ResNet-50/faster_rcnn_end2end/solver.prototxt

I'm using the solver/train prototxt files from @twtygqyy repo

However, I'm getting this error:

Normalizing targets done WARNING: Logging before InitGoogleLogging() is written to STDERR I0222 13:59:58.538053 23076 solver.cpp:54] Initializing solver from parameters: test_iter: 100 test_interval: 1000 base_lr: 0.0001 display: 100 max_iter: 200000 lr_policy: "multistep" gamma: 0.1 momentum: 0.9 weight_decay: 0.0001 stepsize: 20000 snapshot: 10000
snapshot_prefix: "resnet50_train" solver_mode: GPU
net: "models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt"
test_initialization: false
I0222 13:59:58.538121 23076 solver.cpp:96] Creating training net from net file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
[libprotobuf ERROR google/protobuf/text_format.cc:245] Error parsing text-format caffe.NetParameter: 74:26: Message type "caffe.LayerParameter" has no field named "batch_norm_param".
F0222 13:59:58.538242 23076 upgrade_proto.cpp:928] Check failed: ReadProtoFromTextFile(param_file, param) Failed to parse NetParameter file: models/ResNet-50/faster_rcnn_end2end/ResNet-50-train_val.prototxt
*** Check failure stack trace: *** Aborted (core dumped)

I'm on the latest commit of faster-rcnn branch of caffe-fast-rcnn

Pardon my lack of knowledge, but would you guys mind helping me resolve this error, please? Appreciate it. Thanks.

That's a weird error, the error that it is giving is that the LayerParameter type has no field named batch_norm_param, but even upstream Caffe has that field..

Does VGG16 work properly then?

@hgaiser

Does VGG16 work properly then?

No, I suppose not. I'm able to train and test VGG_CNN_M_1024 model successfully. However, the loss_cls and loss_bbox seems to remain 0 until the last iteration. I don't know whether that is relevant or not, in this case.

./tools/train_net.py --gpu 1 --weights data/imagenet_models/VGG_CNN_M_1024.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt

Anyways, this is the output snippet from my VGG_CNN_M_1024 training:

Train net output #0: loss_bbox = 0 (* 1 = 0 loss)
I0223 14:28:28.146657 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)
I0223 14:28:28.146667 3834 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.121718 (* 1 = 0.121718 loss)

I0223 14:28:28.146678 3834 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.11191 (* 1 = 0.11191 loss)

I0223 14:28:28.146688 3834 solver.cpp:571] Iteration 100, lr = 0.001
I0223 14:28:30.319339 3834 solver.cpp:242] Iteration 120, loss = 0.286128

I0223 14:28:30.319385 3834 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:28:30.319396 3834 solver.cpp:258] Train net output #1: loss_cls = 0 (* 1 = 0 loss)

On the other hand, I'm getting the out of memory error in case of VGG16.

./tools/train_net.py --gpu 0 --weights data/imagenet_models/VGG16.v2.caffemodel --imdb voc_2007_trainval --cfg experiments/cfgs/faster_rcnn_end2end.yml --solver models/pascal_voc/VGG16/faster_rcnn_end2end/solver.prototxt

Iteration 0, loss = 3.53162
I0223 14:26:47.963392 3791 solver.cpp:258] Train net output #0: loss_bbox = 0 (* 1 = 0 loss)

I0223 14:26:47.963405 3791 solver.cpp:258] Train net output #1: loss_cls = 1.98189 (* 1 = 1.98189 loss)

I0223 14:26:47.963416 3791 solver.cpp:258] Train net output #2: rpn_cls_loss = 0.79846 (* 1 = 0.79846 loss)

I0223 14:26:47.963426 3791 solver.cpp:258] Train net output #3: rpn_loss_bbox = 0.690695 (* 1 = 0.690695 loss)

I0223 14:26:47.963438 3791 solver.cpp:571] Iteration 0, lr = 0.001
F0223 14:26:47.970721 3791 syncedmem.cpp:58] Check failed: error == cudaSuccess (2 vs. 0) out of memory
*** Check failure stack trace: ***
Aborted (core dumped)

FYI, I'm using caffe-fast-rcnn from @rbgirshick repo and I think it does not have a batch_norm_param field.

Thanks for reaching out, though. Let me know if you need any other information. Thank you very much.

@kshalini, how did you find the accuracy value for pyFasterRCNN trained by finetuning the VGGnet using the end2end method? What additional codes you used to get accuracy value?

@onkarganjewar, hi, why did you remove the BN from the original ResNet? I'm confused.

Hi everyone, I open source my implementation of Faster RCNN with ResNet for ImageNet Detection here. Hope it is useful for someone. Thanks!

@tianzhi0549
I have used the ResNet pretrained weights resnet101_faster_rcnn_bn_scale_merged_end2end_iter_70000.caffemodel to make a pretrained model at the 0th epoch, and fine-tuned from it.
resnetTrain.txt
screen shot 2017-10-29 at 11 48 33 am

On testing, the mAP is very low. Also, in the majority of cases it only detects classes with bigger bounding boxes correctly. It also has issues with overlapping cases. Can you please help me troubleshoot the issue?

Thanks in advance.

screen shot 2017-10-29 at 11 48 33 am

@twtygqyy Could you please give me your ResNet files for Faster RCNN? I saw that someone found your train.prototxt and test.prototxt, but I cannot find them. I tried to modify my .prototxt file, but failed. Thank you very much!

Have you found the reason why training is very slow? I ran into the same problem. @twtygqyy

@sulth Something is wrong with the method used to calculate the AP; you need to implement your own.

@siddharthm83 Can we keep the pooled_w & pooled_h params in the ROIPooling layer equal to 7 and just set spatial_scale=1/8 instead of spatial_scale=1/16?

Hi, I am trying to train ResNet-18. I have used the following train.prototxt:

name: "ResNet-18"

layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 21"
}
}

layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "conv1_relu"
type: "ReLU"
}

layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
kernel_size: 3
stride: 2
pool: MAX
}
}

layer {
bottom: "pool1"
top: "res2a_branch1"
name: "res2a_branch1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "bn2a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "scale2a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "pool1"
top: "res2a_branch2a"
name: "res2a_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "bn2a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "scale2a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "res2a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2b"
name: "res2a_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "bn2a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "scale2a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch1"
bottom: "res2a_branch2b"
top: "res2a"
name: "res2a"
type: "Eltwise"
}

layer {
bottom: "res2a"
top: "res2a"
name: "res2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a"
top: "res2b_branch2a"
name: "res2b_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "bn2b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "scale2b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "res2b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2b"
name: "res2b_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "bn2b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "scale2b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a"
bottom: "res2b_branch2b"
top: "res2b"
name: "res2b"
type: "Eltwise"
}

layer {
bottom: "res2b"
top: "res2b"
name: "res2b_relu"
type: "ReLU"
}

layer {
bottom: "res2b"
top: "res3a_branch1"
name: "res3a_branch1"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "bn3a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "scale3a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b"
top: "res3a_branch2a"
name: "res3a_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "bn3a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "scale3a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "res3a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2b"
name: "res3a_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "bn3a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "scale3a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch1"
bottom: "res3a_branch2b"
top: "res3a"
name: "res3a"
type: "Eltwise"
}

layer {
bottom: "res3a"
top: "res3a"
name: "res3a_relu"
type: "ReLU"
}

layer {
bottom: "res3a"
top: "res3b_branch2a"
name: "res3b_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "bn3b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "scale3b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "res3b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2b"
name: "res3b_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "bn3b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "scale3b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a"
bottom: "res3b_branch2b"
top: "res3b"
name: "res3b"
type: "Eltwise"
}

layer {
bottom: "res3b"
top: "res3b"
name: "res3b_relu"
type: "ReLU"
}

layer {
bottom: "res3b"
top: "res4a_branch1"
name: "res4a_branch1"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "bn4a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "scale4a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b"
top: "res4a_branch2a"
name: "res4a_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "bn4a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "scale4a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "res4a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2b"
name: "res4a_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "bn4a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "scale4a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch1"
bottom: "res4a_branch2b"
top: "res4a"
name: "res4a"
type: "Eltwise"
}

layer {
bottom: "res4a"
top: "res4a"
name: "res4a_relu"
type: "ReLU"
}

layer {
bottom: "res4a"
top: "res4b_branch2a"
name: "res4b_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "bn4b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "scale4b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "res4b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2b"
name: "res4b_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "bn4b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "scale4b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a"
bottom: "res4b_branch2b"
top: "res4b"
name: "res4b"
type: "Eltwise"
}

layer {
bottom: "res4b"
top: "res4b"
name: "res4b_relu"
type: "ReLU"
}

# Add RPN network

layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "res4b"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}

layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}

layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}

layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}

layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'

top: 'rpn_scores'

python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
}

}

layer {

name: 'debug-data'

type: 'Python'

bottom: 'data'

bottom: 'rpn_rois'

bottom: 'rpn_scores'

python_param {

module: 'rpn.debug_layer'

layer: 'RPNDebugLayer'

}

}

layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'rpn.proposal_target_layer'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 21"
}
}

# Stop ResNet at conv4_x and add ROIPooling

layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "res4b"
bottom: "rois"
top: "roipool5"
roi_pooling_param {
pooled_w: 14
pooled_h: 14
spatial_scale: 0.0625 # 1/16
}
}

# Resume ResNet conv5_x layers after the ROIPooling layer

layer {
bottom: "roipool5"
top: "res5a_branch1"
name: "res5a_branch1"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "bn5a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "scale5a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "roipool5"
top: "res5a_branch2a"
name: "res5a_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "bn5a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "scale5a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "res5a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2b"
name: "res5a_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "bn5a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "scale5a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch1"
bottom: "res5a_branch2b"
top: "res5a"
name: "res5a"
type: "Eltwise"
}

layer {
bottom: "res5a"
top: "res5a"
name: "res5a_relu"
type: "ReLU"
}

layer {
bottom: "res5a"
top: "res5b_branch2a"
name: "res5b_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "bn5b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "scale5b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "res5b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2b"
name: "res5b_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "bn5b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "scale5b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a"
bottom: "res5b_branch2b"
top: "res5b"
name: "res5b"
type: "Eltwise"
}

layer {
bottom: "res5b"
top: "res5b"
name: "res5b_relu"
type: "ReLU"
}

layer {
bottom: "res5b"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
kernel_size: 7
stride: 1
pool: AVE
}
}

### Add faster RCNN cls and bbox layer

layer {
name: "cls_score_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "cls_score_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 21
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "bbox_pred_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "bbox_pred_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 84
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score_uefa"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "loss_cls"
loss_weight: 1
}

layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred_uefa"
bottom: "bbox_targets"
bottom: "bbox_inside_weights"
bottom: "bbox_outside_weights"
top: "loss_bbox"
loss_weight: 1
}

And the test.prototxt file is:

name: "ResNet-18"

input: "data"
input_shape {
dim: 1
dim: 3
dim: 1280
dim: 720
}

input: "im_info"
input_shape {
dim: 1
dim: 3
}

layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "conv1"
top: "conv1"
name: "conv1_relu"
type: "ReLU"
}

layer {
bottom: "conv1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
kernel_size: 3
stride: 2
pool: MAX
}
}

layer {
bottom: "pool1"
top: "res2a_branch1"
name: "res2a_branch1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 1
pad: 0
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "bn2a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch1"
top: "res2a_branch1"
name: "scale2a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "pool1"
top: "res2a_branch2a"
name: "res2a_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "bn2a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "scale2a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2a"
name: "res2a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a_branch2a"
top: "res2a_branch2b"
name: "res2a_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "bn2a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2a_branch2b"
top: "res2a_branch2b"
name: "scale2a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a_branch1"
bottom: "res2a_branch2b"
top: "res2a"
name: "res2a"
type: "Eltwise"
}

layer {
bottom: "res2a"
top: "res2a"
name: "res2a_relu"
type: "ReLU"
}

layer {
bottom: "res2a"
top: "res2b_branch2a"
name: "res2b_branch2a"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "bn2b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "scale2b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2a"
name: "res2b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res2b_branch2a"
top: "res2b_branch2b"
name: "res2b_branch2b"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "bn2b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res2b_branch2b"
top: "res2b_branch2b"
name: "scale2b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2a"
bottom: "res2b_branch2b"
top: "res2b"
name: "res2b"
type: "Eltwise"
}

layer {
bottom: "res2b"
top: "res2b"
name: "res2b_relu"
type: "ReLU"
}

layer {
bottom: "res2b"
top: "res3a_branch1"
name: "res3a_branch1"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "bn3a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch1"
top: "res3a_branch1"
name: "scale3a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res2b"
top: "res3a_branch2a"
name: "res3a_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "bn3a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "scale3a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2a"
name: "res3a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3a_branch2a"
top: "res3a_branch2b"
name: "res3a_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "bn3a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3a_branch2b"
top: "res3a_branch2b"
name: "scale3a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a_branch1"
bottom: "res3a_branch2b"
top: "res3a"
name: "res3a"
type: "Eltwise"
}

layer {
bottom: "res3a"
top: "res3a"
name: "res3a_relu"
type: "ReLU"
}

layer {
bottom: "res3a"
top: "res3b_branch2a"
name: "res3b_branch2a"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "bn3b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "scale3b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2a"
name: "res3b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res3b_branch2a"
top: "res3b_branch2b"
name: "res3b_branch2b"
type: "Convolution"
convolution_param {
num_output: 128
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "bn3b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res3b_branch2b"
top: "res3b_branch2b"
name: "scale3b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3a"
bottom: "res3b_branch2b"
top: "res3b"
name: "res3b"
type: "Eltwise"
}

layer {
bottom: "res3b"
top: "res3b"
name: "res3b_relu"
type: "ReLU"
}

layer {
bottom: "res3b"
top: "res4a_branch1"
name: "res4a_branch1"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "bn4a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch1"
top: "res4a_branch1"
name: "scale4a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res3b"
top: "res4a_branch2a"
name: "res4a_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "bn4a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "scale4a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2a"
name: "res4a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4a_branch2a"
top: "res4a_branch2b"
name: "res4a_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "bn4a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4a_branch2b"
top: "res4a_branch2b"
name: "scale4a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a_branch1"
bottom: "res4a_branch2b"
top: "res4a"
name: "res4a"
type: "Eltwise"
}

layer {
bottom: "res4a"
top: "res4a"
name: "res4a_relu"
type: "ReLU"
}

layer {
bottom: "res4a"
top: "res4b_branch2a"
name: "res4b_branch2a"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "bn4b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "scale4b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2a"
name: "res4b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res4b_branch2a"
top: "res4b_branch2b"
name: "res4b_branch2b"
type: "Convolution"
convolution_param {
num_output: 256
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "bn4b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res4b_branch2b"
top: "res4b_branch2b"
name: "scale4b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res4a"
bottom: "res4b_branch2b"
top: "res4b"
name: "res4b"
type: "Eltwise"
}

layer {
bottom: "res4b"
top: "res4b"
name: "res4b_relu"
type: "ReLU"
}

# Add RPN network

layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "res4b"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}

layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 18 # 2(bg/fg) * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 36 # 4 * 9(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}

layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}

layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
param_str: "'feat_stride': 16"
}
}

layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}

layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}

#========= RoI Proposal ============

layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}

layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }
}

layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'

top: 'rpn_scores'

python_param {
    module: 'rpn.proposal_layer'
    layer: 'ProposalLayer'
    param_str: "'feat_stride': 16"
}

}

# Stop ResNet at conv4_x and add ROIPooling

layer {
name: "roi_pool5"
type: "ROIPooling"
bottom: "res4b"
bottom: "rois"
top: "roipool5"
roi_pooling_param {
pooled_w: 14
pooled_h: 14
spatial_scale: 0.0625 # 1/16
}
}

# Resume ResNet conv5_x layers after the ROIPooling layer

layer {
bottom: "roipool5"
top: "res5a_branch1"
name: "res5a_branch1"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 1
pad: 0
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "bn5a_branch1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch1"
top: "res5a_branch1"
name: "scale5a_branch1"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "roipool5"
top: "res5a_branch2a"
name: "res5a_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 2
bias_term: false
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "bn5a_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "scale5a_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2a"
name: "res5a_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5a_branch2a"
top: "res5a_branch2b"
name: "res5a_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "bn5a_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5a_branch2b"
top: "res5a_branch2b"
name: "scale5a_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a_branch1"
bottom: "res5a_branch2b"
top: "res5a"
name: "res5a"
type: "Eltwise"
}

layer {
bottom: "res5a"
top: "res5a"
name: "res5a_relu"
type: "ReLU"
}

layer {
bottom: "res5a"
top: "res5b_branch2a"
name: "res5b_branch2a"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "bn5b_branch2a"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "scale5b_branch2a"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2a"
name: "res5b_branch2a_relu"
type: "ReLU"
}

layer {
bottom: "res5b_branch2a"
top: "res5b_branch2b"
name: "res5b_branch2b"
type: "Convolution"
convolution_param {
num_output: 512
kernel_size: 3
pad: 1
stride: 1
bias_term: false
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "bn5b_branch2b"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
}
}

layer {
bottom: "res5b_branch2b"
top: "res5b_branch2b"
name: "scale5b_branch2b"
type: "Scale"
scale_param {
bias_term: true
}
}

layer {
bottom: "res5a"
bottom: "res5b_branch2b"
top: "res5b"
name: "res5b"
type: "Eltwise"
}

layer {
bottom: "res5b"
top: "res5b"
name: "res5b_relu"
type: "ReLU"
}

layer {
bottom: "res5b"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
kernel_size: 7
stride: 1
pool: AVE
}
}

### Add faster RCNN cls and bbox layer

layer {
name: "cls_score_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "cls_score_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 21
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "bbox_pred_uefa"
type: "InnerProduct"
bottom: "pool5"
top: "bbox_pred_uefa"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 84
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}

layer {
name: "cls_prob"
type: "Softmax"
bottom: "cls_score_uefa"
top: "cls_prob"
}

When running test.prototxt I am facing the following issue:
F0226 05:04:43.527575 429 insert_splits.cpp:29] Unknown bottom blob 'gt_boxes' (layer 'rpn-data', bottom index 1)
*** Check failure stack trace: ***
How can I solve it?

Was this page helpful?
0 / 5 - 0 ratings