Hi, I want to write a MobileNet FPN backbone to improve Mask R-CNN speed and accuracy.
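For context, my plan is to plug the backbone into torchvision's Mask R-CNN roughly like this (just a sketch of the intended usage; I'm assuming the `MaskRCNN` constructor accepts any backbone that exposes an `out_channels` attribute, and `num_classes=91` is only a COCO-sized placeholder):
```
import torch
from torchvision.models.detection.mask_rcnn import MaskRCNN

# Sketch of intended usage: mobilenet_fpn_backbone is defined below.
backbone = mobilenet_fpn_backbone(pretrained=True)
model = MaskRCNN(backbone, num_classes=91)  # 91 is just a placeholder
model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])
```
Here is what I have so far: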
/torchvision/models/detection/backbone_utils.py
```
from collections import OrderedDict

from torch import nn
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.ops import misc as misc_nn_ops

from .._utils import IntermediateLayerGetter
from .. import resnet
from .. import mobilenet_v2
from torchvision.models import mobilenet_v2 as MobileNetV2


class BackboneWithFPN(nn.Sequential):
    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool(),
        )
        super(BackboneWithFPN, self).__init__(OrderedDict(
            [("body", body), ("fpn", fpn)]))
        self.out_channels = out_channels


def resnet_fpn_backbone(backbone_name, pretrained):
    backbone = resnet.__dict__[backbone_name](
        pretrained=pretrained,
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)
    # freeze layers
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)


class FPNMobileNet(nn.Module):
    def __init__(self, pretrained=True):
        super().__init__()
        net = MobileNetV2(pretrained)
        self.features = net.features
        self.layer1 = nn.Sequential(*self.features[0:4])
        self.layer2 = nn.Sequential(*self.features[4:7])
        self.layer3 = nn.Sequential(*self.features[7:11])
        self.layer4 = nn.Sequential(*self.features[11:19])

        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        # Bottom-up pathway, from ResNet
        enc0 = self.layer1(x)
        enc1 = self.layer2(enc0)  # 256
        enc2 = self.layer3(enc1)  # 512
        enc3 = self.layer4(enc2)  # 1024
        return enc3


def mobilenet_fpn_backbone(pretrained):
    backbone = FPNMobileNet(pretrained)
    print(backbone)
    # freeze layers
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    in_channels_stage2 = 1280 // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
```
/torchvision/models/detection/mobilenet_fpn.py
```
from .backbone_utils import mobilenet_fpn_backbone


def fpn(pretrained=True):
    backbone = mobilenet_fpn_backbone(pretrained)
    return backbone
```
demo.py
```
import torch
from torchvision.models.detection import mobilenet_fpn

backbone = mobilenet_fpn.fpn(True)
backbone.eval()

x = torch.rand(1, 3, 100, 100)
out = backbone(x)
print(out)
```
"RuntimeError: Given groups=1, weight of size 32 3 3 3, expected input[1, 1280, 4, 4] to have 3 channels, but got 1280 channels instead"
Hi,
There are two issues with your implementation:
- `in_channels_list` is not correct, and should follow the number of channels in mobilenet_v2 (which is `[24, 32, 64, 1280]`)
- `_IntermediateLayerGetter` is very simplistic, and doesn't handle some use cases. You should remove `self.features` from `FPNMobileNet`

Here is a working version:
```
class FPNMobileNet(nn.Module):
    def __init__(self, pretrained=True):
        super().__init__()
        net = MobileNetV2(pretrained)
        features = net.features
        self.layer1 = nn.Sequential(*features[0:4])
        self.layer2 = nn.Sequential(*features[4:7])
        self.layer3 = nn.Sequential(*features[7:11])
        self.layer4 = nn.Sequential(*features[11:19])

        for param in features.parameters():
            param.requires_grad = False

    def forward(self, x):
        # Bottom-up pathway
        enc0 = self.layer1(x)     # 24
        enc1 = self.layer2(enc0)  # 32
        enc2 = self.layer3(enc1)  # 64
        enc3 = self.layer4(enc2)  # 1280
        return enc3


def mobilenet_fpn_backbone(pretrained):
    backbone = FPNMobileNet(pretrained)
    # freeze layers
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}

    in_channels_list = [24, 32, 64, 1280]
    out_channels = 256
    return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
```
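As a quick sanity check, you can feed a dummy image through each stage and confirm the channel counts are indeed 24, 32, 64 and 1280 (a minimal sketch, assuming the `FPNMobileNet` above):
```
import torch

m = FPNMobileNet(pretrained=False)
m.eval()
x = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    e0 = m.layer1(x)     # expect 24 channels
    e1 = m.layer2(e0)    # expect 32 channels
    e2 = m.layer3(e1)    # expect 64 channels
    e3 = m.layer4(e2)    # expect 1280 channels
print([t.shape[1] for t in (e0, e1, e2, e3)])  # -> [24, 32, 64, 1280]
```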
It works, many thanks!
@fmassa Hi, I want to ask: if I want to extract certain layers using `_IntermediateLayerGetter`, should I use only the index of that layer, or a slice like `[:n]`?
For example, in mobilenet_v2, to take the 4th layer, should it be `features[4]` or `features[:4]`?
What's the difference? Or if I want to take the 7th layer, should it be `features[4:7]` or `features[7]`?
```
self.layer1 = nn.Sequential(*features[0:4])
self.layer2 = nn.Sequential(*features[4:7])
self.layer3 = nn.Sequential(*features[7:11])
self.layer4 = nn.Sequential(*features[11:19])
```
or:
```
self.layer1 = nn.Sequential(*features[0:4])
self.layer2 = nn.Sequential(*features[4:7])
self.layer3 = nn.Sequential(*features[7:14])
self.layer4 = nn.Sequential(*features[14:18])
```
since the last block (`features[18]`, 1280 channels) has too many channels.
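To make my question concrete, here is how I understand the difference (a small sketch, assuming a plain mobilenet_v2 from torchvision; please correct me if I'm wrong):
```
import torch
from torchvision.models import mobilenet_v2

features = mobilenet_v2(pretrained=False).features

# features[4] is a single module (the 5th block only),
# while features[0:4] is the first four modules.
single_block = features[4]
first_four = torch.nn.Sequential(*features[0:4])

x = torch.rand(1, 3, 224, 224)
out = first_four(x)  # runs blocks 0..3, output has 24 channels
print(out.shape)
# single_block(x) would fail here: block 4 expects the 24-channel
# output of block 3, not a 3-channel image.
```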