

import math
import os

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

BatchNorm2d = nn.BatchNorm2d

# This file implements MobileNetV2, a popular lightweight deep learning
# network for image recognition on mobile and embedded devices. Overview:
#   math, os: standard Python libraries for math and filesystem operations.
#   torch: the PyTorch framework for building and training neural networks.
#   torch.nn: neural network layers such as convolution and batch normalization.
#   torch.utils.model_zoo: downloads and loads pretrained model weights.
#   BatchNorm2d = nn.BatchNorm2d: a shorthand alias for the batch-norm layer.
# The code below covers all the key components: convolution layers, the
# inverted residual block, network construction, weight initialization,
# and pretrained-weight loading.

# conv_bn and conv_1x1_bn create convolution layers followed by batch
# normalization and a ReLU6 activation: conv_bn builds a 3x3 convolution,
# conv_1x1_bn a 1x1 convolution.
def conv_bn(inp, oup, stride):
    # Convolve the input feature map, then normalize and activate it.
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )

# InvertedResidual is the core building block of MobileNetV2.
# Depending on expand_ratio, it either first expands the channels with a
# pointwise convolution or goes straight to the depthwise convolution,
# and it supports a residual connection to help information flow.
class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)
        self.use_res_connect = self.stride == 1 and inp == oup

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                #--------------------------------------------#
                #   3x3 depthwise convolution: extracts features
                #   across spatial locations, one filter per channel.
                #--------------------------------------------#
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                #-----------------------------------#
                #   1x1 convolution to adjust the channel count.
                #-----------------------------------#
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )
        else:
            # The full three-stage MobileNetV2 block.
            self.conv = nn.Sequential(
                #-----------------------------------#
                #   1x1 convolution to raise the channel count,
                #   followed by normalization and activation.
                #-----------------------------------#
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                #--------------------------------------------#
                #   3x3 depthwise convolution for spatial feature extraction
                #   (the 1x1 convolution above mixes no spatial information).
                #--------------------------------------------#
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
                #-----------------------------------#
                #   1x1 convolution to lower the channel count again: the initial
                #   expansion gives the network more representational capacity,
                #   and the projection keeps the parameter count and compute low.
                #-----------------------------------#
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.use_res_connect:
            # The residual connection can be used here.
            return x + self.conv(x)
        else:
            return self.conv(x)

# MobileNetV2 defines the whole network structure: a stack of InvertedResidual
# blocks with different expansion ratios and channel counts for feature
# extraction, plus weight initialization and the forward pass.
class MobileNetV2(nn.Module):
    def __init__(self, n_class=1000, input_size=224, width_mult=1.):
        super(MobileNetV2, self).__init__()
        # block takes four arguments: input channels, output channels,
        # stride, and expansion ratio.
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # t is the expansion ratio (expand_ratio), c the output channel count,
        # n the number of repetitions of the block, and s the stride (i.e.
        # whether height and width are halved).
        # The stem conv_bn(3, input_channel, 2) below already downsamples once,
        # and four entries here have s == 2, so the input is downsampled five
        # times in total. Shape comments assume a 512x512x3 input; the trailing
        # numbers are the indices in self.features where downsampling happens.
        interverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],    # 256, 256, 32 -> 256, 256, 16
            [6, 24, 2, 2],    # 256, 256, 16 -> 128, 128, 24   downsamples at features[2]
            [6, 32, 3, 2],    # 128, 128, 24 -> 64, 64, 32     downsamples at features[4]
            [6, 64, 4, 2],    # 64, 64, 32 -> 32, 32, 64       downsamples at features[7]
            [6, 96, 3, 1],    # 32, 32, 64 -> 32, 32, 96
            [6, 160, 3, 2],   # 32, 32, 96 -> 16, 16, 160      downsamples at features[14]
            [6, 320, 1, 1],   # 16, 16, 160 -> 16, 16, 320
        ]

        assert input_size % 32 == 0
        input_channel = int(input_channel * width_mult)
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        # 512, 512, 3 -> 256, 256, 32: the stem convolution takes the 3-channel
        # input image and halves its height and width (stride 2).
        # self.features starts as a list holding the stem; the loop below
        # appends the inverted residual blocks.
        self.features = [conv_bn(3, input_channel, 2)]

        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                if i == 0:
                    # Only the first block of each stage applies the stride s.
                    self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
                else:
                    self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
                input_channel = output_channel

        # 16, 16, 320 -> 16, 16, 1280: a 1x1 convolution adjusts the channel count.
        self.features.append(conv_1x1_bn(input_channel, self.last_channel))
        self.features = nn.Sequential(*self.features)

        # Semantic segmentation models do not use this classification head.
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                n = m.weight.size(1)
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

# load_url downloads pretrained weights, or loads them from the local cache.
# mobilenetv2 builds a MobileNetV2 instance, optionally loading those weights.
def load_url(url, model_dir='./model_data', map_location=None):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if os.path.exists(cached_file):
        return torch.load(cached_file, map_location=map_location)
    else:
        return model_zoo.load_url(url, model_dir=model_dir)

def mobilenetv2(pretrained=False, **kwargs):
    model = MobileNetV2(n_class=1000, **kwargs)
    if pretrained:
        model.load_state_dict(load_url('https://github.com/bubbliiiing/deeplabv3-plus-pytorch/releases/download/v1.0/mobilenet_v2.pth.tar'), strict=False)
    return model

# Test code: build a MobileNetV2 instance and print each feature layer.
if __name__ == "__main__":
    model = mobilenetv2()
    for i, layer in enumerate(model.features):
        print(i, layer)
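
As a quick sanity check on the shape comments above (a stride-2 stem plus four stride-2 stages gives an overall downsampling factor of 32), the backbone can be probed with a dummy tensor. This is a minimal sketch assuming a working PyTorch install; the expected output follows from 512 / 32 = 16 and the final 1280-channel 1x1 convolution.

import torch

model = mobilenetv2()
model.eval()

# A dummy batch of one 512x512 RGB image.
x = torch.randn(1, 3, 512, 512)
with torch.no_grad():
    feat = model.features(x)
print(feat.shape)  # expected: torch.Size([1, 1280, 16, 16])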

This code implements the MobileNetV2 model, a popular lightweight deep learning network for image recognition on mobile and embedded devices. It builds the architecture from inverted residual blocks and from convolution layers paired with batch normalization and ReLU6 activations, and it provides methods for weight initialization and for loading pretrained weights.
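
To make that concrete, here is a minimal end-to-end classification sketch. It assumes the code above has been imported and uses an untrained model; the shapes follow from the architecture itself rather than from any documented example.

import torch

model = mobilenetv2()                 # ImageNet-style head, n_class=1000
model.eval()

img = torch.randn(1, 3, 224, 224)     # input size must be divisible by 32
with torch.no_grad():
    logits = model(img)               # features -> global average pool -> classifier
print(logits.shape)                   # torch.Size([1, 1000])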

The InvertedResidual class is the core building block of MobileNetV2. Each block expands the channels with a 1x1 pointwise convolution, extracts spatial features with a 3x3 depthwise convolution, and projects back down with a second 1x1 convolution; when expand_ratio is 1, the initial expansion is skipped. A residual shortcut is applied only when the stride is 1 and the input and output channel counts match. The MobileNetV2 class stacks these blocks according to its interverted_residual_setting table, varying the expansion ratio, output channels, repetition count, and stride per stage.
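
A short sketch of a single block makes the shortcut rule concrete. The channel and spatial sizes below are illustrative choices, not values from the network's configuration table:

import torch

# The shortcut is used only when stride == 1 and inp == oup.
block_res = InvertedResidual(32, 32, stride=1, expand_ratio=6)
print(block_res.use_res_connect)      # True

# A stride of 2 (or a changed channel count) disables the shortcut.
block_down = InvertedResidual(32, 64, stride=2, expand_ratio=6)
print(block_down.use_res_connect)     # False

x = torch.randn(1, 32, 64, 64)
print(block_res(x).shape)             # torch.Size([1, 32, 64, 64])
print(block_down(x).shape)            # torch.Size([1, 64, 32, 32])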

Additionally, the code includes load_url, which fetches pretrained weights and caches them under ./model_data, and the mobilenetv2 factory function, which builds a model instance and optionally loads those weights with strict=False. When executed as a script, it creates a MobileNetV2 instance and prints each layer of the feature extractor.
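
Because the classification head is unused in segmentation (as a comment in the code notes), a downstream model would typically tap intermediate layers instead. The split points below are hypothetical, chosen from the downsampling indices noted in the code comments; they are illustrative, not taken from any particular downstream repository:

import torch

model = mobilenetv2(pretrained=False)
model.eval()

x = torch.randn(1, 3, 512, 512)
with torch.no_grad():
    low_level = model.features[:4](x)      # through features[3]: 24 channels at 128x128
    deep = model.features[4:](low_level)   # the rest: 1280 channels at 16x16
print(low_level.shape, deep.shape)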
