【计算机视觉实战】第8章 | 现代CNN架构ResNet与EfficientNet:深度网络的革命
欢迎来到《计算机视觉实战》系列教程的第八章。在第七章我们学习了CNN的基础知识,本章我们将深入学习当前最流行和最重要的现代CNN架构:ResNet和EfficientNet。
ResNet(Residual Network)在2015年提出,通过残差连接解决了深层网络训练困难的问题,是现代深度学习的重要里程碑。EfficientNet则通过复合缩放策略在效率和精度之间取得了最佳平衡。理解这些架构的设计思想,对于我们在实际项目中选择和优化模型至关重要。
1. 环境声明
- Python版本:3.12+
- PyTorch版本:2.2+
- torchvision版本:0.17+
- NumPy版本:1.26+
- matplotlib版本:3.8+
2. ResNet:残差学习的革命
2.1 为什么需要残差连接
在ResNet之前,研究者们发现随着网络层数增加,训练误差反而变大,这种现象称为"退化问题"(Degradation Problem)。
退化问题的原因:深层网络难以学习恒等映射(identity mapping),即直接让H(x) = x。
残差学习的核心思想:不直接学习H(x),而是学习F(x) = H(x) - x,然后H(x) = F(x) + x。
类比理解:学习"从A点到B点"很难,但学习"从A点走到B点需要移动多少"相对简单。残差就是这额外的"移动量"。
import torch
import torch.nn as nn
import torch.nn.functional as F
# 普通网络块 vs 残差块
class PlainBlock(nn.Module):
    """A plain two-layer conv block (Conv-BN-ReLU x2) with no skip connection.

    Baseline to contrast against ResidualBlock: the output is purely F(x),
    so approximating an identity mapping requires the conv weights themselves
    to learn it.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        # First conv stage with activation.
        h = self.relu(self.bn1(self.conv1(x)))
        # Second conv stage; note: no shortcut is added before the final ReLU.
        h = self.bn2(self.conv2(h))
        return self.relu(h)
class ResidualBlock(nn.Module):
    """Two-layer conv block with an additive skip connection (ResNet v1 style).

    Output is relu(F(x) + shortcut(x)); when stride or channel count changes,
    the shortcut becomes a 1x1 conv + BN projection, otherwise it is the
    identity (an empty Sequential).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # Projection shortcut only when shapes would otherwise mismatch.
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))
        # Skip connection: add the (possibly projected) input back in.
        return self.relu(branch + self.shortcut(x))
# Demo: compare the behavior of the plain block vs. the residual block.
# Both blocks are freshly (randomly) initialized, so printed values vary run to run.
plain_block = PlainBlock(64, 64, stride=1)
residual_block = ResidualBlock(64, 64, stride=1)
x = torch.randn(1, 64, 56, 56)
print("普通块:")
plain_out = plain_block(x)
print(f" 输入形状: {x.shape}")
print(f" 输出形状: {plain_out.shape}")
print("\n残差块:")
residual_out = residual_block(x)
print(f" 输入形状: {x.shape}")
print(f" 输出形状: {residual_out.shape}")
# Identity-mapping test: measure how far each block's output is from its input.
# A freshly initialized residual block tends to stay closer to the identity
# because its output is F(x) + x with F(x) near zero at initialization.
print("\n恒等映射测试:")
x_identity = torch.randn(1, 64, 56, 56)
# Plain block: the output must be produced entirely by the conv weights.
plain_out = plain_block(x_identity)
diff_plain = (plain_out - x_identity).abs().mean()
print(f"普通块 |F(x) - x|: {diff_plain:.6f}")
# Residual block: the skip connection carries x through unchanged.
residual_out = residual_block(x_identity)
diff_residual = (residual_out - x_identity).abs().mean()
print(f"残差块 |H(x) - x|: {diff_residual:.6f}")
print("\n残差块更容易学习恒等映射!")
2.2 ResNet架构详解
import torch
import torch.nn as nn
class BasicBlock(nn.Module):
    """ResNet BasicBlock (two 3x3 convs), used by ResNet-18/34.

    `expansion` is the ratio of output channels to the block's base width;
    BasicBlock keeps them equal.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Optional projection for the identity path (wired up by _make_layer).
        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        h = self.relu(self.bn1(self.conv1(x)))
        h = self.bn2(self.conv2(h))
        # Residual addition before the final activation (post-activation design).
        return self.relu(h + shortcut)
class Bottleneck(nn.Module):
    """ResNet Bottleneck (1x1 reduce -> 3x3 -> 1x1 expand), used by
    ResNet-50/101/152."""

    # Output channels are 4x the block's base width.
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        width = out_channels
        # 1x1 conv: reduce to the bottleneck width.
        self.conv1 = nn.Conv2d(in_channels, width, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3 conv carries the stride of the block.
        self.conv2 = nn.Conv2d(width, width, 3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1 conv: expand back up by `expansion`.
        self.conv3 = nn.Conv2d(width, width * self.expansion, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(width * self.expansion)
        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        h = self.relu(self.bn1(self.conv1(x)))
        h = self.relu(self.bn2(self.conv2(h)))
        h = self.bn3(self.conv3(h))
        return self.relu(h + shortcut)
class ResNet(nn.Module):
    """Generic ResNet backbone: stem -> 4 residual stages -> GAP -> FC.

    Args:
        block: residual block class exposing an `expansion` class attribute
            (e.g. BasicBlock or Bottleneck).
        layers: number of blocks in each of the four stages.
        num_classes: size of the final classification layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        self.in_channels = 64
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool (overall stride 4).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 halve the spatial resolution.
        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Head: global average pooling + linear classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # He initialization for convs; BN starts as the identity transform.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage: only the first block may downsample / change width."""
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            # Projection so the identity path matches the block output shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, 1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )
        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        stage.extend(block(self.in_channels, out_channels)
                     for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)
# 创建不同规模的ResNet
def resnet18(num_classes=1000):
    """ResNet-18: BasicBlock x [2, 2, 2, 2]. `num_classes` sizes the final FC layer."""
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)
def resnet34(num_classes=1000):
    """ResNet-34: BasicBlock x [3, 4, 6, 3]. `num_classes` sizes the final FC layer."""
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes)
def resnet50(num_classes=1000):
    """ResNet-50: Bottleneck x [3, 4, 6, 3]. `num_classes` sizes the final FC layer."""
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes)
def resnet101(num_classes=1000):
    """ResNet-101: Bottleneck x [3, 4, 23, 3]. `num_classes` sizes the final FC layer."""
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes)
def resnet152(num_classes=1000):
    """ResNet-152: Bottleneck x [3, 8, 36, 3]. `num_classes` sizes the final FC layer."""
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes)
# Demo: instantiate each ResNet variant and report its size and output shape.
print("ResNet各版本架构对比:")
print("=" * 60)
resnet_configs = {
    'ResNet-18': resnet18(),
    'ResNet-34': resnet34(),
    'ResNet-50': resnet50(),
    'ResNet-101': resnet101(),
    'ResNet-152': resnet152(),
}
for name, model in resnet_configs.items():
    model.eval()
    x = torch.randn(1, 3, 224, 224)
    # Total parameter count across all layers.
    params = sum(p.numel() for p in model.parameters())
    # One forward pass to confirm the architecture wires up end to end.
    # (Despite the original comment, no FLOPs are computed here.)
    with torch.no_grad():
        output = model(x)
    print(f"{name}:")
    print(f" 参数量: {params:,} ({params/1e6:.1f}M)")
    print(f" 输出形状: {output.shape}")
    print()
2.3 ResNet的变体和改进
import torch
import torch.nn as nn
import torch.nn.functional as F
class PreActResidualBlock(nn.Module):
    """Pre-activation residual block (ResNet v2): BN -> ReLU -> Conv ordering.

    No activation follows the residual addition, so the skip path stays
    completely linear; the shortcut is taken from the raw input x.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3,
                               stride=1, padding=1, bias=False)
        if stride != 1 or in_channels != out_channels:
            # 1x1 projection when the residual branch changes shape.
            self.shortcut = nn.Conv2d(in_channels, out_channels, 1,
                                      stride=stride, bias=False)
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        h = self.conv1(F.relu(self.bn1(x)))  # activate *before* the conv
        h = self.conv2(F.relu(self.bn2(h)))
        return h + self.shortcut(x)
class SEResidualBlock(nn.Module):
    """Residual block with a Squeeze-and-Excitation channel-attention stage.

    The SE branch global-average-pools the residual features, squeezes them
    through a bottleneck MLP (width / `reduction`), and emits per-channel
    gates in (0, 1) that rescale the features before the skip is added.
    """

    def __init__(self, in_channels, out_channels, stride=1, reduction=16):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Squeeze (GAP) -> excite (bottleneck MLP) -> sigmoid gates.
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(out_channels, out_channels // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(out_channels // reduction, out_channels),
            nn.Sigmoid(),
        )
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.downsample = None

    def forward(self, x):
        h = F.relu(self.bn1(self.conv1(x)))
        h = self.bn2(self.conv2(h))
        # Broadcast the per-channel gates back over the spatial dimensions.
        gates = self.se(h).view(h.size(0), h.size(1), 1, 1)
        h = h * gates
        identity = x if self.downsample is None else self.downsample(x)
        return F.relu(h + identity)
# Demo: one forward pass through the SE block to confirm the shapes.
se_block = SEResidualBlock(64, 128, stride=2)
x = torch.randn(1, 64, 56, 56)
out = se_block(x)
print(f"SE残差块输入: {x.shape}")
print(f"SE残差块输出: {out.shape}")
# Summary of the ResNet refinements discussed in this section.
print("\nResNet改进方向:")
print("1. Pre-activation: BN-ReLU-Conv顺序,提高训练稳定性")
print("2. SE (Squeeze-and-Excitation): 通道注意力机制")
print("3. ResNeXt: 分组卷积,增加Cardinality")
print("4. DenseNet: 密集连接,每层接收所有前面层的特征")
3. EfficientNet:复合缩放的艺术
3.1 EfficientNet的设计思想
EfficientNet是2019年Google提出的新型CNN架构,核心思想是通过复合缩放(Compound Scaling)同时调整网络的深度、宽度和分辨率。
单维度缩放的问题:
- 只增加深度:训练困难,收益递减
- 只增加宽度:参数呈平方增长
- 只增加分辨率:感受野变大,但宽度没跟上
复合缩放公式:
- 深度(depth):$d = \alpha^{\phi}$
- 宽度(width):$w = \beta^{\phi}$
- 分辨率(resolution):$r = \gamma^{\phi}$

约束条件:$\alpha \cdot \beta^{2} \cdot \gamma^{2} \approx 2$,且 $\alpha \ge 1,\ \beta \ge 1,\ \gamma \ge 1$。
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
class Swish(nn.Module):
    """Swish / SiLU activation: f(x) = x * sigmoid(x)."""

    def forward(self, x):
        return x.sigmoid().mul(x)
class SEBlock(nn.Module):
    """Squeeze-and-Excitation attention as used inside EfficientNet's MBConv.

    `se_ratio` sets the bottleneck width as a fraction of the input channels
    (clamped to at least 1 channel).
    """

    def __init__(self, in_channels, se_ratio=0.25):
        super().__init__()
        hidden = max(1, int(in_channels * se_ratio))
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(in_channels, hidden, bias=False),
            Swish(),
            nn.Linear(hidden, in_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Per-channel gates, broadcast back over H and W.
        gates = self.se(x).view(x.size(0), x.size(1), 1, 1)
        return x * gates
class MBConvBlock(nn.Module):
    """Mobile Inverted Residual Bottleneck (MBConv) block from MobileNetV2/EfficientNet.

    Pipeline: 1x1 expansion -> depthwise conv -> SE attention -> 1x1 projection,
    with a skip connection (plus optional drop-connect / stochastic depth)
    when stride == 1 and the channel count is unchanged.

    Args:
        in_channels / out_channels: block input / output channels.
        kernel_size: depthwise kernel size (odd, so `kernel_size // 2` padding
            preserves the spatial size).
        stride: depthwise conv stride.
        expand_ratio: width multiplier for the expansion conv (1 = no expansion).
        se_ratio: SE bottleneck ratio; <= 0 disables the SE stage.
        drop_connect_rate: probability of dropping the residual branch for a
            sample during training (stochastic depth).
    """

    def __init__(self, in_channels, out_channels, kernel_size=3,
                 stride=1, expand_ratio=1, se_ratio=0.25, drop_connect_rate=0.0):
        super().__init__()
        self.stride = stride
        self.drop_connect_rate = drop_connect_rate
        self.in_channels = in_channels
        self.out_channels = out_channels
        expanded_channels = in_channels * expand_ratio
        # 1x1 expansion conv, skipped entirely when expand_ratio == 1.
        if expand_ratio != 1:
            self.expand_conv = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, 1, bias=False),
                nn.BatchNorm2d(expanded_channels, momentum=0.01, eps=1e-3),
                Swish()
            )
        else:
            self.expand_conv = nn.Identity()
        # Depthwise conv: groups == channels, each channel filtered independently.
        self.depthwise_conv = nn.Sequential(
            nn.Conv2d(expanded_channels, expanded_channels, kernel_size,
                      stride=stride, padding=kernel_size // 2,
                      groups=expanded_channels, bias=False),
            nn.BatchNorm2d(expanded_channels, momentum=0.01, eps=1e-3),
            Swish()
        )
        # Channel attention on the expanded features.
        if se_ratio > 0:
            self.se = SEBlock(expanded_channels, se_ratio)
        else:
            self.se = nn.Identity()
        # 1x1 projection back down; no activation (linear bottleneck).
        self.project_conv = nn.Sequential(
            nn.Conv2d(expanded_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels, momentum=0.01, eps=1e-3)
        )
        # Residual only when the tensor shape is preserved.
        self.use_residual = (stride == 1 and in_channels == out_channels)

    def _drop_connect(self, x):
        """Stochastic depth: zero the whole residual branch for a random subset
        of samples and rescale the survivors by 1 / keep_prob."""
        keep_prob = 1.0 - self.drop_connect_rate
        # One Bernoulli draw per sample, broadcast over C/H/W.
        mask = torch.rand(x.size(0), 1, 1, 1,
                          dtype=x.dtype, device=x.device) < keep_prob
        return x / keep_prob * mask

    def forward(self, x):
        identity = x
        out = self.expand_conv(x)
        out = self.depthwise_conv(out)
        out = self.se(out)
        out = self.project_conv(out)
        if self.use_residual:
            # BUG FIX vs. the original: drop-connect now (a) keeps the branch
            # with probability 1 - rate instead of rate, (b) draws one mask per
            # sample rather than per spatial position, and (c) runs only in
            # training mode, so eval-time inference is deterministic.
            if self.training and self.drop_connect_rate > 0:
                out = self._drop_connect(out)
            out = out + identity
        return out
def calculate_efficientnet_params():
    """Print the EfficientNet-B0 stage layout and the compound-scaling recap.

    `configs` sketches how the width/depth/resolution coefficients evolve
    across the B0-B7 family; only B0 is spelled out in this demo.
    """
    configs = {
        'B0': {
            'width_coef': (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0),
            'depth_coef': (1.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0),
            'resolution': (224, 240, 260, 300, 380, 456, 528),
            'depth_div': 8,
        },
        # B1-B7 omitted; they follow the same structure.
    }
    print("EfficientNet-B0 网络配置:")
    print("=" * 60)
    print(f"{'Stage':<8} {'Operator':<20} {'Channels':<10} {'Layers':<8} {'Resolution':<10}")
    print("-" * 60)
    # Canonical B0 stage table: (stage, operator, channels, layers, resolution).
    stages_config = [
        (1, 'Conv3x3 + BN + Swish', 32, 1, 224),
        (2, 'MBConv1, 3x3', 16, 1, 224),
        (3, 'MBConv6, 3x3', 24, 2, 224),
        (4, 'MBConv6, 5x5', 40, 2, 224),
        (5, 'MBConv6, 3x3', 80, 3, 224),
        (6, 'MBConv6, 5x5', 112, 3, 224),
        (7, 'MBConv6, 5x5', 192, 4, 224),
        (8, 'MBConv6, 3x3', 320, 1, 224),
        (9, 'Conv1x1 + BN + Swish + GlobalPool', 1280, 1, 224),
    ]
    for row, (_, op, channels, layers, res) in enumerate(stages_config, start=1):
        print(f"{row:<8} {op:<20} {channels:<10} {layers:<8} {res:<10}")
    print("\n复合缩放策略:")
    print("1. 基准网络B0使用神经架构搜索(NAS)得到")
    print("2. B1-B7通过复合缩放得到")
    print("3. 缩放系数满足: α×β²×γ²≈2,α≥1, β≥1, γ≥1")
    print("4. φ=1时,B0的系数: α=1.2, β=1.1, γ=1.15")


# Run the demo at import time (matches the article's flow).
calculate_efficientnet_params()
3.2 EfficientNet vs ResNet 对比
import torch
import torchvision.models as models
import time
def compare_models():
    """Print a parameter-count / accuracy comparison of ResNet vs. EfficientNet.

    Accuracy numbers are the published ImageNet Top-1 results; the FLOPS
    column is a crude proxy (2 * params), not a measured count.
    """
    # FIX: `pretrained=<bool>` has been deprecated since torchvision 0.13 and
    # warns on modern versions; `weights=None` is the supported way to build
    # randomly initialized models.
    models_to_compare = {
        'ResNet-18': models.resnet18(weights=None),
        'ResNet-50': models.resnet50(weights=None),
        'EfficientNet-B0': models.efficientnet_b0(weights=None),
        'EfficientNet-B1': models.efficientnet_b1(weights=None),
        'EfficientNet-B2': models.efficientnet_b2(weights=None),
    }
    print("模型参数量和计算量对比:")
    print("=" * 70)
    print(f"{'模型':<20} {'参数量':<15} {'Top-1 Acc':<15} {'FLOPS':<15}")
    print("-" * 70)
    # Published ImageNet Top-1 accuracies (%) for reference.
    reference_acc = {
        'ResNet-18': 69.8,
        'ResNet-50': 76.1,
        'EfficientNet-B0': 77.1,
        'EfficientNet-B1': 79.1,
        'EfficientNet-B2': 80.1,
    }
    for name, model in models_to_compare.items():
        params = sum(p.numel() for p in model.parameters())
        print(f"{name:<20} {params/1e6:>10.1f}M {reference_acc.get(name, 'N/A'):>12}% {params*2/1e9:>10.1f}G")
    print("\n关键洞察:")
    print("-" * 70)
    print("1. EfficientNet-B0参数量仅5.3M,却达到77.1%的准确率")
    print("2. ResNet-50有25.6M参数,但准确率只有76.1%")
    print("3. EfficientNet通过复合缩放,在效率和精度间取得更好的平衡")
    print("4. 对于移动端/边缘设备,EfficientNet是更好的选择")
    print("-" * 70)


compare_models()
4. 其他重要CNN架构
4.1 DenseNet:密集连接
import torch
import torch.nn as nn
class DenseBlock(nn.Module):
    """DenseNet dense block: every layer consumes the concatenation of all
    earlier feature maps and contributes `growth_rate` new channels.

    Each layer is the simplified BN -> ReLU -> 3x3 Conv composite (the full
    DenseNet-BC variant adds a 1x1 bottleneck conv, omitted here).
    Output channels = in_channels + num_layers * growth_rate.
    """

    def __init__(self, in_channels, growth_rate, num_layers):
        super().__init__()
        self.layers = nn.ModuleList()
        for idx in range(num_layers):
            # Layer idx sees the original input plus idx earlier outputs.
            width_in = in_channels + idx * growth_rate
            self.layers.append(nn.Sequential(
                nn.BatchNorm2d(width_in),
                nn.ReLU(inplace=True),
                nn.Conv2d(width_in, growth_rate, 3, padding=1, bias=False),
            ))

    def forward(self, x):
        collected = [x]
        for layer in self.layers:
            collected.append(layer(torch.cat(collected, dim=1)))
        return torch.cat(collected, dim=1)
class TransitionLayer(nn.Module):
    """DenseNet transition: BN -> ReLU -> 1x1 conv to compress channels,
    then 2x2 average pooling to halve the spatial resolution."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.AvgPool2d(2, 2),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)
class DenseNet(nn.Module):
    """Simplified DenseNet-BC: stem -> alternating dense blocks and transition
    layers -> BN -> global average pool -> linear classifier.

    Args:
        growth_rate: channels each dense layer adds.
        block_config: number of layers in each dense block.
        num_init_features: stem output channels.
        bn_size: bottleneck multiplier (accepted for API parity; unused in
            this simplified version).
        num_classes: classifier output size.
    """

    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, num_classes=1000):
        super().__init__()
        # Stem: 7x7/2 conv + BN + ReLU + 3x3/2 max-pool.
        self.features = nn.Sequential(
            nn.Conv2d(3, num_init_features, 7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(num_init_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),
        )
        self.dense_blocks = nn.ModuleList()
        self.transitions = nn.ModuleList()
        channels = num_init_features
        last = len(block_config) - 1
        for idx, depth in enumerate(block_config):
            self.dense_blocks.append(DenseBlock(channels, growth_rate, depth))
            channels += depth * growth_rate
            if idx != last:
                # Transition halves both the channel count (compression 0.5)
                # and the spatial resolution.
                self.transitions.append(TransitionLayer(channels, channels // 2))
                channels //= 2
        self.final_bn = nn.BatchNorm2d(channels)
        self.classifier = nn.Linear(channels, num_classes)

    def forward(self, x):
        x = self.features(x)
        for idx, dense in enumerate(self.dense_blocks):
            x = dense(x)
            # The last dense block has no trailing transition.
            if idx < len(self.transitions):
                x = self.transitions[idx](x)
        x = F.relu(self.final_bn(x))
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        return self.classifier(x)
# Recap of DenseNet's key design ideas.
print("DenseNet核心思想:")
print("=" * 50)
print("1. 每层接收所有前面层的特征作为输入")
print("2. 特征拼接而非相加,保留更多信息")
print("3. 需要压缩(Compression)以控制通道数")
print("4. 参数量更少,泛化能力更强")
print("=" * 50)
4.2 MobileNet:轻量化网络
import torch
import torch.nn as nn
import torch.nn.functional as F
class DepthwiseSeparableConv(nn.Module):
    """Depthwise-separable convolution: per-channel 3x3 conv (depthwise)
    followed by a 1x1 conv (pointwise) that mixes channels.

    Cuts the multiply count roughly by a factor of N + D_K^2 versus a
    standard conv with the same shapes (see the FLOPs demo below).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # groups == in_channels => every input channel is convolved independently.
        self.depthwise = nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1,
                      groups=in_channels, bias=False),
            nn.BatchNorm2d(in_channels),
            nn.ReLU6(inplace=True),
        )
        # 1x1 conv recombines channels and sets the output width.
        self.pointwise = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU6(inplace=True),
        )

    def forward(self, x):
        return self.pointwise(self.depthwise(x))
class MobileNetV1(nn.Module):
    """MobileNet V1: a plain stack of depthwise-separable conv blocks.

    The channel/stride schedule below matches the original article's layer
    list (note it uses four 512->512 stages); Dropout(0.25) precedes the
    classifier.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        # (in_channels, out_channels, stride) for each depthwise-separable stage.
        ds_plan = [
            (32, 64, 1),
            (64, 128, 2), (128, 128, 1),
            (128, 256, 2), (256, 256, 1),
            (256, 512, 2),
            (512, 512, 1), (512, 512, 1), (512, 512, 1), (512, 512, 1),
            (512, 1024, 2), (1024, 1024, 1),
        ]
        stem = [
            nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True),
        ]
        body = [DepthwiseSeparableConv(i, o, stride=s) for i, o, s in ds_plan]
        self.features = nn.Sequential(*stem, *body)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(0.25),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)
# Compare the multiply-accumulate counts of a standard convolution vs. a
# depthwise-separable convolution (MobileNet paper, Section 3 notation).
print("计算量对比:")
print("=" * 50)
# Standard conv cost:            D_K * D_K * M * N * D_F * D_F
# Depthwise-separable conv cost: D_K * D_K * M * D_F * D_F + M * N * D_F * D_F
D_K = 3 # kernel size
M = 64 # input channels
N = 64 # output channels
D_F = 14 # feature-map side length
conv_flops = D_K * D_K * M * N * D_F * D_F
depthwise_sep_flops = D_K * D_K * M * D_F * D_F + M * N * D_F * D_F
print(f"普通卷积: {conv_flops:,} FLOPS")
print(f"深度可分离卷积: {depthwise_sep_flops:,} FLOPS")
print(f"加速比: {conv_flops/depthwise_sep_flops:.2f}x")
print("\n这就是MobileNet如此轻量的秘密!")
5. 模型选择指南
5.1 根据场景选择模型
def model_selection_guide():
    """Print a scenario -> recommended-model cheat sheet as a fixed-width table."""
    # (deployment scenario, recommended models, key characteristics)
    scenarios = [
        ("移动端/边缘设备", "MobileNetV3", "延迟敏感、算力有限"),
        ("通用图像分类", "EfficientNet-B0/B1", "精度与效率平衡"),
        ("高精度需求", "EfficientNet-B5/B7, ResNet-152", "追求最高精度"),
        ("实时检测/分割", "ResNet-18/34, MobileNetV3", "需要快速推理"),
        ("服务器端部署", "EfficientNet-B3/B4, ResNet-50", "可以使用GPU"),
        ("模型压缩/蒸馏", "MobileNetV3, EfficientNet-B0", "作为教师或学生模型"),
    ]
    print("模型选择指南:")
    print("=" * 80)
    print(f"{'场景':<20} {'推荐模型':<25} {'特点':<30}")
    print("-" * 80)
    for scenario, model, feature in scenarios:
        print(f"{scenario:<20} {model:<25} {feature:<30}")
    print("=" * 80)
def trade_off_analysis():
    """Print an accuracy-vs-parameter-count table with an Acc/MParam column."""
    # (model name, parameters in millions, ImageNet Top-1 accuracy %)
    data = [
        ("MobileNetV3-S", 2.5, 56),
        ("MobileNetV3-L", 5.4, 56),
        ("EfficientNet-B0", 5.3, 77),
        ("EfficientNet-B1", 7.8, 79),
        ("EfficientNet-B2", 9.2, 80),
        ("EfficientNet-B3", 12.2, 82),
        ("ResNet-18", 11.7, 70),
        ("ResNet-34", 21.8, 73),
        ("ResNet-50", 25.6, 76),
        ("ResNet-101", 44.5, 78),
    ]
    print("\n精度 vs 参数量权衡:")
    print("=" * 60)
    print(f"{'模型':<20} {'参数量(M)':<15} {'ImageNet Top-1(%)':<15} {'效率(Acc/MParam)':<15}")
    print("-" * 60)
    for name, params, acc in data:
        # Efficiency = accuracy points per million parameters.
        efficiency = acc / params
        print(f"{name:<20} {params:<15.1f} {acc:<15} {efficiency:<15.2f}")
    print("\n关键洞察:")
    print("1. EfficientNet系列在效率(精度/参数)上普遍优于ResNet")
    print("2. MobileNetV3适合极端资源受限场景")
    print("3. ResNet在精度上仍然有竞争力,且更易于实现和调试")
    print("4. 实际选择需要考虑硬件特性、推理库优化等因素")


trade_off_analysis()
6. 实战:使用预训练模型
6.1 PyTorch预训练模型使用
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
def use_pretrained_model():
    """Run one inference pass with a pretrained torchvision ResNet-50.

    A random tensor stands in for a real image (the PIL path is shown
    commented out); prints the top-5 class indices and probabilities and
    returns the model.
    """
    # 1. Pretrained ImageNet-1k weights (V1 recipe).
    print("加载预训练ResNet-50...")
    model = models.resnet50(weights='IMAGENET1K_V1')
    model.eval()
    # 2. Standard ImageNet eval transform: resize -> center crop -> normalize
    #    with the ImageNet channel means/stds.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # 3. Real-image path (disabled so the demo runs without image assets):
    # img = Image.open('example.jpg')
    # input_tensor = preprocess(img).unsqueeze(0)
    input_tensor = torch.randn(1, 3, 224, 224)
    # 4. Forward pass without gradient tracking.
    with torch.no_grad():
        output = model(input_tensor)
    # 5. Softmax over the 1000 logits, then take the 5 most probable classes.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    top5_prob, top5_indices = torch.topk(probabilities, 5)
    # Mapping class indices to names would need the ImageNet label file.
    print("\nTop-5 预测:")
    print(f" 类别ID: {top5_indices.numpy()}")
    print(f" 概率: {top5_prob.numpy()}")
    return model
# 模型微调示例
def fine_tune_model():
    """Transfer-learning demo: swap the classifier head and freeze the backbone.

    Returns the modified model; only the `fc` parameters remain trainable.
    """
    model = models.resnet50(weights='IMAGENET1K_V1')
    # New head sized for the downstream task (10 classes in this example).
    num_features = model.fc.in_features
    num_classes = 10
    model.fc = nn.Linear(num_features, num_classes)
    # Freeze everything except the freshly created head.
    for name, param in model.named_parameters():
        param.requires_grad = 'fc' in name
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"\n迁移学习参数统计:")
    print(f" 可训练参数: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")
    print(f" 冻结参数: {total_params - trainable_params:,}")
    return model
# Quick reference for the torchvision `weights=` API and fine-tuning strategy.
print("预训练模型使用指南:")
print("=" * 60)
print("1. weights='IMAGENET1K_V1' 加载官方预训练权重")
print("2. weights=None 加载随机初始化权重")
print("3. 微调时通常只替换最后的分类层")
print("4. 数据集小时可以冻结大部分层,只训练少量层")
print("5. 数据集大时可以微调更多层,甚至整个网络")
6.2 模型性能基准测试
import torch
import torchvision.models as models
import time
def benchmark_model(model, input_shape=(1, 3, 224, 224), warmup=10, runs=100):
    """Measure average forward-pass latency in seconds.

    Runs `warmup` untimed passes first (fills caches, triggers lazy
    initialization), then times `runs` passes. On CUDA the device is
    synchronized around the timed region so asynchronous GPU work is
    actually counted.

    Returns:
        Average wall-clock seconds per forward pass.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
    sample = torch.randn(input_shape).to(device)
    with torch.no_grad():
        for _ in range(warmup):
            model(sample)
        if device.type == 'cuda':
            torch.cuda.synchronize()
        start = time.time()
        for _ in range(runs):
            model(sample)
        if device.type == 'cuda':
            torch.cuda.synchronize()
    return (time.time() - start) / runs
def run_benchmark():
    """Benchmark several torchvision models on CPU (and GPU when available)."""
    # (display name, torchvision constructor)
    models_to_test = [
        ('ResNet-18', models.resnet18),
        ('ResNet-50', models.resnet50),
        ('EfficientNet-B0', models.efficientnet_b0),
        ('MobileNetV3-S', models.mobilenet_v3_small),
    ]
    print("模型推理速度基准测试:")
    print("=" * 60)
    print(f"{'模型':<20} {'CPU延迟(ms)':<15} {'GPU延迟(ms)':<15}")
    print("-" * 60)
    for name, build in models_to_test:
        net = build(weights=None)
        # CPU uses fewer iterations since each pass is much slower.
        cpu_ms = benchmark_model(net.cpu(), warmup=5, runs=20) * 1000
        if torch.cuda.is_available():
            gpu_ms = benchmark_model(net.cuda(), warmup=10, runs=100) * 1000
            print(f"{name:<20} {cpu_ms:>10.2f} {gpu_ms:>10.2f}")
        else:
            print(f"{name:<20} {cpu_ms:>10.2f} {'N/A':<15}")
    print("=" * 60)
    print("\n测试说明:")
    print("- CPU: Intel i9-12900K")
    print("- GPU: NVIDIA RTX 3090")
    print("- 输入尺寸: 1x3x224x224")
    print("- 测试环境可能影响结果,仅供参考")


run_benchmark()
7. 避坑小贴士
常见错误1:混淆ResNet和ResNeXt
解释:
- ResNet:使用残差连接,基本卷积
- ResNeXt:使用残差连接 + 分组卷积(Cardinality)
# ResNeXt的分组卷积
class ResNeXtBlock(nn.Module):
    """ResNeXt bottleneck block: like ResNet's Bottleneck, but the 3x3 conv is
    a grouped conv with `cardinality` groups ("split-transform-merge").

    Output channels are `out_channels * 4`, matching ResNet's Bottleneck
    expansion, so the two blocks are drop-in comparable.
    """

    def __init__(self, in_channels, out_channels, stride=1, cardinality=32, base_width=4):
        super().__init__()
        # Internal width of the grouped conv, scaled by base_width and cardinality.
        width = int(out_channels * (base_width / 64)) * cardinality
        self.conv1 = nn.Conv2d(in_channels, width, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # Grouped 3x3 conv -- the defining difference from ResNet.
        self.conv2 = nn.Conv2d(width, width, 3, stride=stride, padding=1,
                               groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_channels * 4, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if stride != 1 or in_channels != out_channels * 4:
            # Projection shortcut so the identity path matches the output shape.
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * 4, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * 4)
            )

    def forward(self, x):
        # FIX: the original forward was a `pass` stub that silently returned
        # None; implemented the standard pre-sum residual bottleneck logic.
        identity = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return self.relu(out + identity)
常见错误2:忽略模型的实际输入尺寸
现象:模型在推理时与训练时使用不同尺寸,导致精度下降
正确做法:
# EfficientNet-B0 使用 224x224
# EfficientNet-B1 使用 240x240
# EfficientNet-B2 使用 260x260
# EfficientNet-B3 使用 300x300
# 不要随意更改输入尺寸,否则需要重新调整模型的复合缩放参数
# 如果必须使用不同尺寸,可以使用 Adaptive pooling 或 interpolation
常见错误3:BatchNorm在推理时忘记切换模式
现象:训练和推理时BatchNorm统计量不一致
正确做法:
# Training mode: BatchNorm normalizes with per-batch statistics and updates
# its running mean/var. (Illustrative snippet: `model` / `train_loader` /
# `test_loader` are assumed to be defined elsewhere.)
model.train() # BatchNorm使用batch统计量
for images, labels in train_loader:
    outputs = model(images)
# Inference mode: BatchNorm switches to the running statistics accumulated
# during training; no_grad() disables autograd bookkeeping.
model.eval() # BatchNorm使用训练时的统计量
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
8. 本章小结
通过本章的学习,你应该已经掌握了:
- ResNet核心原理:理解了残差学习如何解决深层网络训练困难
- ResNet架构变体:掌握了BasicBlock、Bottleneck以及各种改进
- EfficientNet设计:理解了复合缩放策略的数学原理
- 其他重要架构:了解了DenseNet、MobileNet的特点
- 模型选择:能够根据场景选择合适的模型
- 预训练模型使用:掌握了迁移学习和微调的实践方法
- 性能基准测试:学会了评估模型的推理速度
一句话总结:ResNet的残差连接让极深网络的训练成为可能,而EfficientNet的复合缩放则展示了同时优化深度、宽度和分辨率的系统性方法。
9. 练习与思考
- 残差连接:分析残差连接为什么能缓解梯度消失问题
- 复合缩放:推导EfficientNet复合缩放的约束条件
- 架构对比:在相同参数量下,比较ResNet和EfficientNet的性能
- 模型压缩:探索知识蒸馏、量化和剪枝等模型压缩技术
- 自定义模块:设计一个结合SE和残差连接的注意力模块
下一章预告:第9章《目标检测基础R-CNN家族》将带你学习目标检测的基础知识,从R-CNN到Fast R-CNN,理解两阶段检测器的原理。
如果本章内容对你有帮助,欢迎点赞、收藏和关注。有任何问题可以在评论区留言。
AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。
更多推荐

所有评论(0)