基於 Pytorch 對 YOLOV5 進行簡易實現

【GiantPandaCV 導語】這篇文章主要針對 YOLOV5-Pytorch 版本的網絡結構代碼進行實現，簡化代碼的理解並簡化配置文件，進一步梳理 YOLOV5 的四種網絡結構，在這個過程中對 V5 的網絡有着更加深入的理解。最後希望看完這篇文章的讀者可以有所收穫，對於代碼中一些寫法上的優化，希望可以和大家一起交流進步。

一、網絡完整代碼

實現思路：v5 中的 common 代碼結構進行了保留，因爲這一部分代碼是比較好理解的，整體代碼看起來也比較簡單；主要改動的是整體網絡結構的搭建，因爲通過解析 yaml 文件來搭建網絡，對於一些開發人員來說不是很友好。

網絡中的一些變量

c1：輸入通道 c2：輸出通道  k：卷積核大小  s：步長 p：padding g：分組  act：激活函數 e：擴展倍數
gw：網絡寬度因子  gd：網絡深度因子  n：模塊重複次數  nc：類別數

主幹網絡代碼CSPDarknet53

import torch
import torch.nn as nn
    
    
def autopad(k, p=None):  # kernel, padding
    """Return the padding to use for kernel size ``k``.

    An explicit ``p`` is returned untouched. Otherwise the padding is half the
    kernel size (floor division): an int for an int ``k``, or a list with one
    entry per element when ``k`` is a sequence.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
    
    
class CBL(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> activation.

    Args:
        c1: nominal input channels (multiplied by ``e`` and rounded).
        c2: nominal output channels (multiplied by ``e`` and rounded).
        k: kernel size.
        s: stride.
        p: padding; ``None`` lets ``autopad`` derive it from ``k``.
        g: number of convolution groups.
        act: ``True`` selects SiLU, an ``nn.Module`` instance is used as given,
            anything else disables the activation (identity).
        e: channel multiplier applied to both ``c1`` and ``c2``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, e=1.0):
        super().__init__()
        in_ch, out_ch = round(c1 * e), round(c2 * e)
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=k, stride=s,
                              padding=autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)
    
    
class Focus(nn.Module):
    """Fold a 2x2 spatial neighbourhood into channels, then apply a CBL.

    Only the output width ``c2`` is scaled by ``e``; the convolution always
    receives ``c1 * 4`` input channels.
    """

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, act=True, e=1.0):
        super().__init__()
        out_ch = round(c2 * e)
        self.conv = CBL(c1 * 4, out_ch, k, s, p, g, act)

    def forward(self, x):
        # Take every second element along the last two dims at the four
        # possible offsets and stack the results on dim 1, so the channel count
        # grows 4x while both trailing dims are halved.
        # Slice order: (0, 0), (1, 0), (0, 1), (1, 1) as (second-last, last).
        phases = [x[..., row::2, col::2] for col in (0, 1) for row in (0, 1)]
        return self.conv(torch.cat(phases, dim=1))
    
    
class SPP(nn.Module):
    """Spatial pyramid pooling: 1x1 CBL, parallel max-pools, concat, 1x1 CBL.

    Args:
        c1: nominal input channels (multiplied by ``e`` and rounded).
        c2: nominal output channels (multiplied by ``e`` and rounded).
        k: iterable of max-pool kernel sizes; each pool uses stride 1 and
            padding ``size // 2``.
        e: channel multiplier applied to ``c1`` and ``c2``.
    """

    def __init__(self, c1, c2, k=(5, 9, 13), e=1.0):
        super(SPP, self).__init__()
        c1 = round(c1 * e)
        c2 = round(c2 * e)
        c_ = c1 // 2
        self.cbl_before = CBL(c1, c_, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]
        self.max_pool = nn.ModuleList(pools)
        # The concatenation holds the un-pooled map plus one map per pool, so
        # the fusing conv must accept c_ * (number of pools + 1) channels
        # instead of a hard-coded c_ * 4 that only fits three kernel sizes.
        self.cbl_after = CBL(c_ * (len(pools) + 1), c2, 1, 1)

    def forward(self, x):
        x = self.cbl_before(x)
        x_cat = torch.cat([x] + [m(x) for m in self.max_pool], 1)
        return self.cbl_after(x_cat)
    
    
class ResUnit_n(nn.Module):
    """Stack of ``n`` residual units; each unit is a 1x1 CBL then a 3x3 CBL.

    Every unit owns its own weights and, when ``c1 == c2``, its own skip
    connection (``x + unit(x)``). When ``c1 != c2`` no skip is applied; the
    first unit maps ``c1 -> c2`` and the remaining units map ``c2 -> c2``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of units; ``0`` makes the module an identity.
    """

    def __init__(self, c1, c2, n):
        super(ResUnit_n, self).__init__()
        self.shortcut = c1 == c2
        units = []
        in_ch = c1
        for _ in range(n):
            # Build a fresh pair per repetition: repeating one instance would
            # register the same parameters n times (unintended weight tying).
            units.append(nn.Sequential(
                CBL(in_ch, in_ch, k=1, s=1, p=0),
                CBL(in_ch, c2, k=3, s=1, p=1),
            ))
            in_ch = c2
        # Kept as nn.Sequential so the submodule names (res_unit_n.<i>.<j>)
        # stay the same as before.
        self.res_unit_n = nn.Sequential(*units)

    def forward(self, x):
        for unit in self.res_unit_n:
            x = x + unit(x) if self.shortcut else unit(x)
        return x
    
    
class CSP1_n(nn.Module):
    """CSP block with residual units on one branch.

    ``up`` is a CBL followed by ``ResUnit_n``; ``bottom`` is a plain 1x1
    convolution. The two results are concatenated on the channel dim and fused
    by ``tie`` (BatchNorm -> LeakyReLU -> 1x1 conv).

    Args:
        c1: nominal input channels (scaled by the width factor).
        c2: nominal output channels (scaled by the width factor).
        k, s, p, g, act: forwarded to the CBL at the head of the ``up`` branch.
        n: nominal number of residual units (scaled by the depth factor).
        e: ``[depth_factor, width_factor]``; ``None`` means no scaling.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
        super(CSP1_n, self).__init__()

        # The declared default e=None used to crash on e[1]; treat it as
        # "no scaling" instead.
        depth, width = (1.0, 1.0) if e is None else (e[0], e[1])
        c1 = round(c1 * width)
        c2 = round(c2 * width)
        # Keep at least one residual unit: round() can yield 0 for a small
        # depth factor (e.g. n=1, depth=0.33).
        n = max(round(n * depth), 1)
        c_ = c2 // 2
        self.up = nn.Sequential(
            # CBL resolves p=None through autopad itself.
            CBL(c1, c_, k, s, p, g, act),
            ResUnit_n(c_, c_, n),
            # NOTE: a trailing nn.Conv2d(c_, c_, 1, 1, 0, bias=False) is left
            # out here, following the latest YOLOv5 structure; this differs
            # slightly from the structure diagrams found online.
        )
        self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
        self.tie = nn.Sequential(
            nn.BatchNorm2d(c_ * 2),
            nn.LeakyReLU(),
            nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
        )

    def forward(self, x):
        total = torch.cat([self.up(x), self.bottom(x)], dim=1)
        out = self.tie(total)
        return out
    
    
class CSPDarkNet(nn.Module):
    """Backbone that returns three feature maps of decreasing resolution.

    Args:
        gd: depth factor, passed to every CSP1_n as ``e[0]``.
        gw: width factor, passed to every block as the channel multiplier.
    """

    def __init__(self, gd=0.33, gw=0.5):
        super().__init__()
        scale = [gd, gw]  # CSP1_n expects [depth factor, width factor]

        # Focus stem followed by two stride-2 CBL + CSP stages.
        big_layers = [
            Focus(3, 64, e=gw),
            CBL(64, 128, k=3, s=2, p=1, e=gw),
            CSP1_n(128, 128, n=3, e=scale),
            CBL(128, 256, k=3, s=2, p=1, e=gw),
            CSP1_n(256, 256, n=9, e=scale),
        ]
        self.truck_big = nn.Sequential(*big_layers)

        # One more stride-2 CBL + CSP stage.
        middle_layers = [
            CBL(256, 512, k=3, s=2, p=1, e=gw),
            CSP1_n(512, 512, n=9, e=scale),
        ]
        self.truck_middle = nn.Sequential(*middle_layers)

        # Final stride-2 CBL followed by spatial pyramid pooling.
        small_layers = [
            CBL(512, 1024, k=3, s=2, p=1, e=gw),
            SPP(1024, 1024, e=gw),
        ]
        self.truck_small = nn.Sequential(*small_layers)

    def forward(self, x):
        # Run the stages in sequence and keep every intermediate output.
        # For gw=0.5 and a (2, 3, 608, 608) input the first map is
        # (2, 128, 76, 76).
        feats = []
        for stage in (self.truck_big, self.truck_middle, self.truck_small):
            x = stage(x)
            feats.append(x)
        return tuple(feats)
    
    
def darknet53(gd, gw, pretrained, **kwargs):
    """Build a ``CSPDarkNet`` and optionally load weights from a checkpoint.

    Args:
        gd: depth factor forwarded to ``CSPDarkNet``.
        gw: width factor forwarded to ``CSPDarkNet``.
        pretrained: falsy to skip loading, otherwise the path (``str``) of a
            state-dict checkpoint.
        **kwargs: accepted for call-site compatibility; not used.

    Returns:
        The constructed ``CSPDarkNet``.

    Raises:
        Exception: if ``pretrained`` is truthy but not a ``str``.
    """
    model = CSPDarkNet(gd, gw)
    if not pretrained:
        return model
    if not isinstance(pretrained, str):
        raise Exception(f"darknet request a pretrained path. got[{pretrained}]")
    # Deserialize onto the CPU so checkpoints saved from a CUDA device also
    # load on hosts without a GPU; load_state_dict then copies the values into
    # the model's own parameters.
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    state_dict = torch.load(pretrained, map_location="cpu")
    model.load_state_dict(state_dict)
    return model

整體網絡的構建

import torch
import torch.nn as nn
from cspdarknet53v5 import darknet53
    
    
def autopad(k, p=None):  # kernel, padding
    """Return the padding to use for kernel size ``k``.

    An explicit ``p`` is returned untouched. Otherwise the padding is half the
    kernel size (floor division): an int for an int ``k``, or a list with one
    entry per element when ``k`` is a sequence.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
    
    
class UpSample(nn.Module):
    """Nearest-neighbour upsampling by a fixed factor of 2."""

    def __init__(self):
        super().__init__()
        # No learnable parameters; wrapped in a module so it can sit inside
        # an nn.Sequential.
        self.up_sample = nn.Upsample(mode='nearest', scale_factor=2)

    def forward(self, x):
        upsampled = self.up_sample(x)
        return upsampled
    
    
class CBL(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> activation.

    Args:
        c1: nominal input channels (multiplied by ``e`` and rounded).
        c2: nominal output channels (multiplied by ``e`` and rounded).
        k: kernel size.
        s: stride.
        p: padding; ``None`` lets ``autopad`` derive it from ``k``.
        g: number of convolution groups.
        act: ``True`` selects SiLU, an ``nn.Module`` instance is used as given,
            anything else disables the activation (identity).
        e: channel multiplier applied to both ``c1`` and ``c2``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, e=1.0):
        super().__init__()
        in_ch, out_ch = round(c1 * e), round(c2 * e)
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size=k, stride=s,
                              padding=autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)
    
    
class ResUnit_n(nn.Module):
    """Stack of ``n`` residual units; each unit is a 1x1 CBL then a 3x3 CBL.

    Every unit owns its own weights and, when ``c1 == c2``, its own skip
    connection (``x + unit(x)``). When ``c1 != c2`` no skip is applied; the
    first unit maps ``c1 -> c2`` and the remaining units map ``c2 -> c2``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of units; ``0`` makes the module an identity.
    """

    def __init__(self, c1, c2, n):
        super(ResUnit_n, self).__init__()
        self.shortcut = c1 == c2
        units = []
        in_ch = c1
        for _ in range(n):
            # Build a fresh pair per repetition: repeating one instance would
            # register the same parameters n times (unintended weight tying).
            units.append(nn.Sequential(
                CBL(in_ch, in_ch, k=1, s=1, p=0),
                CBL(in_ch, c2, k=3, s=1, p=1),
            ))
            in_ch = c2
        # Kept as nn.Sequential so the submodule names (res_unit_n.<i>.<j>)
        # stay the same as before.
        self.res_unit_n = nn.Sequential(*units)

    def forward(self, x):
        for unit in self.res_unit_n:
            x = x + unit(x) if self.shortcut else unit(x)
        return x
    
    
class CSP1_n(nn.Module):
    """CSP block with residual units on one branch.

    ``up`` is a CBL followed by ``ResUnit_n``; ``bottom`` is a plain 1x1
    convolution. The two results are concatenated on the channel dim and fused
    by ``tie`` (BatchNorm -> LeakyReLU -> 1x1 conv).

    Args:
        c1: nominal input channels (scaled by the width factor).
        c2: nominal output channels (scaled by the width factor).
        k, s, p, g, act: forwarded to the CBL at the head of the ``up`` branch.
        n: nominal number of residual units (scaled by the depth factor).
        e: ``[depth_factor, width_factor]``; ``None`` means no scaling.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
        super(CSP1_n, self).__init__()

        # The declared default e=None used to crash on e[1]; treat it as
        # "no scaling" instead.
        depth, width = (1.0, 1.0) if e is None else (e[0], e[1])
        c1 = round(c1 * width)
        c2 = round(c2 * width)
        # Keep at least one residual unit: round() can yield 0 for a small
        # depth factor (e.g. n=1, depth=0.33).
        n = max(round(n * depth), 1)
        c_ = c2 // 2
        self.up = nn.Sequential(
            # CBL resolves p=None through autopad itself.
            CBL(c1, c_, k, s, p, g, act),
            ResUnit_n(c_, c_, n),
            # NOTE: a trailing nn.Conv2d(c_, c_, 1, 1, 0, bias=False) is left
            # out here, following the latest YOLOv5 structure; this differs
            # slightly from the structure diagrams found online.
        )
        self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
        self.tie = nn.Sequential(
            nn.BatchNorm2d(c_ * 2),
            nn.LeakyReLU(),
            nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
        )

    def forward(self, x):
        total = torch.cat([self.up(x), self.bottom(x)], dim=1)
        out = self.tie(total)
        return out
    
    
class CSP2_n(nn.Module):
    """CSP block whose main branch is ``n`` pairs of 1x1 CBLs (no residuals).

    The main branch (CBL pairs, then ``conv_up``) and the bypass
    (``conv_bottom``) are concatenated on the channel dim and fused by ``tie``
    (BatchNorm -> LeakyReLU -> 1x1 conv).

    Args:
        c1: input channels.
        c2: output channels.
        e: ratio giving the hidden width ``c_ = int(c1 * e)``.
        n: number of CBL pairs on the main branch.
    """

    def __init__(self, c1, c2, e=0.5, n=1):
        super(CSP2_n, self).__init__()
        c_ = int(c1 * e)
        pairs = []
        in_ch = c1
        for _ in range(n):
            # One fresh pair per repetition. Only the first pair sees c1
            # channels; later pairs receive the c_ channels produced by the
            # previous one. Repeating a single instance both tied the weights
            # and failed on the channel mismatch whenever n > 1 and c1 != c_.
            pairs.append(nn.Sequential(
                CBL(in_ch, c_, 1, 1, 0),
                CBL(c_, c_, 1, 1, 0),
            ))
            in_ch = c_
        self.cbl_2n = nn.Sequential(*pairs)
        # in_ch is c_ after at least one pair, or still c1 when n == 0.
        self.conv_up = nn.Conv2d(in_ch, c_, 1, 1, 0)
        self.conv_bottom = nn.Conv2d(c1, c_, 1, 1, 0)
        self.tie = nn.Sequential(
            nn.BatchNorm2d(c_ * 2),
            nn.LeakyReLU(),
            nn.Conv2d(c_ * 2, c2, 1, 1, 0)
        )

    def forward(self, x):
        up = self.conv_up(self.cbl_2n(x))
        total = torch.cat([up, self.conv_bottom(x)], dim=1)
        out = self.tie(total)
        return out
    
    
class yolov5(nn.Module):
    """Detector made of a CSPDarknet backbone, an FPN + PAN neck and three heads.

    Args:
        nc: number of classes; every head emits ``3 * (5 + nc)`` channels.
        gd: depth factor, forwarded to the backbone and to each CSP1_n.
        gw: width factor, forwarded to the backbone and to every neck block.
    """

    def __init__(self, nc=80, gd=0.33, gw=0.5):
        super().__init__()
        scale = [gd, gw]  # CSP1_n expects [depth factor, width factor]

        # ---- Backbone (built without pretrained weights) ----
        self.backbone = darknet53(gd, gw, None)

        # ---- Neck, top-down half: upsample and merge with backbone maps ----
        self.neck_small = nn.Sequential(
            CSP1_n(1024, 1024, n=3, e=scale),
            CBL(1024, 512, 1, 1, 0, e=gw),
        )
        self.up_middle = nn.Sequential(UpSample())
        self.out_set_middle = nn.Sequential(
            CSP1_n(1024, 512, n=3, e=scale),
            CBL(512, 256, 1, 1, 0, e=gw),
        )
        self.up_big = nn.Sequential(UpSample())
        self.out_set_tie_big = nn.Sequential(CSP1_n(512, 256, n=3, e=scale))

        # ---- Neck, bottom-up half: stride-2 CBLs and merge again ----
        self.pan_middle = nn.Sequential(CBL(256, 256, 3, 2, 1, e=gw))
        self.out_set_tie_middle = nn.Sequential(CSP1_n(512, 512, n=3, e=scale))
        self.pan_small = nn.Sequential(CBL(512, 512, 3, 2, 1, e=gw))
        self.out_set_tie_small = nn.Sequential(CSP1_n(1024, 1024, n=3, e=scale))

        # ---- Prediction heads: one 1x1 conv per scale ----
        # Presumably 3 anchors x (4 box + 1 objectness + nc classes) - confirm.
        head_out = 3 * (5 + nc)
        self.out_big = nn.Sequential(
            nn.Conv2d(round(256 * gw), head_out, 1, 1, 0),
        )
        self.out_middle = nn.Sequential(
            nn.Conv2d(round(512 * gw), head_out, 1, 1, 0),
        )
        self.out_small = nn.Sequential(
            nn.Conv2d(round(1024 * gw), head_out, 1, 1, 0),
        )

    def forward(self, x):
        feat_big, feat_middle, feat_small = self.backbone(x)

        # Top-down: coarsest map first, upsample, concat with the next finer
        # backbone map on the channel dim.
        td_small = self.neck_small(feat_small)
        td_middle = self.out_set_middle(
            torch.cat([self.up_middle(td_small), feat_middle], dim=1))
        td_big = self.out_set_tie_big(
            torch.cat([self.up_big(td_middle), feat_big], dim=1))

        # Bottom-up: downsample the finer map and concat with the matching
        # top-down map.
        bu_middle = self.out_set_tie_middle(
            torch.cat([self.pan_middle(td_big), td_middle], dim=1))
        bu_small = self.out_set_tie_small(
            torch.cat([self.pan_small(bu_middle), td_small], dim=1))

        # Heads, returned coarsest-first.
        out_small = self.out_small(bu_small)
        out_middle = self.out_middle(bu_middle)
        out_big = self.out_big(td_big)
        return out_small, out_middle, out_big
    
    
if __name__ == '__main__':
    # Model-size presets: name -> [gd (depth factor), gw (width factor)].
    config = {
        'yolov5s': [0.33, 0.50],
        'yolov5m': [0.67, 0.75],
        'yolov5l': [1.00, 1.00],
        'yolov5x': [1.33, 1.25],
    }
    # Change this key to build a different variant.
    depth, width = config['yolov5x']
    net = yolov5(nc=80, gd=depth, gw=width)
    print(net)
    # Smoke test: push a random batch through and print the three output shapes.
    dummy = torch.randn(2, 3, 416, 416)
    outputs = net(dummy)
    print(*(out.shape for out in outputs))

二、網絡結構的解析

殘差塊 ResUnit_n

class ResUnit_n(nn.Module):
    """Stack of ``n`` residual units; each unit is a 1x1 CBL then a 3x3 CBL.

    Every unit owns its own weights and, when ``c1 == c2``, its own skip
    connection (``x + unit(x)``). When ``c1 != c2`` no skip is applied; the
    first unit maps ``c1 -> c2`` and the remaining units map ``c2 -> c2``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of units; ``0`` makes the module an identity.
    """

    def __init__(self, c1, c2, n):
        super(ResUnit_n, self).__init__()
        self.shortcut = c1 == c2
        units = []
        in_ch = c1
        for _ in range(n):
            # Build a fresh pair per repetition: repeating one instance would
            # register the same parameters n times (unintended weight tying).
            units.append(nn.Sequential(
                CBL(in_ch, in_ch, k=1, s=1, p=0),
                CBL(in_ch, c2, k=3, s=1, p=1),
            ))
            in_ch = c2
        # Kept as nn.Sequential so the submodule names (res_unit_n.<i>.<j>)
        # stay the same as before.
        self.res_unit_n = nn.Sequential(*units)

    def forward(self, x):
        for unit in self.res_unit_n:
            x = x + unit(x) if self.shortcut else unit(x)
        return x

CSP1_x 結構

構建思路：CSP1_n 代碼進行優化，把 CSP 看做一個趴着的動物，頭在左面，尾巴在右邊；up 是靠近天空的地方，bottom 是靠近地的，tie 就是動物的尾巴

class CSP1_n(nn.Module):
    """CSP block with residual units on one branch.

    ``up`` is a CBL followed by ``ResUnit_n``; ``bottom`` is a plain 1x1
    convolution. The two results are concatenated on the channel dim and fused
    by ``tie`` (BatchNorm -> LeakyReLU -> 1x1 conv).

    Args:
        c1: nominal input channels (scaled by the width factor).
        c2: nominal output channels (scaled by the width factor).
        k, s, p, g, act: forwarded to the CBL at the head of the ``up`` branch.
        n: nominal number of residual units (scaled by the depth factor).
        e: ``[depth_factor, width_factor]``; ``None`` means no scaling.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, n=1, e=None):
        super(CSP1_n, self).__init__()

        # The declared default e=None used to crash on e[1]; treat it as
        # "no scaling" instead.
        depth, width = (1.0, 1.0) if e is None else (e[0], e[1])
        c1 = round(c1 * width)
        c2 = round(c2 * width)
        # Keep at least one residual unit: round() can yield 0 for a small
        # depth factor (e.g. n=1, depth=0.33).
        n = max(round(n * depth), 1)
        c_ = c2 // 2
        self.up = nn.Sequential(
            # CBL resolves p=None through autopad itself.
            CBL(c1, c_, k, s, p, g, act),
            ResUnit_n(c_, c_, n),
            # NOTE: a trailing nn.Conv2d(c_, c_, 1, 1, 0, bias=False) is left
            # out here, following the latest YOLOv5 structure; this differs
            # slightly from the structure diagrams found online.
        )
        self.bottom = nn.Conv2d(c1, c_, 1, 1, 0)
        self.tie = nn.Sequential(
            nn.BatchNorm2d(c_ * 2),
            nn.LeakyReLU(),
            nn.Conv2d(c_ * 2, c2, 1, 1, 0, bias=False)
        )

    def forward(self, x):
        total = torch.cat([self.up(x), self.bottom(x)], dim=1)
        out = self.tie(total)
        return out

CSPDarknet 主幹網絡構建

class CSPDarkNet(nn.Module):
    """Backbone that returns three feature maps of decreasing resolution.

    Args:
        gd: depth factor, passed to every CSP1_n as ``e[0]``.
        gw: width factor, passed to every block as the channel multiplier.
    """

    def __init__(self, gd=0.33, gw=0.5):
        super().__init__()
        scale = [gd, gw]  # CSP1_n expects [depth factor, width factor]

        # Focus stem followed by two stride-2 CBL + CSP stages.
        big_layers = [
            Focus(3, 64, e=gw),
            CBL(64, 128, k=3, s=2, p=1, e=gw),
            CSP1_n(128, 128, n=3, e=scale),
            CBL(128, 256, k=3, s=2, p=1, e=gw),
            CSP1_n(256, 256, n=9, e=scale),
        ]
        self.truck_big = nn.Sequential(*big_layers)

        # One more stride-2 CBL + CSP stage.
        middle_layers = [
            CBL(256, 512, k=3, s=2, p=1, e=gw),
            CSP1_n(512, 512, n=9, e=scale),
        ]
        self.truck_middle = nn.Sequential(*middle_layers)

        # Final stride-2 CBL followed by spatial pyramid pooling.
        small_layers = [
            CBL(512, 1024, k=3, s=2, p=1, e=gw),
            SPP(1024, 1024, e=gw),
        ]
        self.truck_small = nn.Sequential(*small_layers)

    def forward(self, x):
        # Run the stages in sequence and keep every intermediate output.
        feats = []
        for stage in (self.truck_big, self.truck_middle, self.truck_small):
            x = stage(x)
            feats.append(x)
        return tuple(feats)

整體網絡構建

class yolov5(nn.Module):
    """Detector made of a CSPDarknet backbone, an FPN + PAN neck and three heads.

    Args:
        nc: number of classes; every head emits ``3 * (5 + nc)`` channels.
        gd: depth factor, forwarded to the backbone and to each CSP1_n.
        gw: width factor, forwarded to the backbone and to every neck block.
    """

    def __init__(self, nc=80, gd=0.33, gw=0.5):
        super().__init__()
        scale = [gd, gw]  # CSP1_n expects [depth factor, width factor]

        # ---- Backbone (built without pretrained weights) ----
        self.backbone = darknet53(gd, gw, None)

        # ---- Neck ----
        self.neck_small = nn.Sequential(
            CSP1_n(1024, 1024, n=3, e=scale),
            CBL(1024, 512, 1, 1, 0, e=gw),
        )
        # FPN: two top-down upsampling steps that strengthen semantic
        # information.
        self.up_middle = nn.Sequential(UpSample())
        self.out_set_middle = nn.Sequential(
            CSP1_n(1024, 512, n=3, e=scale),
            CBL(512, 256, 1, 1, 0, e=gw),
        )
        self.up_big = nn.Sequential(UpSample())
        self.out_set_tie_big = nn.Sequential(CSP1_n(512, 256, n=3, e=scale))

        # PAN: two bottom-up downsampling steps that strengthen localization
        # information.
        self.pan_middle = nn.Sequential(CBL(256, 256, 3, 2, 1, e=gw))
        self.out_set_tie_middle = nn.Sequential(CSP1_n(512, 512, n=3, e=scale))
        self.pan_small = nn.Sequential(CBL(512, 512, 3, 2, 1, e=gw))
        # NOTE: CSP2_n(512, 512) was the alternative considered for this slot.
        self.out_set_tie_small = nn.Sequential(CSP1_n(1024, 1024, n=3, e=scale))

        # ---- Prediction heads: one 1x1 conv per scale ----
        # Presumably 3 anchors x (4 box + 1 objectness + nc classes) - confirm.
        head_out = 3 * (5 + nc)
        self.out_big = nn.Sequential(
            nn.Conv2d(round(256 * gw), head_out, 1, 1, 0),
        )
        self.out_middle = nn.Sequential(
            nn.Conv2d(round(512 * gw), head_out, 1, 1, 0),
        )
        self.out_small = nn.Sequential(
            nn.Conv2d(round(1024 * gw), head_out, 1, 1, 0),
        )

    def forward(self, x):
        feat_big, feat_middle, feat_small = self.backbone(x)

        # Top-down: coarsest map first, upsample, concat with the next finer
        # backbone map on the channel dim.
        td_small = self.neck_small(feat_small)
        td_middle = self.out_set_middle(
            torch.cat([self.up_middle(td_small), feat_middle], dim=1))
        td_big = self.out_set_tie_big(
            torch.cat([self.up_big(td_middle), feat_big], dim=1))

        # Bottom-up: downsample the finer map and concat with the matching
        # top-down map.
        bu_middle = self.out_set_tie_middle(
            torch.cat([self.pan_middle(td_big), td_middle], dim=1))
        bu_small = self.out_set_tie_small(
            torch.cat([self.pan_small(bu_middle), td_small], dim=1))

        # Heads, returned coarsest-first.
        out_small = self.out_small(bu_small)
        out_middle = self.out_middle(bu_middle)
        out_big = self.out_big(td_big)
        return out_small, out_middle, out_big

四種尺寸的配置文件的寫法，放在了 config 字典中，這是網絡模型的配置參數，沒有將其他的參數放到配置文件中，可以將類別也放到配置文件中。在上面的網絡代碼中寬度參數就是變量e然後傳入到每個網絡中去。

# Model-size presets: name -> [gd (depth factor), gw (width factor)].
config = {
    #            gd    gw
    'yolov5s': [0.33, 0.50],
    'yolov5m': [0.67, 0.75],
    'yolov5l': [1.00, 1.00],
    'yolov5x': [1.33, 1.25]
}
# Change this key to build a different variant.
# (These statements are dedented to the same level as ``config`` so the
# snippet runs as-is when copied.)
net_size = config['yolov5x']
net = yolov5(nc=80, gd=net_size[0], gw=net_size[1])

v5 原始代碼將 v3 中的 Head 部分單獨寫成了一個 Detect 類，主要的原因是 v5 中使用了一些訓練的技巧，在 Detect 中有訓練和推理兩個部分。v5 原始代碼對於初學者來說是比較困難的，首先是網絡的寫法，對於編碼能力的要求相對比較高。不過這種用 yaml 配置文件來對網絡進行配置的方法在很多公司已經開始使用，這可能是未來工程化代碼的一種寫法，還是需要掌握的。

三、總結

我個人的感覺是，對於這種網絡的設計還有代碼的寫法要有天馬行空的想象力，代碼寫起來也像武俠小說中那種飄逸感。（網絡結構圖網上有很多，我是仿照着江大白的結構圖，在其基礎上結合最新的 v5 代碼進行了調整。）
最新的 v5 網絡結構中出現了Transformer結構，有種 CV 領域工程化上要變天的節奏，大家可以去了解一些。

歡迎關注 GiantPandaCV, 在這裏你將看到獨家的深度學習分享，堅持原創，每天分享我們學習到的新鮮知識。(• ̀ω•́)✧

有對文章相關的問題，或者想要加入交流羣，歡迎添加 BBuf 微信：

本文由 Readfog 進行 AMP 轉碼，版權歸原作者所有。
來源：https://mp.weixin.qq.com/s/yhlAmIU2v4rydUw_qTQtWg

一、網絡完整代碼

二、網絡結構的解析

三、總結

猜你喜歡