参考文章:yolov5 anchors设置详解_高祥xiang的博客-CSDN博客 https://blog.csdn.net/qq_27278957/article/details/120036450

一、yolov7 的默认锚框都定义在 .\cfg\training 目录下的 yaml 文件里面,比如 yolov7.yaml:
# anchors
anchors:
  - [12,16, 19,36, 40,28]  # P3/8
  - [36,75, 76,55, 72,146]  # P4/16
  - [142,110, 192,243, 459,401]  # P5/32
其中,每一行锚框应用于一个不同尺度的特征图:第一行(P3/8,下采样 8 倍)对应最大的特征图,第二行(P4/16)对应中等大小的特征图,第三行(P5/32)对应最小的特征图。特征图越大,对应的锚框越小,用于检测越小的目标。
自动检查并重新计算锚框的具体代码在 ..\utils\autoanchor.py 文件里面,其中 check_anchors() 函数如下:
def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check how well the model's anchors fit the dataset labels; recompute them if the fit is poor.

    Arguments:
        dataset: loaded dataset exposing ``shapes`` and ``labels``
            (presumably per-image sizes and label arrays whose columns 3:5 are
            normalized width/height -- confirm against the dataset class)
        model: model whose last layer (``model.model[-1]``, or
            ``model.module.model[-1]`` for a wrapped model) holds
            ``anchor_grid``, ``anchors`` and ``stride``
        thr: anchor-label wh ratio threshold; a label counts as matched by an
            anchor when the worse of its width/height ratios exceeds ``1 / thr``
        imgsz: image size the label sizes are scaled to

    Return:
        None. When better anchors are found, ``anchor_grid`` and ``anchors`` of
        the last layer are overwritten in place.
    """
    prefix = colorstr('autoanchor: ')
    print(f'\n{prefix}Analyzing anchors... ', end='')
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    # Rescale every image shape so that its longest side equals imgsz
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # wh

    def metric(k):  # compute metric
        r = wh[:, None] / k[None]
        # min(r, 1/r) is symmetric in "too big"/"too small"; .min(2) keeps the worse of the w and h ratios
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
        bpr = (best > 1. / thr).float().mean()  # best possible recall
        return bpr, aat

    anchors = m.anchor_grid.clone().cpu().view(-1, 2)  # current anchors
    bpr, aat = metric(anchors)
    print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
    if bpr < 0.98:  # threshold to recompute
        print('. Attempting to improve anchors, please wait...')
        na = m.anchor_grid.numel() // 2  # number of anchors
        try:
            anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        except Exception as e:
            # Best effort: on failure `anchors` still holds the current anchors,
            # so the comparison below falls through to the "original anchors" branch.
            print(f'{prefix}ERROR: {e}')
        new_bpr = metric(anchors)[0]
        if new_bpr > bpr:  # replace anchors
            anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
            m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid)  # for inference
            m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
            print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
        else:
            print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline
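其中的主要指标是 bpr 和 aat:bpr(best possible recall)是"最佳锚框的匹配度超过阈值 1/thr"的标注框所占的比例,aat(anchors above threshold)是平均每个标注框有多少个锚框的匹配度超过阈值。
其中 bpr 就是判断是否需要重新计算锚定框的依据:当 bpr 小于 0.98 时,会调用下面的 kmean_anchors() 重新计算锚定框。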
def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            path: path to dataset *.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    thr = 1. / thr  # from here on thr is the minimum acceptable ratio metric
    prefix = colorstr('autoanchor: ')

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
        print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
              f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(path, str):  # *.yaml file
        with open(path) as f:
            data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
    else:
        dataset = path  # dataset

    # Get label wh
    # Rescale every image shape so that its longest side equals img_size
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels
    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans calculation
    print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    # NOTE: the message expression is a print() call, so the text is printed when the
    # assertion fails and the AssertionError itself carries None as its message.
    assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}')
    k *= s  # undo the whitening
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0],400)
    # ax[1].hist(wh[wh[:, 1]<100, 1],400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
    pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:  # keep the mutated anchors only when fitness improves
            f, k = fg, kg.copy()
            pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
            if verbose:
                print_results(k)

    return print_results(k)
对 kmean_anchors()函数中的参数做一下简单解释(代码中已经有了英文注释):
path:包含数据集文件路径等相关信息的 yaml 文件(比如 coco128.yaml),或者 已经加载好的数据集对象(yolov5 自动计算锚定框时就是用的这种方式:check_anchors() 直接把已加载的 dataset 传给 kmean_anchors(),再从中读取标签信息进行处理)
img_size:图像尺寸。计算数据集样本标签框的宽高时,需要先把图像按长边缩放到 img_size 大小后再计算;默认值是640
thr:标注框与锚框之间宽、高比值的最大阈值(anchor-label wh ratio threshold),训练时使用的是 超参文件 hyp.scratch.yaml 中的 “anchor_t” 参数值;默认值是4.0;自动计算时,check_anchors() 会把自身的 thr 参数直接传给 kmean_anchors()。
如果不想自动计算锚框,可以在训练时加上 --noautoanchor 参数(该参数是 store_true 类型,不加时默认会进行自动锚框检查):
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
import utils.autoanchor as autoAC
# Recompute the anchors for your own dataset.
# Positional arguments follow kmean_anchors(path, n, img_size, thr, gen, verbose):
# dataset config file, 9 anchors, image size 640, thr 5.0, 1000 generations, verbose output.
new_anchors = autoAC.kmean_anchors('你自己数据集的配置文件', 9, 640, 5.0, 1000, True)
[[ 7.2368 6.6779]
[ 13.46 11.71]
[ 8.3749 21.515]
[ 24.094 20.985]
[ 56.135 21.595]
[ 38.372 38.616]
[ 89.473 37.31]
[ 79.463 74.843]
[ 160.38 110.27]]
# anchors
anchors:
  - [7.5,7, 13.5,12, 8.5,22]  # P3/8
  - [24.1,21, 56.2,22, 38.5,39]  # P4/16
  - [89.5,37.5, 79.5,75, 160.5,110.5]  # P5/32
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████| 69/69 [00:27<00:00, 2.50it/s]
all 2189 73533 0.87 0.725 0.816 0.521
car 2189 42020 0.895 0.845 0.913 0.645
truck 2189 2204 0.917 0.793 0.84 0.555
van 2189 3346 0.836 0.642 0.742 0.555
bus 2189 2540 0.897 0.685 0.786 0.541
pedestrian 2189 7208 0.823 0.676 0.764 0.362
cyclist 2189 5484 0.831 0.738 0.83 0.496
tricyclist 2189 2152 0.906 0.712 0.831 0.513
motorcyclist 2189 8579 0.854 0.713 0.826 0.501
100 epochs completed in 4.091 hours.
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████| 69/69 [00:29<00:00, 2.36it/s]
all 2189 73533 0.866 0.72 0.814 0.52
car 2189 42020 0.888 0.841 0.912 0.644
truck 2189 2204 0.905 0.79 0.839 0.554
van 2189 3346 0.842 0.634 0.738 0.553
bus 2189 2540 0.894 0.675 0.783 0.54
pedestrian 2189 7208 0.814 0.67 0.761 0.362
cyclist 2189 5484 0.83 0.732 0.827 0.494
tricyclist 2189 2152 0.906 0.707 0.831 0.513
motorcyclist 2189 8579 0.848 0.709 0.823 0.499
Speed: 0.6/0.8/1.5 ms inference/NMS/total per 640x640 image at batch-size 32
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████| 69/69 [00:29<00:00, 2.36it/s]
all 2189 73533 0.85 0.707 0.792 0.509
car 2189 42020 0.868 0.834 0.888 0.633
truck 2189 2204 0.901 0.764 0.824 0.556
van 2189 3346 0.793 0.621 0.716 0.542
bus 2189 2540 0.927 0.642 0.763 0.537
pedestrian 2189 7208 0.794 0.662 0.741 0.353
cyclist 2189 5484 0.794 0.742 0.815 0.487
tricyclist 2189 2152 0.885 0.694 0.779 0.475
motorcyclist 2189 8579 0.834 0.701 0.807 0.487
Speed: 0.6/0.8/1.5 ms inference/NMS/total per 640x640 image at batch-size 32