最近开始使用Keras来做深度学习,发现模型搭建相较于MXnet, Caffe等确实比较方便,适合于新手练手,于是找来了目标检测经典的模型Faster-RCNN的keras代码来练练手,代码的主题部分转自知乎专栏Learning Machine,作者张潇捷,链接如下:
keras版faster-rcnn算法详解(1.RPN计算)
keras版faster-rcnn算法详解 (2.roi计算及其他)
我再对代码中loss的计算,config的设置等细节进行学习
Keras版Faster-RCNN代码学习(IOU,RPN)1
Keras版Faster-RCNN代码学习(Batch Normalization)2
Keras版Faster-RCNN代码学习(loss,xml解析)3
Keras版Faster-RCNN代码学习(roipooling resnet/vgg)4
Keras版Faster-RCNN代码学习(measure_map,train/test)5
config.py
from keras import backend as K
import math
class Config:
def __init__(self):
self.verbose = True
self.network = ‘resnet50‘
# setting for data augmentation
self.use_horizontal_flips = False
self.use_vertical_flips = False
self.rot_90 = False
# anchor box scales
self.anchor_box_scales = [128, 256, 512]
# anchor box ratios
self.anchor_box_ratios = [[1, 1], [1./math.sqrt(2), 2./math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]]
# size to resize the smallest side of the image
self.im_size = 600
# image channel-wise mean to subtract
self.img_channel_mean = [103.939, 116.779, 123.68]
self.img_scaling_factor = 1.0
# number of ROIs at once
self.num_rois = 4
# stride at the RPN (this depends on the network configuration)
self.rpn_stride = 16
self.balanced_classes = False
# scaling the stdev
self.std_scaling = 4.0
self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
# overlaps for RPN
self.rpn_min_overlap = 0.3
self.rpn_max_overlap = 0.7
# overlaps for classifier ROIs
self.classifier_min_overlap = 0.1
self.classifier_max_overlap = 0.5
# placeholder for the class mapping, automatically generated by the parser
self.class_mapping = None
#location of pretrained weights for the base network
# weight files can be found at:
# https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5
# https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
self.model_path = ‘model_frcnn.vgg.hdf5‘
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
对代码所需要的参数进行配置
data_generators.py
import cv2
import numpy as np
import copy
#传递图像参数,增广配置参数,是否进行图像增广
def augment(img_data, config, augment=True):
assert ‘filepath‘ in img_data
assert ‘bboxes‘ in img_data
assert ‘width‘ in img_data
assert ‘height‘ in img_data
img_data_aug = copy.deepcopy(img_data)
img = cv2.imread(img_data_aug[‘filepath‘])
if augment:
rows, cols = img.shape[:2]
#图像水平翻转,对应的bbox的对角坐标也进行水平翻转,翻转概率为50%
if config.use_horizontal_flips and np.random.randint(0, 2) == 0:
img = cv2.flip(img, 1)
for bbox in img_data_aug[‘bboxes‘]:
x1 = bbox[‘x1‘]
x2 = bbox[‘x2‘]
bbox[‘x2‘] = cols - x1
bbox[‘x1‘] = cols - x2
#图像垂直翻转,对应的bbox的对角坐标也进行垂直翻转,翻转概率为50%
if config.use_vertical_flips and np.random.randint(0, 2) == 0:
img = cv2.flip(img, 0)
for bbox in img_data_aug[‘bboxes‘]:
y1 = bbox[‘y1‘]
y2 = bbox[‘y2‘]
bbox[‘y2‘] = rows - y1
bbox[‘y1‘] = rows - y2
#图像按90度旋转,对应的bbox的对角坐标也进行90度旋转,旋转概率为50%
if config.rot_90:
angle = np.random.choice([0,90,180,270],1)[0]
if angle == 270:
img = np.transpose(img, (1,0,2))
img = cv2.flip(img, 0)
elif angle == 180:
img = cv2.flip(img, -1)
elif angle == 90:
img = np.transpose(img, (1,0,2))
img = cv2.flip(img, 1)
elif angle == 0:
pass
for bbox in img_data_aug[‘bboxes‘]:
x1 = bbox[‘x1‘]
x2 = bbox[‘x2‘]
y1 = bbox[‘y1‘]
y2 = bbox[‘y2‘]
if angle == 270:
bbox[‘x1‘] = y1
bbox[‘x2‘] = y2
bbox[‘y1‘] = cols - x2
bbox[‘y2‘] = cols - x1
elif angle == 180:
bbox[‘x2‘] = cols - x1
bbox[‘x1‘] = cols - x2
bbox[‘y2‘] = rows - y1
bbox[‘y1‘] = rows - y2
elif angle == 90:
bbox[‘x1‘] = rows - y2
bbox[‘x2‘] = rows - y1
bbox[‘y1‘] = x1
bbox[‘y2‘] = x2
elif angle == 0:
pass
img_data_aug[‘width‘] = img.shape[1]
img_data_aug[‘height‘] = img.shape[0]
return img_data_aug, img
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
关于坐标计算,还是有点绕的,可通过矩阵计算或者画图描述(画图描述比较清晰,对应对角坐标会发生变化,对角坐标为一个最小的,一个最大的)如[x1,y1,x2,y2],则x1 < x2,y1 < y2,面积为(x2-x1)(y2-y1),所以对角坐标翻转后的坐标并不是对角坐标,需要调整,即找到最小的x,y和最大的x,y。如[x1,y1,x2,y2],则x1 < x2,y1 < y2,进行水平翻转后,cols - x2 < cols - x1,y1 < y2,重新组合的坐标为[cols - x2,y1,cols - x1,y2],其他同理。
IOU,RPN计算
from __future__ import absolute_import
import numpy as np
import cv2
import random
import copy
from . import data_augment
import threading
import itertools
#并集
def union(au, bu, area_intersection):
area_a = (au[2] - au[0]) * (au[3] - au[1])
area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])
area_union = area_a + area_b - area_intersection
return area_union
#交集
def intersection(ai, bi):
x = max(ai[0], bi[0])
y = max(ai[1], bi[1])
w = min(ai[2], bi[2]) - x
h = min(ai[3], bi[3]) - y
if w < 0 or h < 0:
return 0
return w*h
#交并比
def iou(a, b):
# a and b should be (x1,y1,x2,y2)
if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:
return 0.0
area_i = intersection(a, b)
area_u = union(a, b, area_i)
return float(area_i) / float(area_u + 1e-6)
#图像resize
def get_new_img_size(width, height, img_min_side=600):
if width <= height:
f = float(img_min_side) / width
resized_height = int(f * height)
resized_width = img_min_side
else:
f = float(img_min_side) / height
resized_width = int(f * width)
resized_height = img_min_side
return resized_width, resized_height
class SampleSelector:
def __init__(self, class_count):
# ignore classes that have zero samples
self.classes = [b for b in class_count.keys() if class_count[b] > 0]
self.class_cycle = itertools.cycle(self.classes)
self.curr_class = next(self.class_cycle)
def skip_sample_for_balanced_class(self, img_data):
class_in_img = False
for bbox in img_data[‘bboxes‘]:
cls_name = bbox[‘class‘]
if cls_name == self.curr_class:
class_in_img = True
self.curr_class = next(self.class_cycle)
break
if class_in_img:
return False
else:
return True
def calc_rpn(C, img_data, width, height, resized_width, resized_height, img_length_calc_function):
downscale = float(C.rpn_stride)
anchor_sizes = C.anchor_box_scales
anchor_ratios = C.anchor_box_ratios
num_anchors = len(anchor_sizes) * len(anchor_ratios)
# calculate the output map size based on the network architecture
(output_width, output_height) = img_length_calc_function(resized_width, resized_height)
n_anchratios = len(anchor_ratios)
# initialise empty output objectives
y_rpn_overlap = np.zeros((output_height, output_width, num_anchors))
y_is_box_valid = np.zeros((output_height, output_width, num_anchors))
y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4))
num_bboxes = len(img_data[‘bboxes‘])
num_anchors_for_bbox = np.zeros(num_bboxes).astype(int)
best_anchor_for_bbox = -1*np.ones((num_bboxes, 4)).astype(int)
best_iou_for_bbox = np.zeros(num_bboxes).astype(np.float32)
best_x_for_bbox = np.zeros((num_bboxes, 4)).astype(int)
best_dx_for_bbox = np.zeros((num_bboxes, 4)).astype(np.float32)
# get the GT box coordinates, and resize to account for image resizing
gta = np.zeros((num_bboxes, 4))
for bbox_num, bbox in enumerate(img_data[‘bboxes‘]):
# get the GT box coordinates, and resize to account for image resizing
gta[bbox_num, 0] = bbox[‘x1‘] * (resized_width / float(width))
gta[bbox_num, 1] = bbox[‘x2‘] * (resized_width / float(width))
gta[bbox_num, 2] = bbox[‘y1‘] * (resized_height / float(height))
gta[bbox_num, 3] = bbox[‘y2‘] * (resized_height / float(height))
# rpn ground truth
for anchor_size_idx in range(len(anchor_sizes)):
for anchor_ratio_idx in range(n_anchratios):
anchor_x = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0]
anchor_y = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1]
for ix in range(output_width):
# x-coordinates of the current anchor box
x1_anc = downscale * (ix + 0.5) - anchor_x / 2
x2_anc = downscale * (ix + 0.5) + anchor_x / 2
# ignore boxes that go across image boundaries
if x1_anc < 0 or x2_anc > resized_width:
continue
for jy in range(output_height):
# y-coordinates of the current anchor box
y1_anc = downscale * (jy + 0.5) - anchor_y / 2
y2_anc = downscale * (jy + 0.5) + anchor_y / 2
# ignore boxes that go across image boundaries
if y1_anc < 0 or y2_anc > resized_height:
continue
# bbox_type indicates whether an anchor should be a target
bbox_type = ‘neg‘
# this is the best IOU for the (x,y) coord and the current anchor
# note that this is different from the best IOU for a GT bbox
best_iou_for_loc = 0.0
for bbox_num in range(num_bboxes):
# get IOU of the current GT box and the current anchor box
curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1_anc, y1_anc, x2_anc, y2_anc])
# calculate the regression targets if they will be needed
if curr_iou > best_iou_for_bbox[bbox_num] or curr_iou > C.rpn_max_overlap:
cx = (gta[bbox_num, 0] + gta[bbox_num, 1]) / 2.0
cy = (gta[bbox_num, 2] + gta[bbox_num, 3]) / 2.0
cxa = (x1_anc + x2_anc)/2.0
cya = (y1_anc + y2_anc)/2.0
tx = (cx - cxa) / (x2_anc - x1_anc)
ty = (cy - cya) / (y2_anc - y1_anc)
tw = np.log((gta[bbox_num, 1] - gta[bbox_num, 0]) / (x2_anc - x1_anc))
th = np.log((gta[bbox_num, 3] - gta[bbox_num, 2]) / (y2_anc - y1_anc))
if img_data[‘bboxes‘][bbox_num][‘class‘] != ‘bg‘:
# all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best
if curr_iou > best_iou_for_bbox[bbox_num]:
best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx]
best_iou_for_bbox[bbox_num] = curr_iou
best_x_for_bbox[bbox_num,:] = [x1_anc, x2_anc, y1_anc, y2_anc]
best_dx_for_bbox[bbox_num,:] = [tx, ty, tw, th]
# we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap)
if curr_iou > C.rpn_max_overlap:
bbox_type = ‘pos‘
num_anchors_for_bbox[bbox_num] += 1
# we update the regression layer target if this IOU is the best for the current (x,y) and anchor position
if curr_iou > best_iou_for_loc:
best_iou_for_loc = curr_iou
best_regr = (tx, ty, tw, th)
# if the IOU is >0.3 and <0.7, it is ambiguous and no included in the objective
if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap:
# gray zone between neg and pos
if bbox_type != ‘pos‘:
bbox_type = ‘neutral‘
# turn on or off outputs depending on IOUs
if bbox_type == ‘neg‘:
y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
elif bbox_type == ‘neutral‘:
y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0
elif bbox_type == ‘pos‘:
y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1
start = 4 * (anchor_ratio_idx + n_anchratios * anchor_size_idx)
y_rpn_regr[jy, ix, start:start+4] = best_regr
# we ensure that every bbox has at least one positive RPN region
for idx in range(num_anchors_for_bbox.shape[0]):
if num_anchors_for_bbox[idx] == 0:
# no box with an IOU greater than zero ...
if best_anchor_for_bbox[idx, 0] == -1:
continue
y_is_box_valid[
best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios *
best_anchor_for_bbox[idx,3]] = 1
y_rpn_overlap[
best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios *
best_anchor_for_bbox[idx,3]] = 1
start = 4 * (best_anchor_for_bbox[idx,2] + n_anchratios * best_anchor_for_bbox[idx,3])
y_rpn_regr[
best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], start:start+4] = best_dx_for_bbox[idx, :]
y_rpn_overlap = np.transpose(y_rpn_overlap, (2, 0, 1))
y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0)
y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1))
y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0)
y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1))
y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0)
pos_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 1, y_is_box_valid[0, :, :, :] == 1))
neg_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 0, y_is_box_valid[0, :, :, :] == 1))
num_pos = len(pos_locs[0])
# one issue is that the RPN has many more negative than positive regions, so we turn off some of the negative
# regions. We also limit it to 256 regions.
num_regions = 256
if len(pos_locs[0]) > num_regions/2:
val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions/2)
y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0
num_pos = num_regions/2
if len(neg_locs[0]) + num_pos > num_regions:
val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos)
y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0
y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=1)
y_rpn_regr = np.concatenate([np.repeat(y_rpn_overlap, 4, axis=1), y_rpn_regr], axis=1)
return np.copy(y_rpn_cls), np.copy(y_rpn_regr)
class threadsafe_iter:
"""Takes an iterator/generator and makes it thread-safe by
serializing call to the `next` method of given iterator/generator.
"""
def __init__(self, it):
self.it = it
self.lock = threading.Lock()
def __iter__(self):
return self
def next(self):
with self.lock:
return next(self.it)
def threadsafe_generator(f):
"""A decorator that takes a generator function and makes it thread-safe.
"""
def g(*a, **kw):
return threadsafe_iter(f(*a, **kw))
return g
def get_anchor_gt(all_img_data, class_count, C, img_length_calc_function, backend, mode=‘train‘):
# The following line is not useful with Python 3.5, it is kept for the legacy
# all_img_data = sorted(all_img_data)
sample_selector = SampleSelector(class_count)
while True:
if mode == ‘train‘:
np.random.shuffle(all_img_data)
for img_data in all_img_data:
try:
if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data):
continue
# read in image, and optionally add augmentation
if mode == ‘train‘:
img_data_aug, x_img = data_augment.augment(img_data, C, augment=True)
else:
img_data_aug, x_img = data_augment.augment(img_data, C, augment=False)
(width, height) = (img_data_aug[‘width‘], img_data_aug[‘height‘])
(rows, cols, _) = x_img.shape
assert cols == width
assert rows == height
# get image dimensions for resizing
(resized_width, resized_height) = get_new_img_size(width, height, C.im_size)
# resize the image so that smalles side is length = 600px
x_img = cv2.resize(x_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC)
try:
y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height, resized_width, resized_height, img_length_calc_function)
except:
continue
# Zero-center by mean pixel, and preprocess image
x_img = x_img[:,:, (2, 1, 0)] # BGR -> RGB
x_img = x_img.astype(np.float32)
x_img[:, :, 0] -= C.img_channel_mean[0]
x_img[:, :, 1] -= C.img_channel_mean[1]
x_img[:, :, 2] -= C.img_channel_mean[2]
x_img /= C.img_scaling_factor
x_img = np.transpose(x_img, (2, 0, 1))
x_img = np.expand_dims(x_img, axis=0)
y_rpn_regr[:, y_rpn_regr.shape[1]//2:, :, :] *= C.std_scaling
if backend == ‘tf‘:
x_img = np.transpose(x_img, (0, 2, 3, 1))
y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1))
y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1))
yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug
except Exception as e:
print(e)
continue
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 167
- 168
- 169
- 170
- 171
- 172
- 173
- 174
- 175
- 176
- 177
- 178
- 179
- 180
- 181
- 182
- 183
- 184
- 185
- 186
- 187
- 188
- 189
- 190
- 191
- 192
- 193
- 194
- 195
- 196
- 197
- 198
- 199
- 200
- 201
- 202
- 203
- 204
- 205
- 206
- 207
- 208
- 209
- 210
- 211
- 212
- 213
- 214
- 215
- 216
- 217
- 218
- 219
- 220
- 221
- 222
- 223
- 224
- 225
- 226
- 227
- 228
- 229
- 230
- 231
- 232
- 233
- 234
- 235
- 236
- 237
- 238
- 239
- 240
- 241
- 242
- 243
- 244
- 245
- 246
- 247
- 248
- 249
- 250
- 251
- 252
- 253
- 254
- 255
- 256
- 257
- 258
- 259
- 260
- 261
- 262
- 263
- 264
- 265
- 266
- 267
- 268
- 269
- 270
- 271
- 272
- 273
- 274
- 275
- 276
- 277
- 278
- 279
- 280
- 281
- 282
- 283
- 284
- 285
- 286
- 287
- 288
- 289
- 290
- 291
- 292
- 293
- 294
- 295
- 296
- 297
- 298
- 299
- 300
- 301
- 302
- 303
- 304
- 305
- 306
- 307
- 308
- 309
- 310
- 311
- 312
- 313
- 314
- 315
- 316
- 317
- 318
- 319
- 320
- 321
- 322
- 323
- 324
- 325
- 326
- 327
- 328
- 329
- 330
- 331
- 332
- 333
- 334
- 335
- 336
- 337
- 338
关于RPN计算部分,参考keras版faster-rcnn算法详解(1.RPN计算)
原文地址:https://www.cnblogs.com/kekexuanxaun/p/9459368.html