This post walks through fine-tuning: take the caffemodel obtained by pretraining CaffeNet on the large ImageNet dataset, then fine-tune it on the Flickr Style image dataset. The main steps follow:
#Fine-tuning: take a model someone else pretrained on a large dataset and adapt it to your own data.
#This beats random initialization, because the pretrained weights are likely already close to a good optimum.
#It saves time and compute, and works around not having a large labeled dataset of your own.
#Here we fine-tune the ImageNet-pretrained caffemodel for style recognition.
#Step 1: load the Caffe modules and prepare the data, mainly the style dataset.
caffe_root = '../../'  # set this to the Caffe root relative to your working directory; this notebook lives in caffe/examples/test, hence ../../
import sys
#add Caffe's Python interface to the module search path
sys.path.insert(0, caffe_root + 'python')
import caffe
caffe.set_device(0)
caffe.set_mode_gpu()
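#(No GPU? Caffe's standard CPU mode works too, just much slower for training --
# this fallback is an addition, not part of the original notes.)
#caffe.set_mode_cpu()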
#load the remaining modules
import numpy as np
from pylab import *
%matplotlib inline
import tempfile
#define an image deprocessing function: network blob (BGR, mean-subtracted, CHW float) back to a displayable RGB uint8 image
def deprocess_net_image(image):
    image = image.copy()              # don't modify the blob in place
    image = image[::-1]               # BGR -> RGB
    image = image.transpose(1, 2, 0)  # CHW -> HWC
    image += [123, 117, 104]          # (approximately) undo mean subtraction
    # clamp values to [0, 255], then round and cast float32 -> uint8
    image[image < 0], image[image > 255] = 0, 255
    image = np.round(image)
    image = np.require(image, dtype=np.uint8)
    return image
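#A minimal usage sketch (illustrative only -- assumes `net` is a loaded
# caffe.Net whose 'data' blob already holds a preprocessed batch):
#plt.imshow(deprocess_net_image(net.blobs['data'].data[0]))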
#Step 2: download the data. From the ~80K-image Flickr Style dataset we take
#2000 images covering 5 of the 20 style labels; to download everything, set full_dataset = True.
#Also download the ImageNet mean file, the pretrained caffemodel, etc.
full_dataset = False
if full_dataset:
    NUM_STYLE_IMAGES = NUM_STYLE_LABELS = -1
else:
    NUM_STYLE_IMAGES = 2000
    NUM_STYLE_LABELS = 5
import os
#os.chdir = change directory; the download scripts expect to run from caffe_root
#os.chdir(caffe_root)
#!data/ilsvrc12/get_ilsvrc_aux.sh
#!scripts/download_model_binary.py models/bvlc_reference_caffenet
#!python examples/finetune_flickr_style/assemble_data.py \
#    --workers=-1 --seed=1701 \
#    --images=$NUM_STYLE_IMAGES --label=$NUM_STYLE_LABELS
#os.chdir('examples')
#define the path to the pretrained weights
weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
assert os.path.exists(weights)
#load the ImageNet labels
imagenet_label_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
imagenet_labels = list(np.loadtxt(imagenet_label_file, str, delimiter='\t'))
assert len(imagenet_labels) == 1000
print 'loaded imagenet labels:\n', '\n'.join(imagenet_labels[:10] + ['...'])
#load the style labels
style_label_file = caffe_root + 'examples/finetune_flickr_style/style_names.txt'
style_labels = list(np.loadtxt(style_label_file, str, delimiter='\n'))
if NUM_STYLE_LABELS > 0:
    style_labels = style_labels[:NUM_STYLE_LABELS]
print '\nLoaded style labels:\n', ','.join(style_labels)
loaded imagenet labels:
n01440764 tench, Tinca tinca
n01443537 goldfish, Carassius auratus
n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
n01491361 tiger shark, Galeocerdo cuvieri
n01494475 hammerhead, hammerhead shark
n01496331 electric ray, crampfish, numbfish, torpedo
n01498041 stingray
n01514668 cock
n01514859 hen
n01518878 ostrich, Struthio camelus
…
Loaded style labels:
Detailed,Pastel,Melancholy,Noir,HDR
#define the network and train it
from caffe import layers as L
from caffe import params as P
weight_param = dict(lr_mult=1, decay_mult=1)  # weights: normal learning rate
bias_param = dict(lr_mult=2, decay_mult=0)    # biases: 2x learning rate, no weight decay
learned_param = [weight_param, bias_param]
frozen_param = [dict(lr_mult=0)] * 2          # lr_mult=0 freezes a layer during training
#Note: the parameter names here must all be 'filler', not 'filter'; the official tutorial page has this typo!
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1,
              param=learned_param,
              weight_filler=dict(type='gaussian', std=0.01),
              bias_filler=dict(type='constant', value=0.1)):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group, param=param,
                         weight_filler=weight_filler, bias_filler=bias_filler)
    return conv, L.ReLU(conv, in_place=True)
def fc_relu(bottom, nout, param=learned_param,
            weight_filler=dict(type='gaussian', std=0.005),
            bias_filler=dict(type='constant', value=0.1)):
    fc = L.InnerProduct(bottom, num_output=nout, param=param,
                        weight_filler=weight_filler,
                        bias_filler=bias_filler)
    return fc, L.ReLU(fc, in_place=True)

def max_pool(bottom, ks, stride=1):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def caffenet(data, label=None, train=True, num_classes=1000,
             classifier_name='fc8', learn_all=False):
    """Returns a NetSpec specifying CaffeNet, following the original proto text
       specification (./models/bvlc_reference_caffenet/train_val.prototxt)."""
    # define the network layer by layer
    n = caffe.NetSpec()
    n.data = data
    param = learned_param if learn_all else frozen_param
    n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, param=param)
    n.pool1 = max_pool(n.relu1, 3, stride=2)
    n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
    n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2, param=param)
    n.pool2 = max_pool(n.relu2, 3, stride=2)
    n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
    n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1, param=param)
    n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2, param=param)
    n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2, param=param)
    n.pool5 = max_pool(n.relu5, 3, stride=2)
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, param=param)
    if train:
        n.drop6 = fc7input = L.Dropout(n.relu6, in_place=True)
    else:
        fc7input = n.relu6
    n.fc7, n.relu7 = fc_relu(fc7input, 4096, param=param)
    if train:
        n.drop7 = fc8input = L.Dropout(n.relu7, in_place=True)
    else:
        fc8input = n.relu7
    # always learn fc8 (param=learned_param)
    fc8 = L.InnerProduct(fc8input, num_output=num_classes, param=learned_param)
    # give fc8 the name specified by argument `classifier_name`
    n.__setattr__(classifier_name, fc8)
    if not train:
        n.probs = L.Softmax(fc8)
    if label is not None:
        n.label = label
        n.loss = L.SoftmaxWithLoss(fc8, n.label)
        n.acc = L.Accuracy(fc8, n.label)
    # write the net to a temporary file and return its filename
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(str(n.to_proto()))
        return f.name
#feed unlabeled dummy data through the ImageNet net to inspect its output
dummy_data = L.DummyData(shape=dict(dim=[1, 3, 227, 227]))
imagenet_net_filename = caffenet(data=dummy_data, train=False)
imagenet_net = caffe.Net(imagenet_net_filename, weights, caffe.TEST)
#define the style network
def style_net(train=True, learn_all=False, subset=None):
    if subset is None:
        subset = 'train' if train else 'test'
    source = caffe_root + 'data/flickr_style/%s.txt' % subset
    transform_param = dict(mirror=train, crop_size=227,
        mean_file=caffe_root + 'data/ilsvrc12/imagenet_mean.binaryproto')
    style_data, style_label = L.ImageData(
        transform_param=transform_param, source=source,
        batch_size=50, new_height=256, new_width=256, ntop=2)
    return caffenet(data=style_data, label=style_label, train=train,
                    num_classes=NUM_STYLE_LABELS,
                    classifier_name='fc8_flickr',
                    learn_all=learn_all)
untrained_style_net = caffe.Net(style_net(train=False, subset='train'),
                                weights, caffe.TEST)
untrained_style_net.forward()
style_data_batch = untrained_style_net.blobs['data'].data.copy()
style_label_batch = np.array(untrained_style_net.blobs['label'].data, dtype=np.int32)
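#Optional: preview a few images from the batch with their labels -- a small
#sketch using the pylab interface imported above, not part of the original flow:
for i in range(4):
    subplot(1, 4, i + 1)
    imshow(deprocess_net_image(style_data_batch[i]))
    title(style_labels[style_label_batch[i]], fontsize=8)
    axis('off')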
#define helpers that display the top-k predictions: we'll compare the randomly
#initialized net, the ImageNet net, and the style net initialized from the pretrained model
def disp_preds(net, image, labels, k=5, name='ImageNet'):
    input_blob = net.blobs['data']
    net.blobs['data'].data[0, ...] = image
    probs = net.forward(start='conv1')['probs'][0]
    top_k = (-probs).argsort()[:k]
    print 'top %d predicted %s labels =' % (k, name)
    print '\n'.join('\t(%d) %5.2f%% %s' % (i+1, 100*probs[p], labels[p])
                    for i, p in enumerate(top_k))

def disp_imagenet_preds(net, image):
    disp_preds(net, image, imagenet_labels, name='ImageNet')

def disp_style_preds(net, image):
    disp_preds(net, image, style_labels, name='style')
batch_index = 23
image = style_data_batch[batch_index]
plt.imshow(deprocess_net_image(image))
print 'actual label =', style_labels[style_label_batch[batch_index]]
actual label = Pastel
#The ImageNet net gets this wrong -- unsurprising, since this style category
#has no counterpart among ImageNet's 1000 classes.
disp_imagenet_preds(imagenet_net, image)
top 5 predicted ImageNet labels =
(1) 7.01% n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier
(2) 4.90% n03544143 hourglass
(3) 4.36% n03584829 iron, smoothing iron
(4) 3.48% n04517823 vacuum, vacuum cleaner
(5) 2.85% n04317175 stethoscope
#Sanity check: up through fc7 the two nets share the same pretrained weights,
#so their fc7 activations on the same input should be (numerically) identical.
diff = untrained_style_net.blobs['fc7'].data[0] - imagenet_net.blobs['fc7'].data[0]
error = (diff ** 2).sum()
#assert error < 1e-8
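#An equivalent check with np.allclose (assumption: both nets were just run
#forward on the same input image):
#assert np.allclose(untrained_style_net.blobs['fc7'].data[0],
#                   imagenet_net.blobs['fc7'].data[0])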
#delete untrained_style_net to save memory
del untrained_style_net
#define the network's solver file, which holds the training hyperparameters
from caffe.proto import caffe_pb2

def solver(train_net_path, test_net_path=None, base_lr=0.001):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path is not None:
        s.test_net.append(test_net_path)
        s.test_interval = 1000  # Test after every 1000 training iterations.
        s.test_iter.append(100) # Test on 100 batches each time we test.

    # The number of iterations over which to average the gradient.
    # Effectively boosts the training batch size by the given factor, without
    # affecting memory utilization.
    s.iter_size = 1

    s.max_iter = 100000  # # of times to update the net (training iterations)

    # Solve using the stochastic gradient descent (SGD) algorithm.
    # Other choices include 'Adam' and 'RMSProp'.
    s.type = 'SGD'

    # Set the initial learning rate for SGD.
    s.base_lr = base_lr

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 20000

    # Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained. Here, we'll
    # snapshot every 10K iterations -- ten times during training.
    s.snapshot = 10000
    s.snapshot_prefix = caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'

    # Train on the GPU. Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # Write the solver to a temporary file and return its filename.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(str(s))
        return f.name
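#Usage sketch (mirrors how solver() is applied further below): write a solver
#for the style net and inspect the generated prototxt:
#solver_path = solver(style_net(train=True))
#print open(solver_path).read()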
#With the net and solver files defined, you could train straight from the command line:
#build/tools/caffe train -solver models/finetune_flickr_style/solver.prototxt \
#    -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel \
#    -gpu 0
#This walkthrough uses the Python interface instead.
def run_solvers(niter, solvers, disp_interval=10):
    """Run solvers for niter iterations,
       returning the loss and accuracy recorded each iteration.
       `solvers` is a list of (name, solver) tuples."""
    # record the loss and accuracy blobs at every iteration
    blobs = ('loss', 'acc')
    loss, acc = ({name: np.zeros(niter) for name, _ in solvers}
                 for _ in blobs)
    for it in range(niter):
        for name, s in solvers:
            s.step(1)  # run a single SGD step in Caffe
            loss[name][it], acc[name][it] = (s.net.blobs[b].data.copy()
                                             for b in blobs)
        if it % disp_interval == 0 or it + 1 == niter:
            loss_disp = '; '.join('%s: loss=%.3f, acc=%2d%%' %
                                  (n, loss[n][it], np.round(100*acc[n][it]))
                                  for n, _ in solvers)
            print '%3d) %s' % (it, loss_disp)
    # Save the learned weights from both nets.
    weight_dir = tempfile.mkdtemp()
    weights = {}
    for name, s in solvers:
        filename = 'weights.%s.caffemodel' % name
        weights[name] = os.path.join(weight_dir, filename)
        s.net.save(weights[name])
    return loss, acc, weights
#start training
niter = 400  # number of iterations to train
# Reset style_solver as before.
#the style net, initialized from the pretrained ImageNet weights
style_solver_filename = solver(style_net(train=True))
style_solver = caffe.get_solver(style_solver_filename)
style_solver.net.copy_from(weights)
# For reference, we also create a solver that isn't initialized from
# the pretrained ImageNet weights.
#the 'scratch' net, trained from random initialization
scratch_style_solver_filename = solver(style_net(train=True))
scratch_style_solver = caffe.get_solver(scratch_style_solver_filename)
print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained', style_solver),
           ('scratch', scratch_style_solver)]
loss, acc, weights = run_solvers(niter, solvers)
print 'Done.'
train_loss, scratch_train_loss = loss['pretrained'], loss['scratch']
train_acc, scratch_train_acc = acc['pretrained'], acc['scratch']
style_weights, scratch_style_weights = weights['pretrained'], weights['scratch']
# Delete solvers to save memory.
del style_solver, scratch_style_solver, solvers
#plot the training loss curves: pretrained initialization vs. random initialization
plot(np.vstack([train_loss, scratch_train_loss]).T)
xlabel('Iteration #')
ylabel('Loss')
#plot the training accuracy curves for the same two nets
plot(np.vstack([train_acc, scratch_train_acc]).T)
xlabel('Iteration #')
ylabel('Accuracy')
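#The same accuracy curves with a legend, so the two nets are distinguishable
#(a small optional sketch; the variable names are the ones defined above):
figure()
plot(train_acc, label='pretrained')
plot(scratch_train_acc, label='scratch')
xlabel('Iteration #')
ylabel('Accuracy')
legend(loc='lower right')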
#compare the test accuracy of the two nets
def eval_style_net(weights, test_iters=10):
    test_net = caffe.Net(style_net(train=False), weights, caffe.TEST)
    accuracy = 0
    for it in xrange(test_iters):
        accuracy += test_net.forward()['acc']
    accuracy /= test_iters
    return test_net, accuracy
test_net, accuracy = eval_style_net(style_weights)
print 'Accuracy, trained from ImageNet initialization: %3.1f%%' % (100*accuracy, )
scratch_test_net, scratch_accuracy = eval_style_net(scratch_style_weights)
print 'Accuracy, trained from random initialization: %3.1f%%' % (100*scratch_accuracy, )
Accuracy, trained from ImageNet initialization: 51.4%
Accuracy, trained from random initialization: 23.6%
#End-to-end fine-tuning: unfreeze and learn all layers, not just the new
#classifier -- simpler and stronger than the classifier-only training above.
#fc8_flickr still produces the predictions.
end_to_end_net = style_net(train=True, learn_all=True)
# Set base_lr to 1e-3, the same as last time when learning only the classifier.
# You may want to play around with different values of this or other
# optimization parameters when fine-tuning. For example, if learning diverges
# (e.g., the loss gets very large or goes to infinity/NaN), you should try
# decreasing base_lr (e.g., to 1e-4, then 1e-5, etc., until you find a value
# for which learning does not diverge).
base_lr = 0.001
#this solver starts from the weights fine-tuned above (ImageNet-pretrained)
style_solver_filename = solver(end_to_end_net, base_lr=base_lr)
style_solver = caffe.get_solver(style_solver_filename)
style_solver.net.copy_from(style_weights)
#this solver continues the randomly initialized (scratch) net
scratch_style_solver_filename = solver(end_to_end_net, base_lr=base_lr)
scratch_style_solver = caffe.get_solver(scratch_style_solver_filename)
scratch_style_solver.net.copy_from(scratch_style_weights)
print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained, end-to-end', style_solver),
           ('scratch, end-to-end', scratch_style_solver)]
_, _, finetuned_weights = run_solvers(niter, solvers)
print 'Done.'
style_weights_ft = finetuned_weights['pretrained, end-to-end']
scratch_style_weights_ft = finetuned_weights['scratch, end-to-end']
# Delete solvers to save memory.
del style_solver, scratch_style_solver, solvers
Running solvers for 400 iterations...
0) pretrained, end-to-end: loss=0.734, acc=64%; scratch, end-to-end: loss=1.583, acc=28%
10) pretrained, end-to-end: loss=1.255, acc=62%; scratch, end-to-end: loss=1.632, acc=14%
20) pretrained, end-to-end: loss=0.873, acc=66%; scratch, end-to-end: loss=1.626, acc=12%
30) pretrained, end-to-end: loss=0.863, acc=70%; scratch, end-to-end: loss=1.587, acc=22%
40) pretrained, end-to-end: loss=0.752, acc=72%; scratch, end-to-end: loss=1.569, acc=26%
50) pretrained, end-to-end: loss=0.779, acc=70%; scratch, end-to-end: loss=1.596, acc=34%
60) pretrained, end-to-end: loss=0.789, acc=74%; scratch, end-to-end: loss=1.531, acc=32%
70) pretrained, end-to-end: loss=0.500, acc=76%; scratch, end-to-end: loss=1.549, acc=34%
80) pretrained, end-to-end: loss=0.792, acc=72%; scratch, end-to-end: loss=1.450, acc=42%
90) pretrained, end-to-end: loss=0.791, acc=72%; scratch, end-to-end: loss=1.482, acc=34%
100) pretrained, end-to-end: loss=0.582, acc=76%; scratch, end-to-end: loss=1.491, acc=32%
110) pretrained, end-to-end: loss=0.424, acc=84%; scratch, end-to-end: loss=1.621, acc=26%
120) pretrained, end-to-end: loss=0.457, acc=82%; scratch, end-to-end: loss=1.538, acc=28%
130) pretrained, end-to-end: loss=0.693, acc=70%; scratch, end-to-end: loss=1.513, acc=26%
140) pretrained, end-to-end: loss=0.481, acc=84%; scratch, end-to-end: loss=1.495, acc=30%
150) pretrained, end-to-end: loss=0.431, acc=80%; scratch, end-to-end: loss=1.462, acc=38%
160) pretrained, end-to-end: loss=0.422, acc=88%; scratch, end-to-end: loss=1.427, acc=34%
170) pretrained, end-to-end: loss=0.483, acc=76%; scratch, end-to-end: loss=1.618, acc=34%
180) pretrained, end-to-end: loss=0.357, acc=88%; scratch, end-to-end: loss=1.489, acc=34%
190) pretrained, end-to-end: loss=0.419, acc=84%; scratch, end-to-end: loss=1.440, acc=38%
200) pretrained, end-to-end: loss=0.538, acc=78%; scratch, end-to-end: loss=1.443, acc=32%
210) pretrained, end-to-end: loss=0.406, acc=86%; scratch, end-to-end: loss=1.696, acc=20%
220) pretrained, end-to-end: loss=0.366, acc=82%; scratch, end-to-end: loss=1.376, acc=40%
230) pretrained, end-to-end: loss=0.173, acc=92%; scratch, end-to-end: loss=1.483, acc=26%
240) pretrained, end-to-end: loss=0.258, acc=92%; scratch, end-to-end: loss=1.273, acc=46%
250) pretrained, end-to-end: loss=0.410, acc=82%; scratch, end-to-end: loss=1.364, acc=48%
260) pretrained, end-to-end: loss=0.335, acc=90%; scratch, end-to-end: loss=1.376, acc=34%
270) pretrained, end-to-end: loss=0.367, acc=80%; scratch, end-to-end: loss=1.424, acc=46%
280) pretrained, end-to-end: loss=0.374, acc=84%; scratch, end-to-end: loss=1.231, acc=48%
290) pretrained, end-to-end: loss=0.247, acc=90%; scratch, end-to-end: loss=1.235, acc=52%
300) pretrained, end-to-end: loss=0.317, acc=86%; scratch, end-to-end: loss=1.394, acc=34%
310) pretrained, end-to-end: loss=0.136, acc=96%; scratch, end-to-end: loss=1.284, acc=38%
320) pretrained, end-to-end: loss=0.308, acc=90%; scratch, end-to-end: loss=1.343, acc=42%
330) pretrained, end-to-end: loss=0.382, acc=82%; scratch, end-to-end: loss=1.675, acc=36%
340) pretrained, end-to-end: loss=0.209, acc=90%; scratch, end-to-end: loss=1.432, acc=42%
350) pretrained, end-to-end: loss=0.311, acc=88%; scratch, end-to-end: loss=1.251, acc=48%
360) pretrained, end-to-end: loss=0.325, acc=86%; scratch, end-to-end: loss=1.430, acc=36%
370) pretrained, end-to-end: loss=0.306, acc=88%; scratch, end-to-end: loss=1.462, acc=48%
380) pretrained, end-to-end: loss=0.345, acc=86%; scratch, end-to-end: loss=1.299, acc=46%
390) pretrained, end-to-end: loss=0.182, acc=96%; scratch, end-to-end: loss=1.303, acc=40%
399) pretrained, end-to-end: loss=0.242, acc=90%; scrat
#test accuracy after end-to-end fine-tuning
test_net, accuracy = eval_style_net(style_weights_ft)
print 'Accuracy, finetuned from ImageNet initialization: %3.1f%%' % (100*accuracy, )
scratch_test_net, scratch_accuracy = eval_style_net(scratch_style_weights_ft)
print 'Accuracy, finetuned from random initialization: %3.1f%%' % (100*scratch_accuracy, )
Accuracy, finetuned from ImageNet initialization: 55.6%
Accuracy, finetuned from random initialization: 45.8%
#Re-check the prediction for the earlier image: the fine-tuned model now assigns 99.96% to Pastel, i.e., it is far more confident in the correct label.
plt.imshow(deprocess_net_image(image))
disp_style_preds(test_net, image)
top 5 predicted style labels =
(1) 99.96% Pastel
(2) 0.02% Melancholy
(3) 0.01% Detailed
(4) 0.01% Noir
(5) 0.00% HDR
#evaluate on a fresh image from the test batch
batch_index = 19
image = test_net.blobs['data'].data[batch_index]
plt.imshow(deprocess_net_image(image))
print 'actual label =', style_labels[int(test_net.blobs['label'].data[batch_index])]
actual label = HDR
#display the predicted probabilities over the five styles
disp_style_preds(test_net, image)
top 5 predicted style labels =
(1) 51.14% HDR
(2) 46.59% Melancholy
(3) 2.08% Pastel
(4) 0.14% Noir
(5) 0.06% Detailed
#the scratch net's predictions for the same image
disp_style_preds(scratch_test_net, image)
top 5 predicted style labels =
(1) 61.20% HDR
(2) 14.25% Detailed
(3) 13.17% Melancholy
(4) 6.92% Pastel
(5) 4.46% Noir
#the ImageNet net's predictions for this image
disp_imagenet_preds(imagenet_net, image)
top 5 predicted ImageNet labels =
(1) 22.45% n04604644 worm fence, snake fence, snake-rail fence, Virginia fence
(2) 14.84% n09193705 alp
(3) 9.58% n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
(4) 8.13% n09468604 valley, vale
(5) 6.42% n02793495 barn