1. Importing the CIFAR-10 dataset
CIFAR-10 is a small dataset for general object recognition, compiled by Alex Krizhevsky and Ilya Sutskever, students of Geoffrey Hinton. It contains RGB color images in 10 classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck. Each image is 32×32 pixels with 3 channels, and the dataset holds 50,000 training images and 10,000 test images. CIFAR-10 is distributed in three versions; this tutorial uses the python version.
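If you would rather not download and unpack the archive by hand, the same data can be fetched with the loader that ships with tf.keras. This is a convenience alternative, not the route this tutorial takes; note that the Keras loader returns labels of shape (N, 1) rather than (N,):

# Alternative: fetch CIFAR-10 through the built-in Keras helper.
# Keras returns labels with shape (N, 1); squeeze them to match the
# (N,) shape produced by load_CIFAR_data() below.
import numpy as np
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = np.squeeze(y_train)  # (50000, 1) -> (50000,)
y_test = np.squeeze(y_test)    # (10000, 1) -> (10000,)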
1.1 Importing the required libraries
import os
import math
import numpy as np
import pickle as p
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
1.2 Defining a function to load a single batch
def load_CIFAR_batch(filename):
    """Load a single batch of CIFAR-10."""
    with open(filename, 'rb') as f:
        # Each sample consists of a label and the image data
        # (3072 = 32 x 32 x 3)
        data_dict = p.load(f, encoding='bytes')
        images = data_dict[b'data']
        labels = data_dict[b'labels']
        # Reshape the raw data to BCHW (batch, channel, height, width)
        images = images.reshape(10000, 3, 32, 32)
        # TensorFlow expects image data as BHWC, so move the
        # channel dimension C to the last axis
        images = images.transpose(0, 2, 3, 1)
        labels = np.array(labels)
        return images, labels
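As a quick sanity check, one batch file should decode to 10,000 images of shape 32×32×3 and 10,000 integer labels. The path below is the one used in section 1.4; adjust it to wherever the archive was unpacked:

images, labels = load_CIFAR_batch(r'data\cifar-10-batches-py\data_batch_1')
print(images.shape)   # (10000, 32, 32, 3)
print(labels.shape)   # (10000,)
print(labels[:5])     # integer class ids in [0, 9]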
1.3 Defining the full data-loading function
def load_CIFAR_data(data_dir):
    """Load all CIFAR-10 data."""
    images_train = []
    labels_train = []
    for i in range(5):
        f = os.path.join(data_dir, 'data_batch_%d' % (i + 1))
        print('loading ', f)
        # Call load_CIFAR_batch() to get one batch of images and labels
        image_batch, label_batch = load_CIFAR_batch(f)
        images_train.append(image_batch)
        labels_train.append(label_batch)
    Xtrain = np.concatenate(images_train)
    Ytrain = np.concatenate(labels_train)
    del image_batch, label_batch
    Xtest, Ytest = load_CIFAR_batch(os.path.join(data_dir, 'test_batch'))
    print('finished loading CIFAR-10 data')
    # Return training images/labels and test images/labels
    return (Xtrain, Ytrain), (Xtest, Ytest)
1.4 Loading the data
data_dir = r'data\cifar-10-batches-py'
(x_train, y_train), (x_test, y_test) = load_CIFAR_data(data_dir)
Output
loading data\cifar-10-batches-py\data_batch_1
loading data\cifar-10-batches-py\data_batch_2
loading data\cifar-10-batches-py\data_batch_3
loading data\cifar-10-batches-py\data_batch_4
loading data\cifar-10-batches-py\data_batch_5
finished loading CIFAR-10 data
1.5 Visualizing the loaded data
label_dict = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
              5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}

def plot_images_labels(images, labels, num):
    total = len(images)
    fig = plt.gcf()
    fig.set_size_inches(15, math.ceil(num / 10) * 7)
    for i in range(0, num):
        # Pick a random sample to display
        choose_n = np.random.randint(0, total)
        ax = plt.subplot(math.ceil(num / 5), 5, 1 + i)
        ax.imshow(images[choose_n], cmap='binary')
        title = label_dict[labels[choose_n]]
        ax.set_title(title, fontsize=10)
    plt.show()

plot_images_labels(x_train, y_train, 10)
Output: a grid of 10 randomly chosen training images, each titled with its class label.
2. Preprocessing the data and setting hyperparameters
# Scale pixel values from [0, 255] to [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

train_num = len(x_train)
num_classes = 10

learning_rate = 0.0002
batch_size = 64
training_steps = 20000
display_step = 1000

conv1_filters = 32
conv2_filters = 64
fc1_units = 256
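A one-line check that the scaling did what was intended (pixel values should now be float32 in [0, 1]):

print(x_train.dtype, x_train.min(), x_train.max())  # float32 0.0 1.0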
3. Building the data pipeline with tf.data
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = (train_data.shuffle(5000)
                        .repeat(training_steps)
                        .batch(batch_size)
                        .prefetch(buffer_size=AUTOTUNE))
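Pulling a single batch confirms the pipeline emits tensors of the expected shape (the label dtype depends on the platform's default integer type):

for batch_x, batch_y in train_data.take(1):
    print(batch_x.shape, batch_x.dtype)  # (64, 32, 32, 3) float32
    print(batch_y.shape, batch_y.dtype)  # (64,) int32 or int64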
4. Defining the convolution and pooling layers
def conv2d(x, W, b, strides=1):
    # tf.nn.conv2d(input, filters, strides, padding, ...)
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # tf.nn.max_pool(input, ksize, strides, padding, ...)
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
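The helpers can be checked in isolation with random tensors: with SAME padding and stride 1 the convolution preserves the spatial size, and a 2×2 max-pool halves it. The tensors below are illustrative only, not part of the model:

dummy = tf.random.normal([1, 32, 32, 3])
w = tf.random.normal([3, 3, 3, 32])
b = tf.zeros([32])
out = conv2d(dummy, w, b)
print(out.shape)                  # (1, 32, 32, 32) -- spatial size unchanged
print(maxpool2d(out, k=2).shape)  # (1, 16, 16, 32) -- H and W halved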
5. Defining and initializing the weights
random_normal = tf.initializers.RandomNormal()

weights = {
    # [kernel_height, kernel_width, input_channels, output_channels]
    'wc1': tf.Variable(random_normal([3, 3, 3, conv1_filters])),
    # 32 input channels, 64 output channels; the SAME convolution keeps
    # the spatial size, so the feature maps stay 16x16 here
    'wc2': tf.Variable(random_normal([3, 3, conv1_filters, conv2_filters])),
    # Flatten the second pooling layer's 64 feature maps of 8x8 into a
    # one-dimensional vector of length 64 * 8 * 8 = 4096
    'wd1': tf.Variable(random_normal([4096, fc1_units])),
    'out': tf.Variable(random_normal([fc1_units, num_classes]))
}
biases = {
    'bc1': tf.Variable(tf.zeros([conv1_filters])),
    'bc2': tf.Variable(tf.zeros([conv2_filters])),
    'bd1': tf.Variable(tf.zeros([fc1_units])),
    'out': tf.Variable(tf.zeros([num_classes]))
}
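The hard-coded 4096 follows directly from the shapes: two 2×2 poolings shrink 32 → 16 → 8, and conv2 produces conv2_filters feature maps, so the flattened length is 8 × 8 × 64. It can be derived rather than hard-coded:

pool_out_side = 32 // 2 // 2                                 # 8 after two 2x2 pools
fc1_inputs = pool_out_side * pool_out_side * conv2_filters   # 8 * 8 * 64 = 4096
print(fc1_inputs)  # 4096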
6. Building the model
def conv_net(x):
    # 32x32 images with 3 channels (RGB)
    x = tf.reshape(x, [-1, 32, 32, 3])
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Pooling shrinks 32x32 to 16x16; it does not change the number of
    # channels, so there are still 32 feature maps
    pool1 = maxpool2d(conv1, k=2)
    conv2 = conv2d(pool1, weights['wc2'], biases['bc2'])
    pool2 = maxpool2d(conv2, k=2)
    flat = tf.reshape(pool2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(flat, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return tf.nn.softmax(out)
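A forward pass on a random batch confirms the output: one row of 10 class probabilities per image, each row summing to 1 because of the final softmax (the dummy input is illustrative only):

dummy = tf.random.normal([4, 32, 32, 3])
probs = conv_net(dummy)
print(probs.shape)                   # (4, 10)
print(tf.reduce_sum(probs, axis=1))  # ~[1. 1. 1. 1.]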
7. Defining the loss and evaluation functions
def cross_entropy(y_pred, y_true):
    # Clip predictions to keep log(0) out of the loss
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    loss_ = tf.keras.losses.sparse_categorical_crossentropy(y_true=y_true, y_pred=y_pred)
    return tf.reduce_mean(loss_)

def accuracy(y_pred, y_true):
    correct_prediction = tf.equal(tf.argmax(y_pred, 1),
                                  tf.reshape(tf.cast(y_true, tf.int64), [-1]))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

optimizer = tf.optimizers.Adam(learning_rate)
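Because conv_net already applies softmax, cross_entropy must clip its inputs to avoid log(0). A numerically steadier variant, sketched here as an alternative rather than what this tutorial uses, would have the network return raw logits (i.e., drop the final tf.nn.softmax) and let the loss apply a stable log-softmax internally:

# Alternative sketch: assumes conv_net is changed to return raw logits.
def cross_entropy_from_logits(logits, y_true):
    loss_ = tf.keras.losses.sparse_categorical_crossentropy(
        y_true=y_true, y_pred=logits, from_logits=True)
    return tf.reduce_mean(loss_)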
8. Defining the gradient computation function
The custom gradient computation proceeds as follows (see the code below):
(1) Open a GradientTape() scope.
(2) Inside the scope, call the model (forward pass) and compute the loss.
(3) Outside the scope, retrieve the gradients of the loss with respect to the model weights.
(4) Apply those gradients with the optimizer to update the model weights.
def run_optimization(x, y):
    with tf.GradientTape() as g:
        pred = conv_net(x)
        loss = cross_entropy(pred, y)

    trainable_variables = list(weights.values()) + list(biases.values())
    gradients = g.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
9. Training the model
train_loss_list = []
train_acc_list = []

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    run_optimization(batch_x, batch_y)

    if step % display_step == 0:
        pred = conv_net(batch_x)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        # Store plain Python floats so the lists plot cleanly later
        train_loss_list.append(float(loss))
        train_acc_list.append(float(acc))
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))
10. Visualizing the training results
plt.title('Training loss and accuracy')
plt.xlabel('Log point (every %d steps)' % display_step)
plt.ylabel('Value')
plt.plot(train_loss_list, color=(1, 0, 0), label='Loss train')
plt.plot(train_acc_list, color=(0, 0, 1), label='Accuracy train')
plt.legend(loc='best')
plt.show()
Output: a single plot of the logged training loss (red) and training accuracy (blue).
11. Testing the model
test_total_batch = int(len(x_test) / batch_size)
test_acc_sum = 0.0
for i in range(test_total_batch):
    test_image_batch = x_test[i * batch_size:(i + 1) * batch_size]
    test_label_batch = y_test[i * batch_size:(i + 1) * batch_size]
    pred = conv_net(test_image_batch)
    test_batch_acc = accuracy(pred, test_label_batch)
    test_acc_sum += test_batch_acc
test_acc = float(test_acc_sum / test_total_batch)
print("Test accuracy:{:.6f}".format(test_acc))
Output
Test accuracy:0.704327
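Note that the integer division above drops the last partial batch: 10000 / 64 gives 156 full batches (9,984 images), so 16 test images are never scored. A sketch that evaluates every test image instead:

# Sketch: score all 10000 test images, including the final partial batch.
correct = 0
for i in range(0, len(x_test), batch_size):
    pred = conv_net(x_test[i:i + batch_size])
    matches = tf.equal(tf.argmax(pred, 1),
                       tf.cast(y_test[i:i + batch_size], tf.int64))
    correct += int(tf.reduce_sum(tf.cast(matches, tf.int32)))
print("Test accuracy (all samples): {:.6f}".format(correct / len(x_test)))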