- STL-10数据集选择
- 数据集简介
- 数据集处理
- CNN网络设计
- 对VggNet进行修改
- 对ResNet进行修改
- 进行训练
- 用修改后的VggNet进行训练
- 训练效果
- 用修改后的ResNet进行训练
- 训练效果
该数据集包括10个类，分别为 airplane、bird、car、cat、deer、dog、horse、monkey、ship、truck；训练集中每个类有500张训练图片，测试集中每个类有800张测试图片。
STL-10数据集官方链接: [STL-10](https://cs.stanford.edu/~acoates/stl10/)
"""Read the STL-10 binary files and export the images as per-label PNG files.

The reading helpers only need NumPy.  Plotting (matplotlib) and PNG export
(imageio, or the legacy ``scipy.misc.imsave``) are optional: when they are
missing the module still imports and the corresponding function raises an
``ImportError`` at call time.

NOTE: this script does not download the dataset (``DATA_URL`` is empty and
no download step is run); the binary files are expected to already exist
under ``DATA_DIR``.
"""
import errno
import os
import sys
import tarfile

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional
    plt = None

if sys.version_info >= (3, 0, 0):
    import urllib.request as urllib  # ugly but works
else:
    import urllib

try:
    from imageio import imsave
except ImportError:
    try:
        from scipy.misc import imsave
    except ImportError:  # PNG export is optional
        imsave = None

print(sys.version_info)

# image shape
HEIGHT = 96
WIDTH = 96
DEPTH = 3

# size of a single image in bytes
SIZE = HEIGHT * WIDTH * DEPTH

# path to the directory with the data
DATA_DIR = './stl10_binary'

# url of the binary data
DATA_URL = ''

# path to the binary train file with image data
DATA_PATH = './stl10_binary/train_X.bin'

# path to the binary train file with labels
LABEL_PATH = './stl10_binary/train_y.bin'


def read_labels(path_to_labels):
    """Read the label file of the STL-10 dataset.

    :param path_to_labels: path to the binary file containing labels
    :return: a 1-D ``uint8`` array containing the labels
    """
    with open(path_to_labels, 'rb') as f:
        return np.fromfile(f, dtype=np.uint8)


def read_all_images(path_to_data):
    """Read every image stored in an STL-10 binary image file.

    :param path_to_data: path to the file containing the binary images
    :return: a ``uint8`` array of shape (N, HEIGHT, WIDTH, DEPTH)
    """
    with open(path_to_data, 'rb') as f:
        # read whole file in uint8 chunks
        everything = np.fromfile(f, dtype=np.uint8)

    # The images are stored channel by channel in column-major order
    # (first 96*96 values red, then green, then blue).  The -1 lets
    # NumPy derive the number of images from the file size.
    images = np.reshape(everything, (-1, DEPTH, WIDTH, HEIGHT))

    # Transpose into the channel-last layout that matplotlib.imshow reads.
    # Skip or reverse this if the consumer wants the channels separated.
    return np.transpose(images, (0, 3, 2, 1))


def read_single_image(image_file):
    """Read the next image from an already opened STL-10 image file.

    CAREFUL! This takes an open file object rather than a path, so the
    read position advances and is remembered by the caller's file object.

    :param image_file: the open (binary mode) file containing the images
    :return: a ``uint8`` array of shape (HEIGHT, WIDTH, DEPTH)
    :raises ValueError: if fewer than SIZE bytes are left in the file
    """
    # count determines the number of uint8's to read
    image = np.fromfile(image_file, dtype=np.uint8, count=SIZE)
    if image.size != SIZE:
        # Fail with a readable message instead of an opaque reshape error.
        raise ValueError(
            "expected %d bytes for one image, got %d" % (SIZE, image.size))

    # force into image matrix, then transpose to the channel-last format
    image = np.reshape(image, (DEPTH, WIDTH, HEIGHT))
    return np.transpose(image, (2, 1, 0))


def plot_image(image):
    """Draw an image with matplotlib (does not call ``show``).

    :param image: the image to be plotted in a 3-D matrix format
    :return: None
    :raises ImportError: if matplotlib is not installed
    """
    if plt is None:
        raise ImportError("matplotlib is required to plot images")
    plt.imshow(image)


def save_image(image, name):
    """Write one image to ``<name>.png``.

    :param image: the image in a 3-D matrix format
    :param name: output path without the ``.png`` extension
    :raises ImportError: if neither imageio nor scipy.misc.imsave is available
    """
    if imsave is None:
        raise ImportError("imageio is required to save images")
    imsave("%s.png" % name, image, format="png")


def save_images(images, labels):
    """Save every image as ``./img/<label>/<index>.png``.

    :param images: iterable of images, in the same order as ``labels``
    :param labels: indexable sequence with one label per image
    """
    print("Saving images to disk")
    for index, image in enumerate(images):
        directory = './img/' + str(labels[index]) + '/'
        # exist_ok makes repeated runs (and repeated labels) harmless.
        os.makedirs(directory, exist_ok=True)
        filename = directory + str(index)
        print(filename)
        save_image(image, filename)


def main():
    """Sanity-check the readers, then export the whole split to disk."""
    # test to check if the image is read correctly; the data is binary,
    # so the file must be opened in 'rb' mode
    with open(DATA_PATH, 'rb') as f:
        image = read_single_image(f)
        plot_image(image)

    # test to check if the whole dataset is read correctly
    images = read_all_images(DATA_PATH)
    print(images.shape)

    labels = read_labels(LABEL_PATH)
    print(labels.shape)

    # save images to disk
    save_images(images, labels)


if __name__ == "__main__":
    main()
# path to the binary test file with image data
DATA_PATH = './stl10_binary/test_X.bin'

# path to the binary test file with labels
LABEL_PATH = './stl10_binary/test_y.bin'
function [layers, lgraph] = get_vggnet()
% GET_VGGNET  Build the modified VGG-style network for 96x96x3 images.
%   [layers, lgraph] = get_vggnet() returns the sequential layer array and
%   the layerGraph created from it.  The network is five 5x5 convolution
%   blocks (conv -> batch norm -> ReLU, each closed by a 2x2 stride-2 max
%   pooling; blocks 3-5 add dropout before the pooling), followed by a
%   fully connected classifier with 10 outputs.

% Input: 96x96 RGB images with random left-right flips.
inputStage = imageInputLayer([96 96 3],'Name','imageinput','DataAugmentation','randfliplr');

% Block 1: 64 filters.
block1 = [
    convolution2dLayer([5 5],64,'Name','conv_1',"Padding","same")
    batchNormalizationLayer('Name','bn_1')
    reluLayer('Name','relu_1')
    maxPooling2dLayer([2 2],'Name','maxpool_1','Padding','same','Stride',[2 2])];

% Block 2: 128 filters.
block2 = [
    convolution2dLayer([5 5],128,'Name','conv_2','Padding','same')
    batchNormalizationLayer('Name','bn_2')
    reluLayer('Name','relu_2')
    maxPooling2dLayer([2 2],'Name','maxpool_2','Padding','same','Stride',[2 2])];

% Block 3: 128 filters, dropout before pooling.
block3 = [
    convolution2dLayer([5 5],128,'Name','conv_3','Padding','same')
    batchNormalizationLayer('Name','bn_3')
    reluLayer('Name','relu_3')
    dropoutLayer(0.4,'Name','dp_1')
    maxPooling2dLayer([2 2],'Name','maxpool_3','Padding','same','Stride',[2 2])];

% Block 4: 256 filters, dropout before pooling.
block4 = [
    convolution2dLayer([5 5],256,'Name','conv_4','Padding','same')
    batchNormalizationLayer('Name','bn_4')
    reluLayer('Name','relu_4')
    dropoutLayer(0.4,'Name','dp_2')
    maxPooling2dLayer([2 2],'Name','maxpool_4','Padding','same','Stride',[2 2])];

% Block 5: 256 filters, dropout before pooling.
block5 = [
    convolution2dLayer([5 5],256,'Name','conv_5','Padding','same')
    batchNormalizationLayer('Name','bn_5')
    reluLayer('Name','relu_5')
    dropoutLayer(0.4,'Name','dp_3')
    maxPooling2dLayer([2 2],'Name','maxpool_5','Padding','same','Stride',[2 2])];

% Classifier head: two 512-unit hidden layers, then the 10-class output.
classifier = [
    dropoutLayer(0.5,'Name','dp_4')
    fullyConnectedLayer(512,'Name','fc_1')
    reluLayer('Name','relu_6')
    fullyConnectedLayer(512,'Name','fc_2')
    reluLayer('Name','relu_7')
    dropoutLayer(0.5,'Name','dp_5')
    fullyConnectedLayer(10,'Name','fc_3')
    softmaxLayer('Name','softmax')
    classificationLayer('Name','classoutput')];

% Stack the stages in order; layerGraph links them sequentially.
layers = [inputStage; block1; block2; block3; block4; block5; classifier];
lgraph = layerGraph(layers);
function [layers, lgraph] = get_resnet()
% GET_RESNET  Build the modified residual network for 96x96x3 images.
%   [layers, lgraph] = get_resnet() returns the layerGraph with all skip
%   connections wired, and its layer list (lgraph.Layers).  The network has
%   a 3x3 stem followed by three stages of two residual units each
%   (netWidth, 2*netWidth and 4*netWidth filters); stages 2 and 3 start
%   with a stride-2 unit whose shortcut is a 1x1 stride-2 projection.

netWidth = 16;

% Stem: input (with random left-right flips) -> 3x3 conv -> BN -> ReLU.
stem = [
    imageInputLayer([96 96 3],'Name','input','DataAugmentation','randfliplr')
    convolution2dLayer(3,netWidth,'Padding','same','Name','convInp')
    batchNormalizationLayer('Name','bn_res')
    reluLayer('Name','relu_sp')];

% Stage 1: two stride-1 units at netWidth filters.
stage1 = [
    convolutionalUnit(netWidth,1,'conv_sa1')
    additionLayer(2,'Name','add_11')
    reluLayer('Name','relu_11')
    convolutionalUnit(netWidth,1,'conv_sa2')
    additionLayer(2,'Name','add_12')
    reluLayer('Name','relu_12')
    dropoutLayer(0.4,'Name','dp_1')];

% Stage 2: first unit downsamples with stride 2.
stage2 = [
    convolutionalUnit(2*netWidth,2,'conv_sc1')
    additionLayer(2,'Name','add_21')
    reluLayer('Name','relu_21')
    convolutionalUnit(2*netWidth,1,'conv_sc2')
    additionLayer(2,'Name','add_22')
    reluLayer('Name','relu_22')
    dropoutLayer(0.4,'Name','dp_2')];

% Stage 3: first unit downsamples with stride 2.
stage3 = [
    convolutionalUnit(4*netWidth,2,'conv_se1')
    additionLayer(2,'Name','add_31')
    reluLayer('Name','relu_31')
    convolutionalUnit(4*netWidth,1,'conv_se2')
    additionLayer(2,'Name','add_32')
    reluLayer('Name','relu_32')
    dropoutLayer(0.4,'Name','dp_3')];

% Head: pooling, dropout and the 10-class classifier.
% NOTE(review): the pooling layer is named 'globalPool' but uses pool size 8
% with the default stride; after two stride-2 stages on a 96x96 input the
% feature map is presumably 24x24, so this would not pool globally --
% confirm whether that is intended.
head = [
    averagePooling2dLayer(8,'Name','globalPool')
    dropoutLayer(0.5,'Name','dp_4')
    fullyConnectedLayer(10,'Name','fcFinal')
    softmaxLayer('Name','softmax')
    classificationLayer('Name','classoutput')];

% The main path is connected sequentially by layerGraph.
lgraph = layerGraph([stem; stage1; stage2; stage3; head]);

% Stage 1 identity shortcuts.
lgraph = connectLayers(lgraph,'relu_sp','add_11/in2');
lgraph = connectLayers(lgraph,'relu_11','add_12/in2');

% Stage 2: projection shortcut for the downsampling unit, then identity.
lgraph = addLayers(lgraph,projectionShortcut(2*netWidth,'1'));
lgraph = connectLayers(lgraph,'relu_12','skipConv1');
lgraph = connectLayers(lgraph,'skipBN1','add_21/in2');
lgraph = connectLayers(lgraph,'relu_21','add_22/in2');

% Stage 3: projection shortcut for the downsampling unit, then identity.
lgraph = addLayers(lgraph,projectionShortcut(4*netWidth,'2'));
lgraph = connectLayers(lgraph,'relu_22','skipConv2');
lgraph = connectLayers(lgraph,'skipBN2','add_31/in2');
lgraph = connectLayers(lgraph,'relu_31','add_32/in2');

layers = lgraph.Layers;


function layers = convolutionalUnit(numF,stride,tag)
% CONVOLUTIONALUNIT  Two 3x3 conv + BN layers with a ReLU in between.
%   numF   - number of filters of both convolutions
%   stride - stride of the first convolution
%   tag    - prefix for the layer names
layers = [
    convolution2dLayer(3,numF,'Padding','same','Stride',stride,'Name',[tag,'conv1'])
    batchNormalizationLayer('Name',[tag,'BN1'])
    reluLayer('Name',[tag,'relu1'])
    convolution2dLayer(3,numF,'Padding','same','Name',[tag,'conv2'])
    batchNormalizationLayer('Name',[tag,'BN2'])];


function layers = projectionShortcut(numF,index)
% PROJECTIONSHORTCUT  1x1 stride-2 convolution + BN used on a skip path.
%   numF  - number of filters of the 1x1 convolution
%   index - suffix (char) appended to 'skipConv' / 'skipBN' layer names
layers = [
    convolution2dLayer(1,numF,'Stride',2,'Name',['skipConv',index])
    batchNormalizationLayer('Name',['skipBN',index])];
% Training configuration: SGD with momentum, initial learning rate 0.01.
% MaxEpochs, handles.augimdsValidation and ExecutionEnvironment are taken
% from the caller's workspace.
options_train = trainingOptions('sgdm', ...
    'MaxEpochs',            MaxEpochs, ...
    'InitialLearnRate',     0.01, ...
    'L2Regularization',     0.01, ...
    'Verbose',              true, ...
    'MiniBatchSize',        128, ...
    'Shuffle',              'every-epoch', ...
    'Plots',                'training-progress', ...
    'ValidationData',       handles.augimdsValidation, ...
    'ValidationFrequency',  10, ...
    'ExecutionEnvironment', ExecutionEnvironment);
参数 | 值 |
最大训练轮数（MaxEpochs） | 100 |
学习率 | 0.01 |
批大小（MiniBatchSize） | 128 |
L2正则化惩罚参数 | 0.01 |
% Training configuration: SGD with momentum, initial learning rate 0.001.
% MaxEpochs, handles.augimdsValidation and ExecutionEnvironment are taken
% from the caller's workspace.
options_train = trainingOptions('sgdm', ...
    'MaxEpochs',            MaxEpochs, ...
    'InitialLearnRate',     0.001, ...
    'L2Regularization',     0.01, ...
    'Verbose',              true, ...
    'MiniBatchSize',        128, ...
    'Shuffle',              'every-epoch', ...
    'Plots',                'training-progress', ...
    'ValidationData',       handles.augimdsValidation, ...
    'ValidationFrequency',  10, ...
    'ExecutionEnvironment', ExecutionEnvironment);
参数 | 值 |
最大训练轮数（MaxEpochs） | 100 |
学习率 | 0.001 |
批大小（MiniBatchSize） | 128 |
L2正则化惩罚参数 | 0.01 |