Keras/Tensorflow : CIFAR-10のVGG-likeなアーキテクチャを作った.
1. 動作環境
OS: Ubuntu 16.04
Package versions: python 3.5.0, tensorboard 1.9.0, tensorflow 1.9.0, h5py 2.8.0, Keras 2.2.2, Keras-Applications 1.0.4, Keras-Preprocessing 1.0.2
2. プログラム
"""Train a VGG-like CNN on CIFAR-10 using Keras 2.2 on the TensorFlow 1.x backend.

Loads CIFAR-10, normalizes and one-hot-encodes it, trains with on-the-fly data
augmentation, and saves checkpoints, logs, the full model, its weights, and its
architecture JSON under `log_dir`.
"""
import os
from datetime import datetime

import numpy as np
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint, TensorBoard
from keras.datasets import cifar10            # fix: cifar10 was used but never imported
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dense, Dropout, Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils              # fix: np_utils was used but never imported

# --- Meta parameters & paths ---
img_rows = 32        # CIFAR-10 image height (was float 32.0)
img_cols = 32        # CIFAR-10 image width  (was float 32.0)
pixel_max = 255.0    # maximum pixel value, used for [0, 1] normalization
                     # (was misleadingly named img_ch = 255.0)
batch_size = 128
nb_classes = 10
nb_epoch = 200

log_dir = '../train_log/vgg-like_log'
dataset_dir = '../../CIFAR-10/datasets'
model_name = 'vgg-like__' + datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
model_cp_path = os.path.join(log_dir, model_name + '_checkpoint.h5')
model_csv_path = os.path.join(log_dir, model_name + '_csv.csv')

# --- Load CIFAR-10 dataset ---
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Scale pixel values from [0, 255] to [0.0, 1.0].
X_train = X_train.astype('float32') / pixel_max
X_test = X_test.astype('float32') / pixel_max

# Convert class labels (0-9) to one-hot encoding.
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

# Save the preprocessed datasets as np.ndarray (.npy) files.
np.save('X_train', X_train)
np.save('y_train', y_train)
np.save('X_test', X_test)
np.save('y_test', y_test)

# --- Data augmentation ---
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,   # fix: comma was missing here (SyntaxError)
    vertical_flip=False)
datagen.fit(X_train)

# Keep the previous session so it can be restored after training.
old_session = KTF.get_session()

with tf.Graph().as_default():
    session = tf.Session('')
    KTF.set_session(session)
    KTF.set_learning_phase(1)  # 1 = training phase (enables Dropout/BN updates)

    # --- Build model: VGG-like stacks of 3x3 conv + BN + ReLU ---
    model = Sequential()
    with tf.name_scope('inference') as scope:
        # Block 1: 2 x conv64, downsample to 16x16
        model.add(Conv2D(64, (3, 3), padding='same',
                         input_shape=X_train.shape[1:]))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Conv2D(64, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Block 2: 2 x conv128, downsample to 8x8
        model.add(Conv2D(128, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Conv2D(128, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Block 3: 4 x conv256, downsample to 4x4
        model.add(Conv2D(256, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Conv2D(256, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Conv2D(256, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Conv2D(256, (3, 3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Classifier head: two FC-1024 layers + softmax over 10 classes
        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1024))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))
    model.summary()

    # --- Callbacks: best-checkpoint saving, TensorBoard, CSV log ---
    cp_cb = ModelCheckpoint(model_cp_path, monitor='val_loss',
                            save_best_only=True)
    tb_cb = TensorBoard(log_dir=log_dir, histogram_freq=1)
    csv_cb = CSVLogger(model_csv_path)
    callbacks = [cp_cb, tb_cb, csv_cb]

    # --- Compile & train ---
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    history = model.fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        # fix: integer division — steps_per_epoch must be an int, not a float
        steps_per_epoch=len(X_train) // batch_size,
        epochs=nb_epoch,
        verbose=1,
        callbacks=callbacks,
        validation_data=(X_test, y_test))

    # --- Validation ---
    score = model.evaluate(X_test, y_test, verbose=0)
    print('val score : ', score[0])
    print('val accuracy : ', score[1])

    # --- Save model instance (architecture + weights + optimizer state) ---
    f1_name = model_name + '_instance'
    f1_path = os.path.join(log_dir, f1_name) + '.h5'
    model.save(f1_path)

    # --- Save model weights only ---
    f2_name = model_name + '_weights'
    f2_path = os.path.join(log_dir, f2_name) + '.h5'
    model.save_weights(f2_path)

    # --- Save model architecture as JSON ---
    # NOTE: filename keeps the original's "architechture" spelling so existing
    # tooling/paths that expect it keep working.
    f3_name = model_name + '_architechture'
    f3_path = os.path.join(log_dir, f3_name) + '.json'
    json_string = model.to_json()
    with open(f3_path, 'w') as f:
        f.write(json_string)

# Restore the session that was active before training.
KTF.set_session(old_session)
3.アーキテクチャ
Kerasのkeras.models.Model()クラスが持つ属性(attribute)である"summary()"を使う.プログラム上のmodel.summary()で、標準出力にモデルの構造(architecture)の要約情報が表示される.
Modelクラス (functional API) - Keras Documentation
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_1 (Conv2D) (None, 32, 32, 64) 1792 _________________________________________________________________ batch_normalization_1 (Batch (None, 32, 32, 64) 256 _________________________________________________________________ activation_1 (Activation) (None, 32, 32, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 32, 32, 64) 36928 _________________________________________________________________ batch_normalization_2 (Batch (None, 32, 32, 64) 256 _________________________________________________________________ activation_2 (Activation) (None, 32, 32, 64) 0 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 16, 16, 64) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 16, 16, 64) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 16, 16, 128) 73856 _________________________________________________________________ batch_normalization_3 (Batch (None, 16, 16, 128) 512 _________________________________________________________________ activation_3 (Activation) (None, 16, 16, 128) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 16, 16, 128) 147584 _________________________________________________________________ batch_normalization_4 (Batch (None, 16, 16, 128) 512 _________________________________________________________________ activation_4 (Activation) (None, 16, 16, 128) 0 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 8, 8, 128) 0 _________________________________________________________________ dropout_2 (Dropout) (None, 8, 8, 128) 0 _________________________________________________________________ conv2d_5 (Conv2D) 
(None, 8, 8, 256) 295168 _________________________________________________________________ batch_normalization_5 (Batch (None, 8, 8, 256) 1024 _________________________________________________________________ activation_5 (Activation) (None, 8, 8, 256) 0 _________________________________________________________________ conv2d_6 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ batch_normalization_6 (Batch (None, 8, 8, 256) 1024 _________________________________________________________________ activation_6 (Activation) (None, 8, 8, 256) 0 _________________________________________________________________ conv2d_7 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ batch_normalization_7 (Batch (None, 8, 8, 256) 1024 _________________________________________________________________ activation_7 (Activation) (None, 8, 8, 256) 0 _________________________________________________________________ conv2d_8 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ batch_normalization_8 (Batch (None, 8, 8, 256) 1024 _________________________________________________________________ activation_8 (Activation) (None, 8, 8, 256) 0 _________________________________________________________________ max_pooling2d_3 (MaxPooling2 (None, 4, 4, 256) 0 _________________________________________________________________ dropout_3 (Dropout) (None, 4, 4, 256) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 4096) 0 _________________________________________________________________ dense_1 (Dense) (None, 1024) 4195328 _________________________________________________________________ activation_9 (Activation) (None, 1024) 0 _________________________________________________________________ dropout_4 (Dropout) (None, 1024) 0 _________________________________________________________________ dense_2 (Dense) (None, 1024) 
1049600 _________________________________________________________________ activation_10 (Activation) (None, 1024) 0 _________________________________________________________________ dropout_5 (Dropout) (None, 1024) 0 _________________________________________________________________ dense_3 (Dense) (None, 10) 10250 _________________________________________________________________ activation_11 (Activation) (None, 10) 0 ================================================================= Total params: 7,586,378 Trainable params: 7,583,562 Non-trainable params: 2,816 _________________________________________________________________
4.学習結果
validation accuracy は 90% くらい