
    SegNet with Keras (TensorFlow)

    SegNet is "A Deep Convolutional Encoder-Decoder Architecture" developed at the University of Cambridge that performs pixel-wise labeling (semantic segmentation).

    I implemented it with Keras 2.x + TensorFlow.

    For now I am just posting the source code that I got running; a proper explanation will have to wait...

    Environment

    • ubuntu: 17.04
    • python: 3.6
    • keras: 2.0.8
    • tensorflow: 1.3.0
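
    To reproduce this setup, installing the matching versions (for example with pip install keras==2.0.8 tensorflow-gpu==1.3.0; the GPU package name is an assumption) and then checking them from Python should be enough:

    import keras
    import tensorflow as tf

    print(keras.__version__)  # expect 2.0.8
    print(tf.__version__)     # expect 1.3.0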

    Loading the data

    We use publicly available segmentation data: the files under ./CamVid in the alexgkendall/SegNet-Tutorial repository.

    There are 12 classes; each pixel is labeled, in order, with one of the following:

    [Sky, Building, Pole, Road_marking, Road, Pavement, Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled]
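
    The data itself can be obtained by cloning the repository (git clone https://github.com/alexgkendall/SegNet-Tutorial.git) and placing its CamVid directory next to the scripts. Each line of train.txt pairs an image with its annotation and should look roughly like the following, which is why the loader below strips the leading /SegNet (7 characters) from each path:

    /SegNet/CamVid/train/0001TP_006690.png /SegNet/CamVid/trainannot/0001TP_006690.png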

    dataset.py
    import cv2
    import numpy as np
    
    from keras.applications import imagenet_utils
    
    import os
    
    DataPath = './CamVid/'
    data_shape = 360*480
    
    class Dataset:
        def __init__(self, classes=12, train_file='train.txt', test_file='test.txt'):
            self.train_file = train_file
            self.test_file = test_file
            self.data_shape = 360*480
            self.classes = classes
    
        def normalized(self, rgb):
            # per-channel histogram equalization (instead of a simple /255 scaling)
            #return rgb/255.0
            norm = np.zeros((rgb.shape[0], rgb.shape[1], 3), np.float32)
    
            b=rgb[:,:,0]
            g=rgb[:,:,1]
            r=rgb[:,:,2]
    
            norm[:,:,0]=cv2.equalizeHist(b)
            norm[:,:,1]=cv2.equalizeHist(g)
            norm[:,:,2]=cv2.equalizeHist(r)
    
            return norm
    
        def one_hot_it(self, labels):
            # convert a (360, 480) label image into a (360, 480, 12) one-hot array
            x = np.zeros([360, 480, 12])
            for i in range(360):
                for j in range(480):
                    x[i,j,labels[i][j]] = 1
            return x
    
        def load_data(self, mode='train'):
            data = []
            label = []
            if (mode == 'train'):
                filename = self.train_file
            else:
                filename = self.test_file
    
            with open(DataPath + filename) as f:
                txt = f.readlines()
                txt = [line.split(' ') for line in txt]
    
            for i in range(len(txt)):
                # each line looks like '/SegNet/CamVid/train/xxx.png /SegNet/CamVid/trainannot/xxx.png',
                # so [7:] drops the leading '/SegNet' prefix and [:-1] drops the trailing newline
                data.append(self.normalized(cv2.imread(os.getcwd() + txt[i][0][7:])))
                label.append(self.one_hot_it(cv2.imread(os.getcwd() + txt[i][1][7:][:-1])[:,:,0]))
                print('.', end='')
            #print("train data file", os.getcwd() + txt[i][0][7:])
            #print("label data raw", cv2.imread(os.getcwd() + '/CamVid/trainannot/0001TP_006690.png'))
            return np.array(data), np.array(label)
    
    
        def preprocess_inputs(self, X):
        ### @ https://github.com/fchollet/keras/blob/master/keras/applications/imagenet_utils.py
            """Preprocesses a tensor encoding a batch of images.
            # Arguments
                x: input Numpy tensor, 4D.
                data_format: data format of the image tensor.
                mode: One of "caffe", "tf".
                    - caffe: will convert the images from RGB to BGR,
                        then will zero-center each color channel with
                        respect to the ImageNet dataset,
                        without scaling.
                    - tf: will scale pixels between -1 and 1,
                        sample-wise.
            # Returns
                Preprocessed tensor.
            """
            return imagenet_utils.preprocess_input(X)
    
        def reshape_labels(self, y):
            return np.reshape(y, (len(y), self.data_shape, self.classes))
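
    As a quick sanity check of the loader (a minimal sketch, assuming the CamVid files are in place), the returned shapes should match what train.py expects:

    import dataset

    ds = dataset.Dataset(classes=12)
    X, y = ds.load_data('train')
    print(X.shape)                     # (N, 360, 480, 3) equalized images
    print(y.shape)                     # (N, 360, 480, 12) one-hot labels
    print(ds.reshape_labels(y).shape)  # (N, 172800, 12), flattened for the softmax output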

    Model

    model.py
    from keras.layers import Input
    from keras.layers.core import Activation, Reshape
    from keras.layers.convolutional import Conv2D, MaxPooling2D, UpSampling2D
    from keras.layers.normalization import BatchNormalization
    from keras.models import Model
    
    def SegNet(input_shape=(360, 480, 3), classes=12):
        ### @ https://github.com/alexgkendall/SegNet-Tutorial/blob/master/Example_Models/bayesian_segnet_camvid.prototxt
        img_input = Input(shape=input_shape)
        x = img_input
        # Encoder
        x = Conv2D(64, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
    
        x = Conv2D(128, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
    
        x = Conv2D(256, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
    
        x = Conv2D(512, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    
        # Decoder
        x = Conv2D(512, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(256, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(128, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    
        x = UpSampling2D(size=(2, 2))(x)
        x = Conv2D(64, (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
    
        x = Conv2D(classes, (1, 1), padding="valid")(x)
        x = Reshape((input_shape[0] * input_shape[1], classes))(x)
        x = Activation("softmax")(x)
        model = Model(img_input, x)
        return model
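
    To confirm the wiring, instantiating the model and printing its summary is enough; three poolings take the 360x480 input down to 45x60 and three upsamplings bring it back, so the final Reshape + softmax yields a class distribution per pixel:

    from model import SegNet

    m = SegNet(input_shape=(360, 480, 3), classes=12)
    m.summary()
    print(m.output_shape)  # (None, 172800, 12), i.e. 360*480 pixels x 12 classes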

    Training

    The script saves the trained model and writes the training progress out for TensorBoard.

    train.py
    import os
    import glob
    import numpy as np
    import keras
    
    from model import SegNet
    
    import dataset
    
    input_shape = (360, 480, 3)
    classes = 12
    epochs = 100
    batch_size = 1
    log_filepath='./logs/'
    
    data_shape = 360*480
    
    # per-class loss weights to compensate for class imbalance
    # (frequent classes such as Sky and Road get small weights, rare ones large weights)
    class_weighting = [0.2595, 0.1826, 4.5640, 0.1417, 0.5051, 0.3826, 9.6446, 1.8418, 6.6823, 6.2478, 3.0, 7.3614]
    
    ## set gpu usage
    import tensorflow as tf
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction = 0.8))
    session = tf.Session(config=config)
    keras.backend.tensorflow_backend.set_session(session)
    
    def main():
        print("loading data...")
        ds = dataset.Dataset(classes=classes)
        train_X, train_y = ds.load_data('train')  # y shape is (N, 360, 480, classes)
    
        train_X = ds.preprocess_inputs(train_X)
        train_Y = ds.reshape_labels(train_y)
        print("input data shape...", train_X.shape)
        print("input label shape...", train_Y.shape)
    
        test_X, test_y = ds.load_data('test')  # y shape is (N, 360, 480, classes)
        test_X = ds.preprocess_inputs(test_X)
        test_Y = ds.reshape_labels(test_y)
    
        tb_cb = keras.callbacks.TensorBoard(log_dir=log_filepath, histogram_freq=1, write_graph=True, write_images=True)
        print("creating model...")
        model = SegNet(input_shape=input_shape, classes=classes)
        model.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=["accuracy"])
    
        model.fit(train_X, train_Y, batch_size=batch_size, epochs=epochs,
                  verbose=1, class_weight=class_weighting, validation_data=(test_X, test_Y),
                  shuffle=True, callbacks=[tb_cb])
    
        model.save('seg.h5')
    
    if __name__ == '__main__':
        main()
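
    train.py only saves the model once at the end; if intermediate snapshots are wanted, a ModelCheckpoint callback can be added next to the TensorBoard one (a sketch; the filename pattern is my own choice):

    # save the model whenever validation loss improves
    cp_cb = keras.callbacks.ModelCheckpoint(
        'seg.{epoch:02d}-{val_loss:.2f}.hdf5',
        monitor='val_loss', save_best_only=True, verbose=1)
    # then pass callbacks=[tb_cb, cp_cb] to model.fit()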

    Running the training

    python train.py
    ...
    367/367 [==============================] - 139s - loss: 0.0492 - acc: 0.9806 - val_loss: 1.5358 - val_acc: 0.7349

    Training ran on a GeForce GTX 1050; 100 epochs took several hours.

    Training results

    tensorboard --logdir logs
    TensorBoard 0.1.5 at http://localhost:6006 (Press CTRL+C to quit) 

    [TensorBoard screenshot: training and validation curves]

    It does not look like training has fully converged?

    Prediction results

    Load the saved model, label the loaded image pixel by pixel, color each segmented region, and write the result out as an image.

    predict.py
    import numpy as np
    import keras
    from PIL import Image
    
    from model import SegNet
    
    import dataset
    
    height = 360
    width = 480
    classes = 12
    epochs = 100
    batch_size = 1
    log_filepath='./logs_100/'
    
    data_shape = 360*480
    
    def writeImage(image, filename):
        """ label data to colored image """
        Sky = [128,128,128]
        Building = [128,0,0]
        Pole = [192,192,128]
        Road_marking = [255,69,0]
        Road = [128,64,128]
        Pavement = [60,40,222]
        Tree = [128,128,0]
        SignSymbol = [192,128,128]
        Fence = [64,64,128]
        Car = [64,0,128]
        Pedestrian = [64,64,0]
        Bicyclist = [0,128,192]
        Unlabelled = [0,0,0]
        r = image.copy()
        g = image.copy()
        b = image.copy()
        label_colours = np.array([Sky, Building, Pole, Road_marking, Road, Pavement, Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])
        for l in range(0,12):
            r[image==l] = label_colours[l,0]
            g[image==l] = label_colours[l,1]
            b[image==l] = label_colours[l,2]
        rgb = np.zeros((image.shape[0], image.shape[1], 3))
        rgb[:,:,0] = r/1.0
        rgb[:,:,1] = g/1.0
        rgb[:,:,2] = b/1.0
        im = Image.fromarray(np.uint8(rgb))
        im.save(filename)
    
    def predict(test):
        model = keras.models.load_model('seg_100.h5')  # model trained for 100 epochs (train.py above saves it as 'seg.h5')
        probs = model.predict(test, batch_size=1)
    
        prob = probs[0].reshape((height, width, classes)).argmax(axis=2)
        return prob
    
    def main():
        print("loading data...")
        ds = dataset.Dataset(test_file='val.txt', classes=classes)
        test_X, test_y = ds.load_data('test')  # y shape is (N, 360, 480, classes)
        test_X = ds.preprocess_inputs(test_X)
        test_Y = ds.reshape_labels(test_y)
    
        prob = predict(test_X)
        writeImage(prob, 'val.png')
    
    if __name__ == '__main__':
        main()
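
    Since load_data also returns the ground-truth labels, a rough per-pixel accuracy for the predicted frame can be computed right after the prediction (a hypothetical addition to main(); test_y is the one-hot array loaded above):

    gt = test_y[0].argmax(axis=2)           # (360, 480) ground-truth class indices
    pixel_acc = float((prob == gt).mean())  # fraction of correctly labelled pixels
    print('pixel accuracy:', pixel_acc)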

    [Input image]

    [Output: predicted segmentation, colored per class]

    The scene is segmented more or less correctly.

