RでMXNet(4)

Kaggleの「Dogs vs. Cats」をやってみる。
こちらを参考にさせて頂いた。

  • train.zip(25000枚のjpeg画像)をダウンロードして解凍。
  • 猫、犬の画像をそれぞれcatフォルダ、dogフォルダに分ける。

データの準備

library(EBImage)

# Convert the 25000 jpeg images (12500 cats + 12500 dogs) into one
# 56 x 56 x 3 x 25000 array for MXNet, plus a 0/1 label vector.
n_cat <- 12500
n_dog <- 12500
x_train <- array(0, c(56, 56, 3, n_cat + n_dog))

# Use full paths from list.files() instead of setwd(): avoids mutating
# the working directory as a script-level side effect.
files_cat <- list.files("D:/rworks/dogcat/train/cat/", full.names = TRUE)

for (i in seq_len(n_cat)) {
    img <- readImage(files_cat[i])
    # Named img_resized, not `resize`: assigning over the EBImage
    # resize() function name is a classic shadowing footgun.
    img_resized <- resize(img, w = 56, h = 56)
    x_train[,,, i] <- img_resized@.Data
}

files_dog <- list.files("D:/rworks/dogcat/train/dog/", full.names = TRUE)

for (i in seq_len(n_dog)) {
    img <- readImage(files_dog[i])
    img_resized <- resize(img, w = 56, h = 56)
    x_train[,,, n_cat + i] <- img_resized@.Data
}

# Labels (Cat: 0, Dog: 1)
t_train <- c(rep(0, n_cat), rep(1, n_dog))

# Of the 25000 images, use 22500 for training and the rest for testing.
sampling <- sample(n_cat + n_dog, 22500)
x_train_mini <- x_train[,,, sampling]
t_train_mini <- t_train[sampling]
x_test_mini <- x_train[,,, -sampling]
t_test_mini <- t_train[-sampling]

ニューラルネットワークの構築

# CNN for cats vs. dogs: two conv/pool stages followed by a two-layer
# fully connected head, trained with Adam for 30 epochs on the CPU.
library(mxnet)

net <- mx.symbol.Variable("data")

# Stage 1: 3x3 convolution (32 filters) -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 32)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Stage 2: 3x3 convolution (64 filters) -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 64)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Classifier head: FC(200) -> dropout -> ReLU -> FC(2) -> dropout.
net <- mx.symbol.FullyConnected(data = net, num_hidden = 200)
net <- mx.symbol.Dropout(data = net)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, num_hidden = 2)
net <- mx.symbol.Dropout(data = net)
softmax <- mx.symbol.SoftmaxOutput(data = net)

mx.set.seed(1)

# Train on the 22500-image split, evaluating on the held-out 2500 images.
model <- mx.model.FeedForward.create(
    softmax, X = x_train_mini,
    y = t_train_mini,
    ctx = mx.cpu(),
    num.round = 30,
    array.batch.size = 100,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    initializer = mx.init.Xavier(factor_type = "in", rnd_type = "gaussian", magnitude = 2),
    eval.data = list(data = x_test_mini, label = t_test_mini),
    array.layout = "auto")

結果

Start training with 1 devices
[1] Train-accuracy=0.557276785714286
[1] Validation-accuracy=0.6684
[2] Train-accuracy=0.627555555555556
[2] Validation-accuracy=0.6976
[3] Train-accuracy=0.663066666666667
[3] Validation-accuracy=0.7332

・・・

[28] Train-accuracy=0.815111111111111
[28] Validation-accuracy=0.7948
[29] Train-accuracy=0.820444444444445
[29] Validation-accuracy=0.7892
[30] Train-accuracy=0.817377777777778
[30] Validation-accuracy=0.796

テストデータのaccuracy:79.6%

> pred <- predict(model, x_train_mini)
> pred_train <- apply(pred, 2, which.max) - 1
> sum(pred_train == t_train_mini) / 22500
[1] 0.9488

訓練データのaccuracy:94.9%

訓練後のデータを保存する場合にはこちらを参照

mx.model.save.RData <- function(model, filename) {
    # Serialize a trained MXNet feed-forward model to a single RDS file:
    # the network as JSON plus the parameters converted to plain R arrays,
    # so the file can be reloaded without the original NDArray handles.
    if (!inherits(model, "MXFeedForwardModel")) stop("Not a MXNet model!")
    saveRDS(
        list(
            symbol_json = model$symbol$as.json(),
            arg.params = lapply(model$arg.params, as.array),
            aux.params = lapply(model$aux.params, as.array)
        ),
        filename)
}

mx.model.load.RData <- function(filename) {
    # Rebuild a model saved by mx.model.save.RData(): parse the symbol
    # back from JSON and convert the plain arrays into NDArrays again.
    saved <- readRDS(filename)
    structure(
        list(
            symbol = mx.symbol.load.json(saved$symbol_json),
            arg.params = lapply(saved$arg.params, mx.nd.array),
            aux.params = lapply(saved$aux.params, mx.nd.array)
        ),
        class = "MXFeedForwardModel")
}

(2017年6月27日追記)こちらのほうがおすすめ。

フィルタ数32の1番目の畳み込み層をフィルタ数16の二つの畳み込み層に分けてみる

  • わずかに改善
# Variant 1: the first 32-filter convolution is replaced by two stacked
# 16-filter convolutions (no activation between them); rest is unchanged.
library(mxnet)

net <- mx.symbol.Variable("data")

# Stage 1: two back-to-back 3x3 convolutions (16 filters each),
# then ReLU and 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 16)
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 16)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Stage 2: 3x3 convolution (64 filters) -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 64)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Classifier head: FC(200) -> dropout -> ReLU -> FC(2) -> dropout.
net <- mx.symbol.FullyConnected(data = net, num_hidden = 200)
net <- mx.symbol.Dropout(data = net)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, num_hidden = 2)
net <- mx.symbol.Dropout(data = net)
softmax <- mx.symbol.SoftmaxOutput(data = net)

mx.set.seed(1)

# Same training setup as before: Adam, 30 epochs, batch size 100.
model <- mx.model.FeedForward.create(
    softmax, X = x_train_mini,
    y = t_train_mini,
    ctx = mx.cpu(),
    num.round = 30,
    array.batch.size = 100,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    initializer = mx.init.Xavier(factor_type = "in", rnd_type = "gaussian", magnitude = 2),
    eval.data = list(data = x_test_mini, label = t_test_mini),
    array.layout = "auto")

結果

Start training with 1 devices
[1] Train-accuracy=0.585848214285714
[1] Validation-accuracy=0.7144
[2] Train-accuracy=0.644711111111111
[2] Validation-accuracy=0.7272
[3] Train-accuracy=0.675244444444445
[3] Validation-accuracy=0.7536

・・・

[28] Train-accuracy=0.823822222222223
[28] Validation-accuracy=0.79
[29] Train-accuracy=0.815111111111112
[29] Validation-accuracy=0.7984
[30] Train-accuracy=0.826044444444445
[30] Validation-accuracy=0.806

1番目と2番目の畳み込み層の間に活性化関数を入れてみる

  • ほとんど変わらず
# Variant 2: same as the stacked-16-filter network, but with a ReLU
# inserted between the first and second convolution layers.
library(mxnet)

net <- mx.symbol.Variable("data")

# Stage 1: 3x3 conv (16) -> ReLU -> 3x3 conv (16) -> ReLU -> 2x2 max pool.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 16)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 16)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Stage 2: 3x3 convolution (64 filters) -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 64)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Classifier head: FC(200) -> dropout -> ReLU -> FC(2) -> dropout.
net <- mx.symbol.FullyConnected(data = net, num_hidden = 200)
net <- mx.symbol.Dropout(data = net)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, num_hidden = 2)
net <- mx.symbol.Dropout(data = net)
softmax <- mx.symbol.SoftmaxOutput(data = net)

mx.set.seed(1)

# Same training setup: Adam, 30 epochs, batch size 100.
model <- mx.model.FeedForward.create(
    softmax, X = x_train_mini,
    y = t_train_mini,
    ctx = mx.cpu(),
    num.round = 30,
    array.batch.size = 100,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    initializer = mx.init.Xavier(factor_type = "in", rnd_type = "gaussian", magnitude = 2),
    eval.data = list(data = x_test_mini, label = t_test_mini),
    array.layout = "auto")

結果

Start training with 1 devices
[1] Train-accuracy=0.558973214285714
[1] Validation-accuracy=0.6536
[2] Train-accuracy=0.642577777777778
[2] Validation-accuracy=0.7352
[3] Train-accuracy=0.681155555555555
[3] Validation-accuracy=0.7532

・・・

[28] Train-accuracy=0.824933333333334
[28] Validation-accuracy=0.8012
[29] Train-accuracy=0.820311111111112
[29] Validation-accuracy=0.8072
[30] Train-accuracy=0.827511111111112
[30] Validation-accuracy=0.798

2番目の畳み込み層をフィルタ数32、全結合層のニューロン数を300に増やしてみる

  • そろそろCPUの限界を感じ始めた(num.round=20に)
  • 時間の割には改善なし
# Variant 3: second convolution widened to 32 filters and the FC layer
# to 300 neurons; epochs reduced to 20 to keep CPU training time down.
library(mxnet)

net <- mx.symbol.Variable("data")

# Stage 1: 3x3 conv (16 filters) -> 3x3 conv (32 filters)
# -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 16)
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 32)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Stage 2: 3x3 convolution (64 filters) -> ReLU -> 2x2 max pooling.
net <- mx.symbol.Convolution(data = net, kernel = c(3, 3), num_filter = 64)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.Pooling(data = net, pool_type = "max",
                         kernel = c(2, 2), stride = c(2, 2))

# Classifier head: FC(300) -> dropout -> ReLU -> FC(2) -> dropout.
net <- mx.symbol.FullyConnected(data = net, num_hidden = 300)
net <- mx.symbol.Dropout(data = net)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, num_hidden = 2)
net <- mx.symbol.Dropout(data = net)
softmax <- mx.symbol.SoftmaxOutput(data = net)

mx.set.seed(1)

# Training setup: Adam, 20 epochs (reduced from 30), batch size 100.
model <- mx.model.FeedForward.create(
    softmax, X = x_train_mini,
    y = t_train_mini,
    ctx = mx.cpu(),
    num.round = 20,
    array.batch.size = 100,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    initializer = mx.init.Xavier(factor_type = "in", rnd_type = "gaussian", magnitude = 2),
    eval.data = list(data = x_test_mini, label = t_test_mini),
    array.layout = "auto")

結果

Start training with 1 devices
[1] Train-accuracy=0.5896875
[1] Validation-accuracy=0.7084
[2] Train-accuracy=0.6484
[2] Validation-accuracy=0.7348
[3] Train-accuracy=0.692933333333333
[3] Validation-accuracy=0.74

・・・

[18] Train-accuracy=0.852711111111111
[18] Validation-accuracy=0.7916
[19] Train-accuracy=0.853066666666668
[19] Validation-accuracy=0.7856
[20] Train-accuracy=0.855155555555556
[20] Validation-accuracy=0.7948

訓練データ200~300枚程度を手作業で背景を取り除くようにトリミングしてみる

  • あとで縮小しやすいように正方形でトリミング
  • 結果はわずかに改善
  • 訓練データの重要性を改めて認識した
Start training with 1 devices
[1] Train-accuracy=0.581026785714286
[1] Validation-accuracy=0.6712
[2] Train-accuracy=0.654933333333333
[2] Validation-accuracy=0.7428
[3] Train-accuracy=0.689022222222222
[3] Validation-accuracy=0.7696

・・・

[18] Train-accuracy=0.853777777777778
[18] Validation-accuracy=0.8084
[19] Train-accuracy=0.855688888888889
[19] Validation-accuracy=0.8132
[20] Train-accuracy=0.854355555555556
[20] Validation-accuracy=0.8172