RでMXNet(9)

vgg16モデルを使ったfinetune
Kaggleの「Dogs vs. Cats」をやってみる

学習データ、テストデータの準備

  • 学習データは犬の写真2000枚、猫の写真2000枚
  • テストデータは犬の写真1250枚、猫の写真1250枚
library(EBImage)

# --- Training data -----------------------------------------------------------
# Convert the JPEG images into a 224 x 224 x 3 x N array
# (width, height, channel, sample) for VGG16.
# Assumes each class directory holds at least `n_per_class` images --
# TODO confirm against the actual Kaggle download.
n_per_class <- 2000
x_train <- array(0, c(224, 224, 3, 2 * n_per_class))

# Read images via their full paths instead of mutating the working directory
# with setwd() (a global side effect that is easy to leave dirty).
files_cat <- list.files("G:/cat", full.names = TRUE)
for (i in seq_len(n_per_class)) {
    img <- readImage(files_cat[i])
    # Use a distinct name so we do not shadow EBImage's resize() function.
    img_resized <- resize(img, w = 224, h = 224)
    x_train[,,, i] <- img_resized@.Data
}

files_dog <- list.files("G:/dog", full.names = TRUE)
for (i in seq_len(n_per_class)) {
    img <- readImage(files_dog[i])
    img_resized <- resize(img, w = 224, h = 224)
    x_train[,,, n_per_class + i] <- img_resized@.Data
}

# EBImage stores intensities in [0, 1]; VGG16 expects raw 0-255 pixel values.
x_train <- x_train * 255

# Subtract the per-channel means used by the pretrained VGG16.
# The input here is in "RGB" order (Keras-style pipelines use "BGR").
x_train[,, 1,] <- x_train[,, 1,] - 123.68
x_train[,, 2,] <- x_train[,, 2,] - 116.779
x_train[,, 3,] <- x_train[,, 3,] - 103.939

# Labels (Cat: 0, Dog: 1)
t_train <- c(rep(0, n_per_class), rep(1, n_per_class))
library(EBImage)

# --- Test data ---------------------------------------------------------------
# Same preprocessing as the training set: 224 x 224 x 3 x N array for VGG16.
# Assumes each test directory holds at least `n_test_per_class` images --
# TODO confirm against the actual Kaggle download.
n_test_per_class <- 1250
x_test <- array(0, c(224, 224, 3, 2 * n_test_per_class))

# Full paths instead of setwd(): no working-directory side effects.
files_cat <- list.files("G:/cat_test", full.names = TRUE)
for (i in seq_len(n_test_per_class)) {
    img <- readImage(files_cat[i])
    # Distinct name to avoid shadowing EBImage's resize() function.
    img_resized <- resize(img, w = 224, h = 224)
    x_test[,,, i] <- img_resized@.Data
}

files_dog <- list.files("G:/dog_test", full.names = TRUE)
for (i in seq_len(n_test_per_class)) {
    img <- readImage(files_dog[i])
    img_resized <- resize(img, w = 224, h = 224)
    x_test[,,, n_test_per_class + i] <- img_resized@.Data
}

# EBImage stores intensities in [0, 1]; VGG16 expects raw 0-255 pixel values.
x_test <- x_test * 255

# Subtract the per-channel means used by the pretrained VGG16.
# The input here is in "RGB" order (Keras-style pipelines use "BGR").
x_test[,, 1,] <- x_test[,, 1,] - 123.68
x_test[,, 2,] <- x_test[,, 2,] - 116.779
x_test[,, 3,] <- x_test[,, 3,] - 103.939

# Labels (Cat: 0, Dog: 1)
t_test <- c(rep(0, n_test_per_class), rep(1, n_test_per_class))

畳み込み層のみ計算

  • 今回は畳み込み層を固定して、全結合層のみ変更する
# Load the pretrained VGG16 and truncate it at pool5, keeping only the
# convolutional stack; the fully connected head will be retrained separately.
vgg16 <- mx.model.load("vgg16", 0)

all_layers <- vgg16$symbol$get.internals()
pool5 <- all_layers[[match("pool5_output", all_layers$outputs)]]

# Drop the parameters of the fully connected layers that were cut off.
new.params <- vgg16$arg.params
for (p in c("fc6_weight", "fc6_bias",
            "fc7_weight", "fc7_bias",
            "fc8_weight", "fc8_bias")) {
    new.params[[p]] <- NULL
}

# Wrap symbol + parameters as a feed-forward model and run the frozen
# convolutional stack once over the whole training set (feature extraction).
new.model <- structure(
    list(symbol = pool5, arg.params = new.params, aux.params = list()),
    class = "MXFeedForwardModel")

pred <- predict(new.model, x_train, ctx = mx.gpu())

新規に学習

# Shuffle the extracted features so both classes are mixed within batches.
shuffle_idx <- sample(4000, 4000)
x_train <- pred[,,, shuffle_idx]
t_train <- t_train[shuffle_idx]

rm(shuffle_idx)

# Classifier head trained on top of the frozen pool5 features:
# flatten -> FC(200) + relu -> FC(200) + relu -> FC(2) -> softmax.
# Layer names new_fc1/2/3 are referenced later when the weights are
# copied into the rebuilt network.
net <- mx.symbol.Variable("data")
net <- mx.symbol.flatten(data = net)
net <- mx.symbol.FullyConnected(data = net, name = "new_fc1", num_hidden = 200)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, name = "new_fc2", num_hidden = 200)
net <- mx.symbol.Activation(data = net, act_type = "relu")
net <- mx.symbol.FullyConnected(data = net, name = "new_fc3", num_hidden = 2)
softmax <- mx.symbol.SoftmaxOutput(data = net)

# Fixed seed so the training run is reproducible.
mx.set.seed(1)

model <- mx.model.FeedForward.create(
    softmax, X = x_train,
    y = t_train,
    ctx = mx.cpu(),
    num.round = 5,
    array.batch.size = 10,
    learning.rate = 0.001, momentum = 0.9,
    eval.metric = mx.metric.accuracy,
    initializer = mx.init.normal(0.002),
    array.layout = "auto")

ニューモデルの作成

# Rebuild the complete network: pretrained convolutional stack up to pool5
# plus a fresh head using the ORIGINAL layer names fc6/fc7/fc8, so the
# parameter list keys line up with the symbol.
vgg16 <- mx.model.load("vgg16", 0)

all_layers <- vgg16$symbol$get.internals()
pool5 <- all_layers[[match("pool5_output", all_layers$outputs)]]

head <- mx.symbol.FullyConnected(data = pool5, name = "fc6", num_hidden = 200)
head <- mx.symbol.Activation(data = head, act_type = "relu")
head <- mx.symbol.FullyConnected(data = head, name = "fc7", num_hidden = 200)
head <- mx.symbol.Activation(data = head, act_type = "relu")
head <- mx.symbol.FullyConnected(data = head, name = "fc8", num_hidden = 2)

softmax <- mx.symbol.SoftmaxOutput(data = head)

# Start from the pretrained parameters, then overwrite the head slots with
# the freshly trained weights (new_fc1/2/3 -> fc6/7/8).
new.params <- vgg16$arg.params
for (k in 1:3) {
    src <- paste0("new_fc", k)
    dst <- paste0("fc", k + 5)
    new.params[[paste0(dst, "_weight")]] <- model$arg.params[[paste0(src, "_weight")]]
    new.params[[paste0(dst, "_bias")]]   <- model$arg.params[[paste0(src, "_bias")]]
}

new.model <- structure(
    list(symbol = softmax, arg.params = new.params, aux.params = list()),
    class = "MXFeedForwardModel")

テストデータを用いた評価

# Evaluate on the test set. predict() returns a (classes x N) probability
# matrix, so which.max over columns gives 1 or 2; subtracting 1 recovers the
# 0/1 labels (Cat: 0, Dog: 1).
prob <- predict(new.model, x_test, ctx = mx.gpu())
pred <- apply(prob, 2, which.max) - 1
# Accuracy: mean() avoids hard-coding the test-set size (was "/ 2500").
mean(pred == t_test)

結果

[1] 0.9852