MXNet in R (2)

I tried building a convolutional NN with the optimizer changed to Adam.

library(mxnet)

# Load the training data
x_train <- array(t(readRDS("x_train")), c(28, 28, 1, 60000)) / 255
t_train <- apply(readRDS("t_train"), 1, which.max) - 1

# Load the test data (scaled to [0, 1] to match the training data)
x_test <- array(t(readRDS("x_test")), c(28, 28, 1, 10000)) / 255
t_test <- apply(readRDS("t_test"), 1, which.max) - 1

# Convolutional NN
data <- mx.symbol.Variable("data")
conv <- mx.symbol.Convolution(
    data, name = "conv",
    kernel = c(5, 5),
    num_filter = 30)
act1 <- mx.symbol.Activation(conv, name = "relu1", act_type = "relu")
pool <- mx.symbol.Pooling(
    act1, name ="pool",
    pool_type = "max",
    kernel = c(2, 2),
    stride = c(2, 2))
fc1 <- mx.symbol.FullyConnected(pool, name = "fc1", num_hidden = 100)
act2 <- mx.symbol.Activation(fc1, name = "relu2", act_type = "relu")
fc2 <- mx.symbol.FullyConnected(act2, name = "fc2", num_hidden = 10)
softmax <- mx.symbol.SoftmaxOutput(fc2, name = "sm")

mx.set.seed(400)

model <- mx.model.FeedForward.create(
    softmax, X = x_train,
    y = t_train,
    ctx = mx.cpu(),
    num.round = 15,
    array.batch.size = 1000,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    eval.data = list(data = x_test, label = t_test),
    initializer = mx.init.normal(0.01),
    array.layout = "auto")

Results

Start training with 1 devices
[1] Train-accuracy=0.732254237288135
[1] Validation-accuracy=0.8897
[2] Train-accuracy=0.905133333333333
[2] Validation-accuracy=0.9161
[3] Train-accuracy=0.928166666666666
[3] Validation-accuracy=0.9353
[4] Train-accuracy=0.946616666666667
[4] Validation-accuracy=0.9516

...

[13] Train-accuracy=0.987566666666666
[13] Validation-accuracy=0.9833
[14] Train-accuracy=0.988583333333333
[14] Validation-accuracy=0.984
[15] Train-accuracy=0.98945
[15] Validation-accuracy=0.9847
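
As an aside, the test accuracy can also be checked directly after training with predict(). A minimal sketch (probs and pred_label are my own variable names, untested):

# Class probabilities: one column per test image, one row per class (0-9)
probs <- predict(model, x_test)
# Pick the most probable class for each image
pred_label <- max.col(t(probs)) - 1
# Should roughly match the final Validation-accuracy
mean(pred_label == t_test)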

Next, I changed the activation function to tanh and tried the Xavier initialization.

library(mxnet)

# Load the training data
x_train <- array(t(readRDS("x_train")), c(28, 28, 1, 60000)) / 255
t_train <- apply(readRDS("t_train"), 1, which.max) - 1

# Load the test data (scaled to [0, 1] to match the training data)
x_test <- array(t(readRDS("x_test")), c(28, 28, 1, 10000)) / 255
t_test <- apply(readRDS("t_test"), 1, which.max) - 1

# Convolutional NN
data <- mx.symbol.Variable("data")
conv <- mx.symbol.Convolution(
    data, name = "conv",
    kernel = c(5, 5),
    num_filter = 30)
act1 <- mx.symbol.Activation(conv, name = "tanh1", act_type = "tanh")
pool <- mx.symbol.Pooling(
    act1, name ="pool",
    pool_type = "max",
    kernel = c(2, 2),
    stride = c(2, 2))
fc1 <- mx.symbol.FullyConnected(pool, name = "fc1", num_hidden = 100)
act2 <- mx.symbol.Activation(fc1, name = "tanh2", act_type = "tanh")
fc2 <- mx.symbol.FullyConnected(act2, name = "fc2", num_hidden = 10)
softmax <- mx.symbol.SoftmaxOutput(fc2, name = "sm")

mx.set.seed(400)

model <- mx.model.FeedForward.create(
    softmax, X = x_train,
    y = t_train,
    ctx = mx.cpu(),
    num.round = 15,
    array.batch.size = 1000,
    optimizer = "adam",
    eval.metric = mx.metric.accuracy,
    eval.data = list(data = x_test, label = t_test),
    initializer = mx.init.Xavier(),
    array.layout = "auto")

Results

Start training with 1 devices
[1] Train-accuracy=0.856135593220339
[1] Validation-accuracy=0.9145
[2] Train-accuracy=0.94735
[2] Validation-accuracy=0.9083
[3] Train-accuracy=0.964616666666667
[3] Validation-accuracy=0.8688
[4] Train-accuracy=0.9749
[4] Validation-accuracy=0.8313

...

[13] Train-accuracy=0.997116666666667
[13] Validation-accuracy=0.8038
[14] Train-accuracy=0.997566666666666
[14] Validation-accuracy=0.8091
[15] Train-accuracy=0.998033333333334
[15] Validation-accuracy=0.8135

The training accuracy is very high, but the accuracy on the test data is far lower.
I think this is overfitting.
Next I'll study Dropout.
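
A Dropout layer can be inserted with mx.symbol.Dropout, for example between the hidden layer and the output layer. A minimal sketch of how I'd try it (untested; p = 0.5 is just a common default):

# Dropout between fc1's activation and fc2 (sketch, not yet tried)
act2 <- mx.symbol.Activation(fc1, name = "tanh2", act_type = "tanh")
drop <- mx.symbol.Dropout(act2, name = "drop", p = 0.5)
fc2 <- mx.symbol.FullyConnected(drop, name = "fc2", num_hidden = 10)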

If I specify factor_type = "in" and magnitude = 2 for Xavier, does that give the He initialization?
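
If I read the implementation right, mx.init.Xavier with factor_type = "in" scales weights by sqrt(magnitude / fan_in), so setting rnd_type = "gaussian" (the default is "uniform") and magnitude = 2 should give a standard deviation of sqrt(2 / fan_in), which matches the He initialization. A sketch (untested):

# Presumed He initialization via Xavier parameters:
# gaussian with sd = sqrt(magnitude / fan_in) = sqrt(2 / fan_in)
he_init <- mx.init.Xavier(
    rnd_type = "gaussian",
    factor_type = "in",
    magnitude = 2)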