MXNet in R (3)

Aiming for 99%+ accuracy on MNIST!
That said, all I did was write the neural network introduced in the book below in MXNet.

Initial setup

library(mxnet)

Loading the data

# Load the training data
x_train <- array(t(readRDS("x_train")), c(28, 28, 1, 60000)) / 255
t_train <- apply(readRDS("t_train"), 1, which.max) - 1

# Load the test data (scaled to [0, 1] like the training data)
x_test <- array(t(readRDS("x_test")), c(28, 28, 1, 10000)) / 255
t_test <- apply(readRDS("t_test"), 1, which.max) - 1
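
As a quick sanity check, the loaded arrays can be inspected with plain base R (a sketch, assuming the RDS files have the layout implied above):

dim(x_train)    # 28 28 1 60000
range(x_train)  # pixel values scaled to [0, 1]
table(t_train)  # roughly 6000 examples per digit 0-9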

Building the neural network

mydata <- mx.symbol.Variable("data")

conv1 <- mx.symbol.Convolution(
   data=mydata,
   kernel = c(3,3),
   pad = c(1,1),
   stride = c(1,1),
   num_filter = 16)

act1 <- mx.symbol.Activation(data=conv1, act_type = "relu")

conv2 <- mx.symbol.Convolution(
   data=act1,
   kernel = c(3,3),
   pad = c(1,1),
   stride = c(1,1),
   num_filter = 16)

act2 <- mx.symbol.Activation(data=conv2, act_type = "relu")

pool1 <- mx.symbol.Pooling(
   data=act2,
   pool_type = "max",
   kernel = c(2,2),
   stride = c(2,2))

conv3 <- mx.symbol.Convolution(
   data=pool1,
   kernel = c(3,3),
   pad = c(1,1),
   stride = c(1,1),
   num_filter = 32)

act3 <- mx.symbol.Activation(data=conv3, act_type = "relu")

conv4 <- mx.symbol.Convolution(
   data=act3,
   kernel = c(3,3),
   pad = c(2,2),
   stride = c(1,1),
   num_filter = 32)

act4 <- mx.symbol.Activation(data=conv4, act_type = "relu")

pool2 <- mx.symbol.Pooling(
   data=act4,
   pool_type = "max",
   kernel = c(2,2),
   stride = c(2,2))

conv5 <- mx.symbol.Convolution(
   data=pool2,
   kernel = c(3,3),
   pad = c(1,1),
   stride = c(1,1),
   num_filter = 64)

act5 <- mx.symbol.Activation(data=conv5, act_type = "relu")

conv6 <- mx.symbol.Convolution(
   data=act5,
   kernel = c(3,3),
   pad = c(1,1),
   stride = c(1,1),
   num_filter = 64)

act6 <- mx.symbol.Activation(data=conv6, act_type = "relu")

pool3 <- mx.symbol.Pooling(
   data=act6,
   pool_type = "max",
   kernel = c(2,2),
   stride = c(2,2))

fc1 <- mx.symbol.FullyConnected(data=pool3, num_hidden = 50)

act7 <- mx.symbol.Activation(data=fc1, act_type = "relu")

drop1 <- mx.symbol.Dropout(data=act7)

fc2 <- mx.symbol.FullyConnected(data=drop1, num_hidden = 10)

drop2 <- mx.symbol.Dropout(data=fc2)

softmax <- mx.symbol.SoftmaxOutput(data = drop2, name = "sm")
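
Before training, the shapes flowing through the network can be checked with mx.symbol.infer.shape (a sketch; a single 28x28x1 image is assumed as input):

shapes <- mx.symbol.infer.shape(softmax, data = c(28, 28, 1, 1))
shapes$out.shapes   # the softmax output should be 10 x 1 (one probability per digit)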

mx.set.seed(1)

model <- mx.model.FeedForward.create(
   softmax, X = x_train,
   y = t_train,
   ctx = mx.cpu(),
   num.round = 20,
   array.batch.size = 1000,
   optimizer = "adam",
   eval.metric = mx.metric.accuracy,
   initializer = mx.init.Xavier(factor_type="in",rnd_type="gaussian",magnitude=2),
   eval.data = list(data = x_test, label = t_test),
   array.layout = "auto")
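
Once training finishes, the fitted model can be written to disk and restored later (a sketch; the prefix "mnist_cnn" is just an example):

mx.model.save(model, "mnist_cnn", 20)        # save the parameters at round 20
# model <- mx.model.load("mnist_cnn", 20)    # reload them later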

Results

Start training with 1 devices
[1] Train-accuracy=0.233593220338983
[1] Validation-accuracy=0.8341
[2] Train-accuracy=0.354283333333333
[2] Validation-accuracy=0.9439
[3] Train-accuracy=0.411583333333333
[3] Validation-accuracy=0.9697

・・・

[18] Train-accuracy=0.5672
[18] Validation-accuracy=0.9934
[19] Train-accuracy=0.569166666666667
[19] Validation-accuracy=0.9944
[20] Train-accuracy=0.569616666666667
[20] Validation-accuracy=0.9932
  • The Train-accuracy numbers never climb very high
  • Apparently the Dropout layers are not ignored when Train-accuracy is computed (a possible explanation follows this list)
  • Validation-accuracy, by contrast, appears to be computed correctly with the Dropout layers ignored
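
A plausible explanation: the training metric is evaluated on a forward pass in which the Dropout layers are still active, and with MXNet's default drop probability of p = 0.5 the drop2 layer zeroes roughly half of the ten logits on every pass, which keeps the argmax-based Train-accuracy low. The sketch below just makes that default explicit (assuming p = 0.5 is indeed the default, this is equivalent to the layers defined above):

drop1 <- mx.symbol.Dropout(data = act7, p = 0.5)   # same as relying on the default
drop2 <- mx.symbol.Dropout(data = fc2,  p = 0.5)   # dropping logits distorts the training-time metric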

The actual Train-accuracy

> pred <- predict(model,x_train)
> pred_train <- apply(pred,2,which.max) - 1
> sum(pred_train == t_train)/60000
[1] 0.99485
  • The predict function also appears to ignore the Dropout layers and give an accurate figure (the test set can be checked the same way, as shown below)
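
The same check works for the test set (a sketch, reusing the objects defined above):

pred_test <- apply(predict(model, x_test), 2, which.max) - 1
sum(pred_test == t_test) / 10000                  # test-set accuracy
table(predicted = pred_test, actual = t_test)     # confusion matrix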