GluonでMNIST

下記の本で紹介されているニューラルネットワークをGluonで書いてみる。

環境

Windows10 Pro 64bit
GeForce GTX 1080 (CUDA 8.0, cuDNN 6.0)

Anacondaに仮想環境を作成

conda create -n gluon python=2.7 anaconda
activate gluon
pip install mxnet-cu80==0.12.0

データの取得、ネットワークの構築、実行

from __future__ import print_function
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np
# Device context: the parameters and every batch below are placed on it.
ctx = mx.gpu()
# Seed MXNet's random number generator with a fixed value.
mx.random.seed(1)
# Mini-batch size shared by the training and the test data loaders.
batch_size = 1000
def transform(data, label):
    """Convert one (image, label) sample into float32 network input.

    The image is cast to float32, its last axis is moved to the front
    (axes reordered to ``(2, 0, 1)``) and every value is divided by 255.
    The label is cast to float32 as well.

    NOTE(review): presumably ``data`` is a height x width x channel image
    with 0-255 pixel values, so the result is channel-first and scaled to
    [0, 1] -- confirm against the dataset.

    Returns a ``(image, label)`` tuple.
    """
    as_float = data.astype(np.float32)
    channel_first = nd.transpose(as_float, (2, 0, 1))
    scaled = channel_first / 255
    return scaled, label.astype(np.float32)
# Training split: every sample goes through `transform`; batches are shuffled.
train_set = gluon.data.vision.MNIST(train=True, transform=transform)
train_data = gluon.data.DataLoader(train_set, batch_size, shuffle=True)
# Test split: same preprocessing, iterated without shuffling.
test_set = gluon.data.vision.MNIST(train=False, transform=transform)
test_data = gluon.data.DataLoader(test_set, batch_size, shuffle=False)
# Convolutional classifier: three (conv, conv, max-pool) stages followed by a
# fully connected head with 10 outputs.
net = gluon.nn.Sequential()
with net.name_scope():
    # Each entry is (channels of both convolutions, padding of the second one).
    # All convolutions are 3x3 with stride 1 and ReLU; the first convolution of
    # every stage uses padding 1.  The second stage pads its second convolution
    # by 2, so that layer's output is 2 pixels larger than its input in each
    # spatial dimension.
    # NOTE(review): confirm the padding of 2 is intended by the reference network.
    for stage_channels, second_padding in ((16, 1), (32, 2), (64, 1)):
        net.add(gluon.nn.Conv2D(channels=stage_channels, kernel_size=3, strides=(1, 1),
                                padding=(1, 1), activation='relu'))
        net.add(gluon.nn.Conv2D(channels=stage_channels, kernel_size=3, strides=(1, 1),
                                padding=(second_padding, second_padding),
                                activation='relu'))
        # Halve the spatial resolution at the end of the stage.
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))

    # Fully connected head: 50 hidden units with ReLU, then 10 outputs.
    net.add(gluon.nn.Dense(50, activation="relu"))
    net.add(gluon.nn.Dropout(0.5))
    net.add(gluon.nn.Dense(10))
    # NOTE(review): dropout is also applied to the 10 output scores, which is
    # unusual after an output layer -- confirm it matches the reference network.
    net.add(gluon.nn.Dropout(0.5))

# Xavier initialisation of all network parameters on the chosen context.
weight_init = mx.init.Xavier(magnitude=2.24)
net.collect_params().initialize(weight_init, ctx=ctx)

# Loss applied to the network outputs and the labels during training.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# Adam optimiser over all network parameters.
optimizer_settings = {'learning_rate': .001}
trainer = gluon.Trainer(net.collect_params(), 'adam', optimizer_settings)
def evaluate_accuracy(data_iterator, net):
    """Return the accuracy of ``net`` over all batches of ``data_iterator``.

    ``data_iterator`` yields ``(data, label)`` batches.  Each batch is moved
    to the module-level ``ctx``, the index of the largest network output along
    axis 1 is taken as the prediction, and predictions are accumulated in an
    ``mx.metric.Accuracy`` metric whose value is returned.
    """
    metric = mx.metric.Accuracy()
    for batch, target in data_iterator:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)
        scores = net(batch)
        predicted = nd.argmax(scores, axis=1)
        metric.update(preds=predicted, labels=target)
    # get() returns a (name, value) pair; only the value is needed.
    _, accuracy = metric.get()
    return accuracy
# Train for a fixed number of passes over the training data and report the
# accuracy on both splits after every pass.
epochs = 20
for epoch in range(epochs):
    for batch, target in train_data:
        batch = batch.as_in_context(ctx)
        target = target.as_in_context(ctx)
        # Record the forward pass so that gradients can be computed from it.
        with autograd.record():
            scores = net(batch)
            loss = softmax_cross_entropy(scores, target)
        loss.backward()
        # The step is given the number of samples in this batch.
        trainer.step(batch.shape[0])

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    report = "Epoch {0:<3}Train_acc {1:<16}, Test_acc {2:<16}"
    print(report.format(epoch + 1, train_accuracy, test_accuracy))

結果（20エポック学習したときの、各エポック終了時点での訓練データ・テストデータに対する正解率）


Epoch 1  Train_acc 0.930766666667  , Test_acc 0.9327          
Epoch 2  Train_acc 0.961966666667  , Test_acc 0.9613          
Epoch 3  Train_acc 0.973416666667  , Test_acc 0.9734          
Epoch 4  Train_acc 0.979916666667  , Test_acc 0.9797          
Epoch 5  Train_acc 0.9838          , Test_acc 0.983           
Epoch 6  Train_acc 0.985883333333  , Test_acc 0.9853          
Epoch 7  Train_acc 0.98785         , Test_acc 0.987           
Epoch 8  Train_acc 0.989316666667  , Test_acc 0.9868          
Epoch 9  Train_acc 0.990283333333  , Test_acc 0.9882          
Epoch 10 Train_acc 0.991466666667  , Test_acc 0.9893          
Epoch 11 Train_acc 0.991383333333  , Test_acc 0.9898          
Epoch 12 Train_acc 0.9937          , Test_acc 0.9912          
Epoch 13 Train_acc 0.993866666667  , Test_acc 0.9915          
Epoch 14 Train_acc 0.99315         , Test_acc 0.989           
Epoch 15 Train_acc 0.994433333333  , Test_acc 0.9915          
Epoch 16 Train_acc 0.994266666667  , Test_acc 0.9905          
Epoch 17 Train_acc 0.994433333333  , Test_acc 0.9916          
Epoch 18 Train_acc 0.9945          , Test_acc 0.9911          
Epoch 19 Train_acc 0.995466666667  , Test_acc 0.9915          
Epoch 20 Train_acc 0.995933333333  , Test_acc 0.9923