seq2seq in MXNet (without Attention)

Introduction

ゼロから作るDeep Learning ❷ ―自然言語処理編 (Deep Learning from Scratch 2: Natural Language Processing)

This is simply the seq2seq model presented in that book, rewritten in MXNet.
The data is also borrowed as-is.
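
For reference, the dataset is the book's character-level addition data (addition.txt): each sample pairs a 7-character query such as '16+75  ' with a 5-character answer such as '_91  ', where '_' marks the start of the answer. A minimal inspection sketch, assuming the book's sequence.py is on the path:

import sequence

(x_train, t_train), _ = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Decode the id sequences back to characters.
print(''.join(id_to_char[int(c)] for c in x_train[0]))   # e.g. '16+75  '
print(''.join(id_to_char[int(c)] for c in t_train[0]))   # e.g. '_91  '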

Building the model

import mxnet as mx
from mxnet.gluon import nn, rnn, Block

#ctx = mx.gpu()
ctx = mx.cpu()

class Seq2SeqEncoder(Block):
    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)

        with self.name_scope():
            self.embedding = nn.Embedding(vocab_size, embed_size)
            self.rnn = rnn.LSTM(num_hiddens, num_layers=1)

    def forward(self, X, *args):
        X = self.embedding(X)   # (batch, seq_len) -> (batch, seq_len, embed_size)
        X = X.swapaxes(0, 1)    # the LSTM expects (seq_len, batch, embed_size)
        state = self.rnn.begin_state(batch_size=X.shape[1], ctx=ctx)
        out, state = self.rnn(X, state)
        return out, state

class Seq2SeqDecoder(Block):
    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)

        with self.name_scope():
            self.embedding = nn.Embedding(vocab_size, embed_size)
            self.rnn = rnn.LSTM(num_hiddens, num_layers=1)
            self.dense = nn.Dense(vocab_size, flatten=False)

    def forward(self, X, state):
        X = self.embedding(X)
        X = X.swapaxes(0, 1)      # to (seq_len, batch, embed_size)
        out, state = self.rnn(X, state)
        out = out.swapaxes(0, 1)  # back to (batch, seq_len, num_hiddens)
        out = self.dense(out)     # per-step scores over the vocabulary
        return out, state

class EncoderDecoder(Block):
    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(EncoderDecoder, self).__init__(**kwargs)

        with self.name_scope():
            self.encoder = Seq2SeqEncoder(vocab_size, embed_size, num_hiddens)
            self.decoder = Seq2SeqDecoder(vocab_size, embed_size, num_hiddens)

    def forward(self, enc_X, dec_X, *args):
        # The encoder's final LSTM state initializes the decoder.
        enc_outputs, enc_state = self.encoder(enc_X, *args)
        return self.decoder(dec_X, enc_state)
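
As a quick sanity check (a hypothetical snippet, not part of the original script), dummy id batches can be pushed through the model to confirm the output shape:

# Hypothetical shape check: batch of 2, query length 7,
# decoder input length 4, vocabulary of 13 characters.
model = EncoderDecoder(vocab_size=13, embed_size=16, num_hiddens=128)
model.initialize(mx.init.Xavier(), ctx=ctx)

enc_X = mx.nd.zeros((2, 7))    # (batch, source length)
dec_X = mx.nd.zeros((2, 4))    # (batch, target length)
out, state = model(enc_X, dec_X)
print(out.shape)               # (2, 4, 13): one score per character and step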

Training

import numpy as np
import sequence

# Load the data
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters
vocab_size = len(char_to_id)    # number of characters: 13
embed_size = 16
num_hidden = 128                # number of LSTM hidden units

batch_size = 128
epoch = 25

# Reverse the input sequences
X = [np.flip(x, axis=0) for x in x_train]

import seq2seq
import mxnet as mx
from mxnet import autograd
from mxnet.gluon import Trainer
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss

X = mx.nd.array(X)
Y = mx.nd.array(t_train)

#ctx = mx.gpu()
ctx = mx.cpu()

# Build the model
model = seq2seq.EncoderDecoder(vocab_size, embed_size, num_hidden)
model.initialize(mx.init.Xavier(), ctx=ctx)

# Set up the optimizer
trainer = Trainer(model.collect_params(), 'adam')
loss_func = SoftmaxCrossEntropyLoss()

# Prepare the data iterator
train_data = mx.io.NDArrayIter(X, Y, batch_size=batch_size, shuffle=True)

# Start training
print('start training...')

loss_n = []

for i in range(1, epoch+1):
    train_data.reset()
    for batch in train_data:
        encoder_input = batch.data[0].as_in_context(ctx)
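        # Teacher forcing: if t is '_91  ', the decoder reads '_91 ' (all but
        # the last character) and must predict '91  ' (all but the first),
        # i.e. the next character at every step.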
        decoder_input = batch.label[0][:,:-1].as_in_context(ctx)
        label = batch.label[0][:,1:].as_in_context(ctx)
        with autograd.record():
            output, state = model(encoder_input, decoder_input)
            loss = loss_func(output, label)
            loss_n.append(np.mean(loss.asnumpy()))
            loss.backward()
        trainer.step(batch.data[0].shape[0])   # normalize by the actual batch size

    ll = np.mean(loss_n)
    print('%d epoch  loss = %f'%(i, ll))
    loss_n = []

model.save_parameters('rnn_model.params')

Checking the results (computing accuracy on the test data)

import numpy as np
import sequence

# Load the data
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters
vocab_size = len(char_to_id)    # number of characters: 13
embed_size = 16
num_hidden = 128                # number of LSTM hidden units

# Reverse the input sequences
X = [np.flip(x, axis=0) for x in x_test]

import seq2seq
import mxnet as mx

X = mx.nd.array(X)

#ctx = mx.gpu()
ctx = mx.cpu()

# Load the trained model
model = seq2seq.EncoderDecoder(vocab_size, embed_size, num_hidden)
model.load_parameters('rnn_model.params', ctx=ctx)

acc = 0

for i in range(len(x_test)):
    encoder_input = X[i].expand_dims(0)    # add a batch axis: (1, 7)
    encoder_output, encoder_state = model.encoder(encoder_input)

    decoder_state = encoder_state
    decoder_input = mx.nd.array([char_to_id['_']]).reshape((1,1))   # '_' starts the answer

    output = []

    # Greedy decoding: feed each prediction back in as the next input.
    for _ in range(4):    # the answer is at most 4 characters
        decoder_output, decoder_state = model.decoder(decoder_input, decoder_state)
        next_id = decoder_output[0][0].asnumpy().argmax()
        output.append(next_id)
        decoder_input = mx.nd.array([next_id]).reshape((1,1))

    if sum(t_test[i,1:] == output) == 4:   # correct only if all 4 characters match
        acc += 1
    
acc_ratio = (acc/len(x_test))*100

print('acc = %.3f'%(acc_ratio))
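
To inspect an individual prediction rather than just the aggregate accuracy, the id sequences can be mapped back to characters (a small sketch reusing the variables from the last loop iteration above):

# Hypothetical inspection of the last test sample from the loop above.
question = ''.join(id_to_char[int(c)] for c in x_test[i])
answer   = ''.join(id_to_char[int(c)] for c in t_test[i, 1:])
guess    = ''.join(id_to_char[int(c)] for c in output)
print('%s -> %s (expected %s)' % (question, guess, answer))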

Results

acc = 56.220