はじめに
ゼロから作るDeep Learning ❷ ―自然言語処理編
- 作者: 斎藤康毅
- 出版社/メーカー: オライリージャパン
- 発売日: 2018/07/21
- メディア: 単行本(ソフトカバー)
- この商品を含むブログ (3件) を見る
データもそのまま使わせてもらった。
モデルの作成
import mxnet as mx
from mxnet.gluon import nn, rnn, Block

# ctx = mx.gpu()
ctx = mx.cpu()


class Seq2SeqEncoder(Block):
    """Encoder: embeds token ids and encodes them with a single-layer LSTM.

    forward() returns (outputs, state); the final LSTM state is handed to
    the decoder as its initial state.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        with self.name_scope():
            self.embedding = nn.Embedding(vocab_size, embed_size)
            self.rnn = rnn.LSTM(num_hiddens, num_layers=1)

    def forward(self, X, *args):
        # X: (batch, seq) int token ids -> (seq, batch, embed) for the LSTM.
        X = self.embedding(X)
        X = X.swapaxes(0, 1)
        # Build the initial state on the device the input actually lives on,
        # instead of relying on the module-level global `ctx`.  The original
        # used ctx and would fail if the caller moved data to another device.
        state = self.rnn.begin_state(batch_size=X.shape[1], ctx=X.context)
        out, state = self.rnn(X, state)
        return out, state


class Seq2SeqDecoder(Block):
    """Decoder: embeds target tokens, runs the LSTM from the encoder state,
    and projects each time step to vocabulary logits.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        with self.name_scope():
            self.embedding = nn.Embedding(vocab_size, embed_size)
            self.rnn = rnn.LSTM(num_hiddens, num_layers=1)
            # flatten=False keeps the (batch, seq, vocab) shape per step.
            self.dense = nn.Dense(vocab_size, flatten=False)

    def forward(self, X, state):
        # X: (batch, seq) -> (seq, batch, embed) for the LSTM.
        X = self.embedding(X)
        X = X.swapaxes(0, 1)
        out, state = self.rnn(X, state)
        # Back to (batch, seq, hidden) before the output projection.
        out = out.swapaxes(0, 1)
        out = self.dense(out)
        return out, state


class EncoderDecoder(Block):
    """Ties the encoder and decoder together: the encoder's final state
    initializes the decoder (plain seq2seq, no attention).
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, **kwargs):
        super(EncoderDecoder, self).__init__(**kwargs)
        with self.name_scope():
            self.encoder = Seq2SeqEncoder(vocab_size, embed_size, num_hiddens)
            self.decoder = Seq2SeqDecoder(vocab_size, embed_size, num_hiddens)

    def forward(self, enc_X, dec_X, *args):
        enc_outputs, enc_state = self.encoder(enc_X, *args)
        return self.decoder(dec_X, enc_state)
実行
import numpy as np
import sequence

# Load the addition dataset and its character vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters.
vocab_size = len(char_to_id)  # number of distinct characters (13)
embed_size = 16
num_hidden = 128              # LSTM hidden units
batch_size = 128
epoch = 25

# Reverse each input sequence — a standard trick that improves seq2seq
# training on this task.
X = [np.flip(x, axis=0) for x in x_train]

import seq2seq
import mxnet as mx
from mxnet import autograd
from mxnet.gluon import Trainer
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss

X = mx.nd.array(X)
Y = mx.nd.array(t_train)

# ctx = mx.gpu()
ctx = mx.cpu()

# Build and initialize the model.
model = seq2seq.EncoderDecoder(vocab_size, embed_size, num_hidden)
model.initialize(mx.init.Xavier(), ctx=ctx)

# Optimizer and loss.
trainer = Trainer(model.collect_params(), 'adam')
loss_func = SoftmaxCrossEntropyLoss()

# Mini-batch iterator over (reversed input, target) pairs.
train_data = mx.io.NDArrayIter(X, Y, batch_size=batch_size, shuffle=True)

print('start training...')
loss_n = []
for i in range(1, epoch + 1):
    train_data.reset()
    for batch in train_data:
        encoder_input = batch.data[0].as_in_context(ctx)
        # Teacher forcing: the decoder input is the target shifted right,
        # and the label is the target shifted left.
        decoder_input = batch.label[0][:, :-1].as_in_context(ctx)
        label = batch.label[0][:, 1:].as_in_context(ctx)
        with autograd.record():
            output, status = model(encoder_input, decoder_input)
            loss = loss_func(output, label)
        loss_n.append(np.mean(loss.asnumpy()))
        loss.backward()
        # Normalize the gradient by the actual batch size.
        trainer.step(batch.data[0].shape[0])
    ll = np.mean(loss_n)
    print('%d epoch loss = %f' % (i, ll))
    loss_n = []

model.save_parameters('rnn_model.params')
結果の確認(テストデータの正解率を計算)
import numpy as np
import sequence

# Load the dataset and vocabulary.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# Hyperparameters — must match the values used at training time.
vocab_size = len(char_to_id)  # number of distinct characters (13)
embed_size = 16
num_hidden = 128              # LSTM hidden units

# Reverse each test input, mirroring the preprocessing used for training.
X = [np.flip(x, axis=0) for x in x_test]

import seq2seq
import mxnet as mx

X = mx.nd.array(X)

# ctx = mx.gpu()
ctx = mx.cpu()

# Rebuild the model and restore the trained parameters.
model = seq2seq.EncoderDecoder(vocab_size, embed_size, num_hidden)
model.load_parameters('rnn_model.params', ctx=ctx)

acc = 0
for i in range(len(x_test)):
    # Greedy decoding, one example at a time (batch of 1).
    encoder_input = X[i].expand_dims(0)
    encoder_output, encoder_state = model.encoder(encoder_input)
    decoder_state = encoder_state
    # Decoding starts from the separator token '_'.
    decoder_input = mx.nd.array(char_to_id['_']).reshape((1, 1))
    output = []
    for _ in range(4):  # answers on this dataset are exactly 4 characters
        decoder_output, decoder_state = model.decoder(decoder_input,
                                                      decoder_state)
        # `pred` instead of `id`: the original shadowed the builtin id().
        pred = decoder_output[0][0].asnumpy().argmax()
        output.append(pred)
        # Feed the prediction back in as the next decoder input.
        decoder_input = mx.nd.array(pred).reshape((1, 1))
    # Correct only when all 4 predicted characters match the target.
    if sum(t_test[i, 1:] == output) == 4:
        acc += 1

acc_ratio = (acc / len(x_test)) * 100
print('acc = %.3f' % (acc_ratio))
結果
acc = 56.220