結論
正解率が上がらずうまくいかなかった。
その原因として以下のことが考えられる。
- bertモデルの使い方が根本的に違う
- CoLAデータセットが今回の問題に適切でなかった
- モデルのチューニング(ハイパーパラメーターなど)が適切でない
おそらく1番目か2番目と思われる。
とりあえず記録としてコードをのせておく。
問題
以下のサイトから問題を使わせてもらった。
quizknock.com
学習コード
import mxnet as mx
import gluonnlp as nlp
import pandas as pd

from bert import data, model

# Fine-tune a pretrained BERT-base binary classifier on the CoLA
# in-domain training split and save the resulting parameters.
ctx = mx.gpu()

# Pretrained BERT encoder; keep the pooler (its output feeds the
# classification head), drop the decoder/classifier heads.
bert_base, vocabulary = nlp.model.get_model(
    'bert_12_768_12',
    dataset_name='book_corpus_wiki_en_uncased',
    pretrained=True,
    ctx=ctx,
    use_pooler=True,
    use_decoder=False,
    use_classifier=False)

# Two-class head (acceptable / unacceptable) on top of the pooler output.
bert_classifier = model.classification.BERTClassifier(
    bert_base, num_classes=2, dropout=0.1)
# Only the freshly added head needs initialization; the encoder is pretrained.
bert_classifier.classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
bert_classifier.hybridize(static_alloc=True)

loss_function = mx.gluon.loss.SoftmaxCELoss()
loss_function.hybridize(static_alloc=True)

metric = mx.metric.Accuracy()

bert_tokenizer = nlp.data.BERTTokenizer(vocabulary, lower=True)
transform = data.transform.BERTDatasetTransform(
    bert_tokenizer,
    max_seq_length=20,
    has_label=True,
    pad=True,
    pair=False)

# CoLA TSV layout: column 1 holds the 0/1 acceptability label,
# column 3 holds the sentence text.
df = pd.read_table('in_domain_train.tsv', header=None)
mylist = [[sentence, label] for label, sentence in zip(df[1], df[3])]
mydata = mx.gluon.data.SimpleDataset(mylist)
data_train = mydata.transform(transform)

bert_dataloader = mx.gluon.data.DataLoader(
    data_train, batch_size=64, shuffle=True)

trainer = mx.gluon.Trainer(
    bert_classifier.collect_params(), 'adam',
    {'learning_rate': 5e-6, 'epsilon': 1e-9})

num_epochs = 10
for epoch_id in range(num_epochs):
    metric.reset()
    for token_ids, valid_length, segment_ids, label in bert_dataloader:
        with mx.autograd.record():
            token_ids = token_ids.as_in_context(ctx)
            valid_length = valid_length.as_in_context(ctx)
            segment_ids = segment_ids.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # The model expects valid_length as float32.
            out = bert_classifier(
                token_ids, segment_ids, valid_length.astype('float32'))
            ls = loss_function(out, label)
        ls.backward()
        # Normalize the gradient by the batch size.
        trainer.step(len(label))
        metric.update([label], [out])
    print('[Epoch {}] acc={:.3f}'.format(epoch_id+1, metric.get()[1]))

bert_classifier.save_parameters('bert.params')
推論コード
import mxnet as mx
import gluonnlp as nlp

from bert import data, model

# Score each candidate word for the fill-in-the-blank question with the
# fine-tuned classifier and print the "acceptable" probability per choice.
# NOTE(review): loading the parameters on GPU reportedly errored, so
# inference runs on CPU here — cause not confirmed.
ctx = mx.cpu()

bert_base, vocabulary = nlp.model.get_model(
    'bert_12_768_12',
    dataset_name='book_corpus_wiki_en_uncased',
    pretrained=True,
    ctx=ctx,
    use_pooler=True,
    use_decoder=False,
    use_classifier=False)

bert_classifier = model.classification.BERTClassifier(
    bert_base, num_classes=2, dropout=0.1)
bert_classifier.load_parameters('bert.params')

bert_tokenizer = nlp.data.BERTTokenizer(vocabulary, lower=True)
transform_test = data.transform.BERTDatasetTransform(
    bert_tokenizer,
    max_seq_length=20,
    has_label=False,
    pad=True,
    pair=False)

question = 'He is still painting the picture but will finish it %s a week or two.'
choice = ['in', 'until', 'about', 'for']
print(question % '( )')

for candidate in choice:
    test_data = transform_test([question % candidate])
    # BUG FIX: token ids were cast to uint8, but BERT's vocabulary has
    # ~30k entries, so ids above 255 wrapped around and every input became
    # garbage — which explains the near-identical scores for all choices.
    # Use int32 instead.
    token_ids = mx.nd.array(
        test_data[0]).astype('int32').expand_dims(0).as_in_context(ctx)
    # Cast valid_length to float32, matching the training forward pass.
    valid_length = mx.nd.array(
        test_data[1]).astype('float32').reshape((1,)).as_in_context(ctx)
    segment_ids = mx.nd.array(
        test_data[2]).astype('int32').expand_dims(0).as_in_context(ctx)
    # Forward computation: softmax over the two classes, report P(class 1).
    out = bert_classifier(token_ids, segment_ids, valid_length)
    out = mx.nd.softmax(out)
    print('%s : %f' % (candidate, out[0, 1].asscalar()))
結果
He is still painting the picture but will finish it ( ) a week or two. in : 0.995472 until : 0.995320 about : 0.995532 for : 0.995265
不正解!!
補足
なぜか推論の時にパラメーターをGPUで読み込むとエラーがでた。
環境
Windows10 Pro NVIDIA GeForce GTX1080 CUDA 10.1 Python 3.6.8
certifi==2019.9.11 chardet==3.0.4 gluonnlp==0.8.1 graphviz==0.8.4 idna==2.6 mxnet-cu101==1.6.0b20191004 numpy==1.16.5 pandas==0.25.2 python-dateutil==2.8.0 pytz==2019.3 requests==2.18.4 six==1.12.0 urllib3==1.2