まずはデータをフォルダに分ける。
Kaggleからデータをダウンロードすると「train」フォルダに猫の画像、犬の画像それぞれ12500枚入っている。
今回はその中から各2000枚を訓練データ、各1250枚をテストデータとする。
ダウンロードした中には「test」フォルダも含まれるが今回は使用しない。
import glob
import os
import random
import shutil


def split_dataset(src_pattern, train_dir, test_dir, n_train=2000, n_test=1250):
    """Randomly split files matching *src_pattern* into train/test folders.

    Shuffles the matched files, copies the first *n_train* into
    *train_dir* and the next *n_test* into *test_dir*.  Destination
    directories are created if missing (the original script assumed
    they already existed and would crash otherwise).

    Args:
        src_pattern: glob pattern selecting the source images.
        train_dir: destination directory for training images.
        test_dir: destination directory for test images.
        n_train: number of training images to copy.
        n_test: number of test images to copy.
    """
    files = glob.glob(src_pattern)
    random.shuffle(files)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    for file_name in files[:n_train]:
        shutil.copy(file_name, train_dir)
    # Test images come from the slice right after the training slice,
    # so the two sets never overlap.
    for file_name in files[n_train:n_train + n_test]:
        shutil.copy(file_name, test_dir)


if __name__ == "__main__":
    # Kaggle's "train" folder holds 12500 images per class; we sample
    # 2000 train / 1250 test images for each of cat and dog.
    for cls in ("cat", "dog"):
        split_dataset(
            "dogs-vs-cats/train/train/" + cls + "*",
            "python/dog_cat/train/" + cls,
            "python/dog_cat/test/" + cls,
        )
実行
import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model

# Single device handle, hoisted so the target device (GPU/CPU) can be
# changed in one place instead of six scattered mx.gpu() calls.
ctx = mx.gpu()

# Augmentation strengths for training-time color jitter / lighting noise.
jitter_param = 0.4
lighting_param = 0.1

# Training pipeline: random crop/flip/color augmentation, then the
# ImageNet mean/std normalization the pretrained weights expect.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Evaluation pipeline: deterministic resize + center crop, same normalization.
transform_test = transforms.Compose([
    transforms.Resize(256, keep_ratio=True),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

dataset_train = gluon.data.vision.ImageFolderDataset('python/dog_cat/train')
dataset_test = gluon.data.vision.ImageFolderDataset('python/dog_cat/test')
train_data = gluon.data.DataLoader(
    dataset_train.transform_first(transform_train),
    batch_size=32, shuffle=True)
test_data = gluon.data.DataLoader(
    dataset_test.transform_first(transform_test),
    batch_size=32, shuffle=False)

# Pretrained SENet-154: swap the classifier head for a 2-way output
# (cat vs dog) and freeze the feature extractor (grad_req='null') so
# only the new head is trained.
finetune_net = get_model('SENet_154', pretrained=True)
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(2)
finetune_net.output.initialize(mx.init.Xavier(), ctx=ctx)
finetune_net.features.collect_params().setattr('grad_req', 'null')
finetune_net.collect_params().reset_ctx(ctx)
finetune_net.hybridize()


def evaluate_accuracy(dataloader, net):
    """Return the classification accuracy of `net` over `dataloader`."""
    sample_n = 0
    correct = []
    for batch in dataloader:
        data = batch[0].as_in_context(ctx)
        label = batch[1].as_in_context(ctx)
        output = net(data)
        predictions = mx.nd.argmax(output, axis=1).astype('int32')
        sample_n += data.shape[0]
        correct.append(mx.nd.sum(predictions == label).asscalar())
    return sum(correct) / sample_n


trainer = gluon.Trainer(finetune_net.collect_params(), 'adam')
Loss = gluon.loss.SoftmaxCrossEntropyLoss()

# Was `epoch = 5`, which the loop variable then shadowed; renamed to
# avoid the confusing `for epoch in range(1, epoch + 1)` pattern.
num_epochs = 5

print('start training...')
for epoch in range(1, num_epochs + 1):
    for batch in train_data:
        data = batch[0].as_in_context(ctx)
        label = batch[1].as_in_context(ctx)
        with autograd.record():
            output = finetune_net(data)
            loss = Loss(output, label)
        loss.backward()
        # Normalize the gradient by the actual batch size (last batch
        # may be smaller than 32).
        trainer.step(data.shape[0])
    test_acc = evaluate_accuracy(test_data, finetune_net)
    print('{:<2} epoch test_acc = {:<10,.5f}'.format(epoch, test_acc))

finetune_net.save_parameters('finetune.params')
結果
1 epoch test_acc = 0.98840 2 epoch test_acc = 0.98960 3 epoch test_acc = 0.99240 4 epoch test_acc = 0.99040 5 epoch test_acc = 0.99080