MXNetでCIFAR-100を扱う

from mxnet import gluon

# fine_label (= Classes)
# Load the CIFAR-100 test split, labelled with the 100 fine-grained classes.
data = gluon.data.vision.CIFAR100(train=False, fine_label=True, root='cifar100')

# Human-readable names of the 100 fine classes, one token per class.
with open('cifar100/fine_label_names.txt', 'r') as f:
    fine_labels = f.read().rstrip().split()

# Collect every sample's label id; the image tensor is ignored.
fine_id = [label for _, label in data]

# REPL check: the largest fine-label id (expected 99).
max(fine_id)
99
# coarse_label (= Superclasses)
# Same test split, but labelled with the 20 coarse superclasses instead.
data = gluon.data.vision.CIFAR100(train=False, fine_label=False, root='cifar100')

# Human-readable names of the 20 superclasses.
with open('cifar100/coarse_label_names.txt', 'r') as f:
    coarse_labels = f.read().rstrip().split()

# Collect every sample's superclass id; images are ignored.
coarse_id = [label for _, label in data]

# REPL check: the largest coarse-label id (expected 19).
max(coarse_id)
19

画像の表示

人の画像を表示してみる。

import mxnet as mx
from mxnet import image, gluon
import random

# Coarse-label test split of CIFAR-100.
data = gluon.data.vision.CIFAR100(train=False, fine_label=False, root='cifar100')

# Gather every image of superclass 14 as a 1xHxWxC batch element.
# NOTE(review): assumes coarse id 14 == "people" — confirm against
# cifar100/coarse_label_names.txt.
person_img = [x.expand_dims(0) for x, y in data if y == 14]

# Pick three of them at random and stack into one batch.
random.shuffle(person_img)
img = mx.nd.concat(*person_img[0:3], dim=0)

# Preprocess for the network: scale uint8 to [0, 1] floats, NHWC -> NCHW.
img_pre = mx.nd.transpose(img.astype('float32') / 255, (0, 3, 1, 2))

# Pretrained DBPN 8x super-resolution network.
net = gluon.nn.SymbolBlock.imports("DBPN/DBPN_8x-symbol.json", ['data'], "DBPN/DBPN_8x-0000.params")
output = net(img_pre)

# For each sample put the naive 8x resize (left) next to the
# super-resolved output (right), then stack the rows vertically.
row_list = []
for i in range(output.shape[0]):
    img_sr = (mx.nd.transpose(output[i], (1, 2, 0)) * 255).astype('uint8')
    img_large = image.imresize(img[i], 256, 256)
    row_list.append(mx.nd.concat(img_large, img_sr, dim=1))
final = mx.nd.concat(*row_list, dim=0)

from PIL import Image
img = Image.fromarray(final.asnumpy())
img.save('person_result.jpg')

左:単純な8倍の拡大
右:超解像
f:id:touch-sp:20190821184151j:plain

画像を拡大して保存

import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.data.vision import CIFAR100
from PIL import Image

# Run the super-resolution network on the GPU.
ctx = mx.gpu()

import os

# Create one output directory per CIFAR-100 fine class (0-99).
# exist_ok=True replaces the original check-then-create pattern, which
# is racy and more verbose than letting makedirs tolerate existing dirs.
for class_id in range(100):
    os.makedirs(os.path.join('cifar_data', 'train', '%d' % class_id), exist_ok=True)

# Load the pretrained DBPN 8x super-resolution network and move its
# parameters to the GPU context declared above.
sr_net = gluon.nn.SymbolBlock.imports("DBPN/DBPN_8x-symbol.json",['data'], "DBPN/DBPN_8x-0000.params")
sr_net.collect_params().reset_ctx(ctx)

def sr(myarray):
    """Run the DBPN 8x network on a uint8 NHWC image batch.

    Returns the super-resolved batch as uint8 NHWC pixels; inference
    happens on the module-level GPU context `ctx` via `sr_net`.
    """
    # Scale to [0, 1] floats and move channels first (NHWC -> NCHW).
    scaled = myarray.astype('float32') / 255
    nchw = mx.nd.transpose(scaled, (0, 3, 1, 2))
    # Forward pass on the GPU.
    upscaled = sr_net(nchw.as_in_context(ctx))
    # Back to NHWC uint8 pixel values.
    return (mx.nd.transpose(upscaled, (0, 2, 3, 1)) * 255).astype('uint8')

# Despite the original variable names (dataset_test/test_data), this
# enumerates the TRAINING split (train=True): each image is
# super-resolved and saved as cifar_data/train/<fine_label>/<index>.jpg.
dataset_train = CIFAR100(train=True, fine_label=True, root='cifar100')

# shuffle=False keeps the running file index deterministic across runs.
train_loader = gluon.data.DataLoader(dataset_train, batch_size=8, shuffle=False)

count = 0

for batch in train_loader:
    # batch[0]: uint8 NHWC images, batch[1]: fine-label ids.
    data = sr(batch[0])
    label = batch[1]
    for i in range(data.shape[0]):
        # Full file path (the original called this out_dir, but it is a file).
        out_path = os.path.join('cifar_data', 'train', '%d'%label[i].asscalar(), '%d.jpg'%count)
        img = Image.fromarray(data[i].asnumpy())
        img.save(out_path)
        count += 1

実験

単純な8倍の拡大と超解像の画像でそれぞれクラス分類をおこなったら精度に差はでるか?

  • 単純な8倍の拡大
1  epoch test_acc = 0.56770
2  epoch test_acc = 0.65600
3  epoch test_acc = 0.66760
4  epoch test_acc = 0.70110
5  epoch test_acc = 0.71110
6  epoch test_acc = 0.71060
7  epoch test_acc = 0.73120
8  epoch test_acc = 0.72740
9  epoch test_acc = 0.72700
10 epoch test_acc = 0.72700
11 epoch test_acc = 0.73960
12 epoch test_acc = 0.73580
13 epoch test_acc = 0.73260
14 epoch test_acc = 0.73380
15 epoch test_acc = 0.73580
16 epoch test_acc = 0.74250
17 epoch test_acc = 0.72570
18 epoch test_acc = 0.73810
19 epoch test_acc = 0.73940
20 epoch test_acc = 0.74330
21 epoch test_acc = 0.74510
22 epoch test_acc = 0.73590
23 epoch test_acc = 0.74350
24 epoch test_acc = 0.75110
25 epoch test_acc = 0.73720
26 epoch test_acc = 0.75120
27 epoch test_acc = 0.74590
28 epoch test_acc = 0.74740
29 epoch test_acc = 0.75920
30 epoch test_acc = 0.74710
  • 超解像
1  epoch test_acc = 0.58930
2  epoch test_acc = 0.65410
3  epoch test_acc = 0.67590
4  epoch test_acc = 0.69740
5  epoch test_acc = 0.70870
6  epoch test_acc = 0.71810
7  epoch test_acc = 0.73100
8  epoch test_acc = 0.73940
9  epoch test_acc = 0.73770
10 epoch test_acc = 0.73370
11 epoch test_acc = 0.75020
12 epoch test_acc = 0.75160
13 epoch test_acc = 0.74890
14 epoch test_acc = 0.75480
15 epoch test_acc = 0.75410
16 epoch test_acc = 0.75180
17 epoch test_acc = 0.75140
18 epoch test_acc = 0.74950
19 epoch test_acc = 0.75700
20 epoch test_acc = 0.76210
21 epoch test_acc = 0.76140
22 epoch test_acc = 0.75660
23 epoch test_acc = 0.75630
24 epoch test_acc = 0.76270
25 epoch test_acc = 0.75030
26 epoch test_acc = 0.76480
27 epoch test_acc = 0.76720
28 epoch test_acc = 0.75920
29 epoch test_acc = 0.76090
30 epoch test_acc = 0.76300

30 epochまでで74~75%と75~76%の違い。
わずか1%改善。誤差範囲?

クラス分類のコード

certifi==2019.6.16
chardet==3.0.4
cycler==0.10.0
gluoncv==0.4.0.post0
graphviz==0.8.4
idna==2.6
kiwisolver==1.1.0
matplotlib==3.1.1
mxnet-cu101==1.6.0b20190825
numpy==1.16.4
Pillow==6.1.0
pyparsing==2.4.2
python-dateutil==2.8.0
requests==2.18.4
scipy==1.3.1
six==1.12.0
tqdm==4.35.0
urllib3==1.22
import numpy as np 
import mxnet as mx
from mxnet.gluon.data.vision import transforms

from gluoncv.model_zoo import get_model
from mxnet import autograd, gluon
from mxnet.gluon import nn

# Color-jitter strength and AlexNet-style PCA lighting-noise strength.
jitter_param = 0.4
lighting_param = 0.1
# Training-time augmentation: random resized crop to the backbone's
# 224x224 input, flips and color jitter, then ImageNet mean/std
# normalization (the backbone below is ImageNet-pretrained).
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224,scale=(0.7, 1.0)),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
# Deterministic evaluation pipeline: center crop + same normalization.
transform_test = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# RecordIO archives, presumably packed from the super-resolved images
# written earlier (TODO confirm how train_list.rec/val_list.rec were built).
dataset_train = gluon.data.vision.ImageRecordDataset('train_list.rec')
dataset_test = gluon.data.vision.ImageRecordDataset('val_list.rec')

# transform_first applies the pipeline to the image only, not the label.
train_data = gluon.data.DataLoader(
    dataset_train.transform_first(transform_train), batch_size=32, shuffle=True)

test_data = gluon.data.DataLoader(
    dataset_test.transform_first(transform_test), batch_size=32, shuffle=False)

# ImageNet-pretrained MobileNet 1.0 backbone for fine-tuning.
finetune_net = get_model('mobilenet1.0', pretrained=True, root='MobileNet')

# Replace the classifier head with a fresh 100-way layer for the
# CIFAR-100 fine labels; only this new layer needs initialization.
with finetune_net.name_scope():
    finetune_net.output = nn.Dense(100)
finetune_net.output.initialize(mx.init.Xavier())
finetune_net.collect_params().reset_ctx(mx.gpu())
finetune_net.hybridize()

def evaluate_accuracy(dataloader, net):
    """Return top-1 accuracy of `net` over every batch in `dataloader`.

    Inference runs on the GPU; predictions and labels are compared on
    the host as int32 numpy arrays.
    """
    sample_n = 0
    correct = 0
    for batch in dataloader:
        data = batch[0].as_in_context(mx.gpu())
        label = batch[1].asnumpy().astype('int32')
        # asnumpy() already synchronizes and copies to the host, so the
        # original's extra as_in_context(mx.cpu()) hop is unnecessary.
        predictions = np.argmax(net(data).asnumpy(), axis=1).astype('int32')
        sample_n += data.shape[0]
        # Vectorized numpy reduction instead of Python's builtin sum()
        # over a bool array.
        correct += int((predictions == label).sum())
    return correct / sample_n

trainer = gluon.Trainer(finetune_net.collect_params(), 'adam')
Loss = gluon.loss.SoftmaxCrossEntropyLoss()

# Total passes over the training set. The original stored this in
# `epoch` and then shadowed it with the loop variable of the same name.
num_epochs = 30

print('start training...')

for epoch in range(1, num_epochs + 1):
    for batch in train_data:
        data = batch[0].as_in_context(mx.gpu())
        label = batch[1].as_in_context(mx.gpu())
        # Record only the forward pass; backward() belongs outside the
        # autograd.record() scope (the original called it inside, which
        # needlessly records the backward computation as well).
        with autograd.record():
            output = finetune_net(data)
            loss = Loss(output, label)
        loss.backward()
        # Normalize the accumulated gradient by the batch size.
        trainer.step(data.shape[0])

    test_acc = evaluate_accuracy(test_data, finetune_net)

    print('{:<2} epoch test_acc = {:<10,.5f}'.format(epoch, test_acc))

finetune_net.save_parameters('finetune.params')