Handling multivariate time series with GluonTS

import numpy as np
import pandas as pd
from gluonts.dataset.common import ListDataset

N = 10  # number of time series
T = 100  # number of timesteps
prediction_length = 30
freq = '1H'

custom_datasetx = np.random.normal(size=(N, 2, T))
start = pd.Timestamp("01-01-2019", freq=freq)

train_ds = ListDataset(
    [
        {'target': x, 'start': start}
        for x in custom_datasetx[:, :, :-prediction_length]
    ],
    freq=freq,
    one_dim_target=False,
)
  • Defining the estimator
from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# import path for the multivariate output in gluonts 0.3.x
from gluonts.distribution.multivariate_gaussian import MultivariateGaussianOutput

estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=prediction_length,
    freq=freq,
    trainer=Trainer(epochs=5),
    distr_output=MultivariateGaussianOutput(dim=2)
)
  • Extending to_pandas
def to_pandas_multi(instance: dict, dim=0):
    # like gluonts.dataset.util.to_pandas, but extracts a single dimension of a multivariate target
    target = instance["target"][dim]
    start = instance["start"]
    freq = start.freqstr
    index = pd.date_range(start=start, periods=len(target), freq=freq)
    return pd.Series(target, index=index)
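
The plotting loop below also needs test_ds, forecast_it, prediction_intervals, and legend, which the snippets above do not define. A minimal sketch of producing them (my addition, assuming test_ds is simply built from the full-length series in the same way as train_ds):

from matplotlib import pyplot as plt
from gluonts.evaluation.backtest import make_evaluation_predictions

# test dataset: same layout as train_ds, but over the full series length
test_ds = ListDataset(
    [{'target': x, 'start': start} for x in custom_datasetx],
    freq=freq,
    one_dim_target=False,
)

predictor = estimator.train(training_data=train_ds)

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_eval_samples=100,
)

prediction_intervals = (50.0, 90.0)
legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]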
  • Using copy_dim
for x, y in zip(test_ds, forecast_it):
    to_pandas_multi(x,dim=0).plot()
    y.copy_dim(0).plot(color='g', prediction_intervals=prediction_intervals)
    plt.grid(which='both')
    plt.legend(legend, loc='upper left')

Trying out GluonTS (2)

archive.ics.uci.edu
Predict values using the "Bike Sharing Dataset" Data Set.

  • use no features

f:id:touch-sp:20190914174955p:plain

  • use 'feat_dynamic_real' and 'feat_dynamic_cat'

f:id:touch-sp:20190914175009p:plain

Code

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

df = pd.read_csv('day.csv',index_col=1)  # use the 'dteday' column as the index

feat1 = np.array(df.hum).reshape((1,-1))
feat2 = np.array(df.temp).reshape((1,-1))
feat3 = np.array(df.windspeed).reshape((1,-1))
features_real = np.concatenate([feat1, feat2, feat3], axis=0)  # shape (3, num_timesteps)

feat4 = np.array(df.weekday).reshape((1,-1))
feat5 = np.array(df.workingday).reshape((1,-1))
feat6 = np.array(df.weathersit-1).reshape((1,-1))
feat7 = np.array(df.season-1).reshape((1,-1))
features_cat = np.concatenate([feat4, feat5, feat6, feat7], axis=0)

from gluonts.dataset.common import ListDataset

training_data = ListDataset(
    [{"start": df.index[0], 
        "target": df.cnt[:-14],
        "feat_dynamic_real": features_real[:,:-14],
        "feat_dynamic_cat": features_cat[:,:-14]
        }],
    freq = "1D")

test_data = ListDataset(
    [{"start": df.index[0], 
        "target": df.cnt,
        'feat_dynamic_real': features_real,
        "feat_dynamic_cat": features_cat
        }],
    freq = "1D")

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# predict the next prediction_length values from the preceding context_length values
estimator = DeepAREstimator(freq="1D", 
                            prediction_length=14, 
                            context_length=28,
                            use_feat_dynamic_real = True,
                            trainer=Trainer(epochs=50))
predictor = estimator.train(training_data=training_data)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.util import to_pandas

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,  # test dataset
    predictor=predictor,  # predictor
    num_eval_samples=100,  # number of sample paths we want for evaluation
)

plot_length = 30
prediction_intervals = (50.0, 90.0)
legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

for x, y in zip(test_data, forecast_it):
    to_pandas(x)[-plot_length:].plot()
    y.plot(color='g', prediction_intervals=prediction_intervals)
    plt.grid(which='both')
    plt.legend(legend, loc='upper left')

plt.show()

Problems

It looks like this works very well, but there is a catch.
When predicting the coming two weeks, the model is given the future "weather", "temperature", "humidity", "wind speed", and so on.
Using future data that could never actually be known in advance is a real problem.
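
One possible workaround (a sketch of my own, not something this post does) is to give the model only lagged covariates, the same trick the synthetic example in "Trying out GluonTS (1)" below uses: shift each feature back by prediction_length so that, at prediction time, only values that are already known get supplied.

# sketch: lag every dynamic feature by prediction_length (14 days here),
# padding the head with zeros, so no future information leaks into the model
prediction_length = 14
lagged_features_real = np.concatenate(
    [np.zeros((features_real.shape[0], prediction_length)),
     features_real[:, :-prediction_length]],
    axis=1)
# lagged_features_real can then replace features_real when building the datasets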

Questions

Is "feat_dynamic_cat" actually being used?
The results are much the same without it.
As an experiment, I put all of the features into "feat_dynamic_real" instead (one-hot encoding the categorical ones, as in the code below), and that gave the best results.

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

def one_hot(x, start_zero=True):
    # one-hot encode an integer series into an array of shape
    # (num_categories, num_timesteps): one row per category
    if not start_zero:
        x = x - 1
    num = len(x)
    category_n = x.max() + 1

    vec_list = []
    for i in range(category_n):
        vec_list.append(np.zeros(num))
    
    for i, cat in enumerate(x):
        vec_list[cat][i] = 1
    
    vec_list = [x.reshape((1,-1)) for x in vec_list]

    return np.concatenate(vec_list, axis=0)
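
# As a quick illustration (an added example, not in the original post):
# one_hot returns one row per category and one column per timestep,
# matching the (num_features, num_timesteps) layout that
# feat_dynamic_real expects, e.g.
#   one_hot(np.array([0, 2, 1, 0]))
#   -> array([[1., 0., 0., 1.],
#             [0., 0., 1., 0.],
#             [0., 1., 0., 0.]])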

df = pd.read_csv('day.csv',index_col=1)

feat1 = np.array(df.hum).reshape((1,-1))
feat2 = np.array(df.temp).reshape((1,-1))
feat3 = np.array(df.windspeed).reshape((1,-1))

feat4 = one_hot(df.weekday, start_zero=True)
feat5 = np.array(df.workingday).reshape((1,-1))
feat6 = one_hot(df.weathersit, start_zero=False)
feat7 = one_hot(df.season, start_zero=False)

features_real = np.concatenate([feat1, feat2, feat3, feat4, feat5, feat6, feat7], axis=0)

from gluonts.dataset.common import ListDataset

training_data = ListDataset(
    [{"start": df.index[0], 
        "target": df.cnt[:-14],
        "feat_dynamic_real": features_real[:,:-14],
        #"feat_dynamic_cat": features_cat[:,:-14]
        }],
    freq = "1D")

test_data = ListDataset(
    [{"start": df.index[0], 
        "target": df.cnt,
        'feat_dynamic_real': features_real,
        #"feat_dynamic_cat": features_cat
        }],
    freq = "1D")

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# predict the next prediction_length values from the preceding context_length values
estimator = DeepAREstimator(freq="1D", 
                            prediction_length=14, 
                            context_length=28,
                            use_feat_dynamic_real = True,
                            trainer=Trainer(epochs=50))
predictor = estimator.train(training_data=training_data)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.util import to_pandas

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,  # test dataset
    predictor=predictor,  # predictor
    num_eval_samples=100,  # number of sample paths we want for evaluation
)

plot_length = 30
prediction_intervals = (50.0, 90.0)
legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

for x, y in zip(test_data, forecast_it):
    to_pandas(x)[-plot_length:].plot()
    y.plot(color='g', prediction_intervals=prediction_intervals)
    plt.grid(which='both')
    plt.legend(legend, loc='upper left')

plt.show()

f:id:touch-sp:20190914230037p:plain

About GluonTS (1)

docs.aws.amazon.com

If your dataset contains the dynamic_feat field, the algorithm uses it automatically. 
All time series have to have the same number of feature time series. 
The time points in each of the feature time series correspond one-to-one to the time points in the target. 
In addition, the entry in the dynamic_feat field should have the same length as the target. 
If the dataset contains the dynamic_feat field, but you don't want to use it, disable it by setting num_dynamic_feat to "".
If the model was trained with the dynamic_feat field, you must provide this field for inference. 
In addition, each of the features has to have the length of the provided target plus the prediction_length. 
In other words, you must provide the feature value in the future.

import pandas as pd
import numpy as np
from gluonts.dataset.common import ListDataset
from gluonts.transform import FieldName
from gluonts.dataset.util import to_pandas

train_target = np.random.rand(48,168)
train_feat_dynamic_real = np.random.rand(48,168)

start = pd.Timestamp("01-01-2019")

train_ds = ListDataset([{FieldName.TARGET: target,
                         FieldName.START: start,
                         FieldName.FEAT_DYNAMIC_REAL: fdr}
                        for (target, fdr) in zip(train_target, train_feat_dynamic_real)],
                      freq= '1H')

test_target = np.random.rand(2,168)
test_feat_dynamic_real = np.random.rand(2,168)

test_ds = ListDataset([{FieldName.TARGET: target,
                        FieldName.START: start,
                        FieldName.FEAT_DYNAMIC_REAL: fdr}
                        for (target, fdr) in zip(test_target, test_feat_dynamic_real)],
                      freq= '1H')

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer

estimator = DeepAREstimator(freq='1H', 
                            prediction_length=24, 
                            context_length=48, 
                            use_feat_dynamic_real = True,
                            trainer=Trainer(epochs=5))
predictor = estimator.train(training_data=train_ds)

test_target = np.random.rand(2,168)
test_feat_dynamic_real = np.random.rand(2,168+24)  # ← this is where I got stuck: for inference, features must cover target length + prediction_length

test_ds = ListDataset([{FieldName.TARGET: target,
                        FieldName.START: start,
                        FieldName.FEAT_DYNAMIC_REAL: fdr}
                        for (target, fdr) in zip(test_target, test_feat_dynamic_real)],
                      freq= '1H')

pred = predictor.predict(test_ds)

Trying out GluonTS (1)

What I did

  1. Created 50 time series
  2. Used 48 of them as training data and the remaining 2 as test data
  3. After training, made predictions on the test data and compared them with the actual values

Code

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from gluonts.dataset.common import ListDataset
from gluonts.transform import FieldName
from gluonts.dataset.util import to_pandas

def create_dataset(num_series, num_steps, period=24, mu=1, sigma=0.3):
    # create target: noise + pattern
    # noise
    noise = np.random.normal(mu, sigma, size=(num_series, num_steps))
    # pattern - sinusoid with different phase
    sin_minumPi_Pi = np.sin(np.tile(np.linspace(-np.pi, np.pi, period), int(num_steps / period)))
    sin_Zero_2Pi = np.sin(np.tile(np.linspace(0, 2 * np.pi, 24), int(num_steps / period)))
    pattern = np.concatenate((np.tile(sin_minumPi_Pi.reshape(1, -1),
                                      (int(np.ceil(num_series / 2)),1)),
                              np.tile(sin_Zero_2Pi.reshape(1, -1),
                                      (int(np.floor(num_series / 2)), 1))
                             ),
                             axis=0
                            )
    target = noise + pattern
    # create time features: use target one period earlier, append with zeros
    feat_dynamic_real = np.concatenate((np.zeros((num_series, period)),
                                        target[:, :-period]
                                       ),
                                       axis=1
                                      )
    # create categorical static feats: use the sinusoid type as a categorical feature
    feat_static_cat = np.concatenate((np.zeros(int(np.ceil(num_series / 2))),
                                      np.ones(int(np.floor(num_series / 2)))
                                     ),
                                     axis=0
                                    )
    return target, feat_dynamic_real, feat_static_cat

custom_ds_metadata = {'num_series': 50,
                      'num_steps': 24 * 7,
                      'prediction_length': 24,
                      'freq': '1H',
                      'start': [pd.Timestamp("01-01-2019", freq='1H')
                                for _ in range(50)]  # one start timestamp per series
                     }

data_out = create_dataset(custom_ds_metadata['num_series'],
                          custom_ds_metadata['num_steps'],
                          custom_ds_metadata['prediction_length']
                         )
# use the 48 series at indices 1-48 as training data (feat_static_cat is not used)
target = data_out[0][1:49]
feat_dynamic_real = data_out[1][1:49]

train_ds = ListDataset([{FieldName.TARGET: target,
                         FieldName.START: start,
                         FieldName.FEAT_DYNAMIC_REAL: fdr}
                        for (target, start, fdr) in zip(target,
                                                        custom_ds_metadata['start'],
                                                        feat_dynamic_real)],
                      freq=custom_ds_metadata['freq'])

# use the first and last series as test data (feat_static_cat is not used)
target = np.concatenate(
        [data_out[0][0].reshape((1,-1)), 
        data_out[0][49].reshape((1,-1))],
        axis=0)

feat_dynamic_real = np.concatenate(
        [data_out[1][0].reshape((1,-1)), 
        data_out[1][49].reshape((1,-1))],
        axis=0)

test_ds_all = ListDataset([{FieldName.TARGET: target,
                        FieldName.START: start,
                        FieldName.FEAT_DYNAMIC_REAL: fdr}
                       for (target, start, fdr) in zip(target,
                                                        custom_ds_metadata['start'],
                                                        feat_dynamic_real)],
                     freq=custom_ds_metadata['freq'])

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# predict the next prediction_length values from the preceding context_length values
estimator = DeepAREstimator(freq=custom_ds_metadata['freq'], 
                            prediction_length=custom_ds_metadata['prediction_length'], 
                            context_length=custom_ds_metadata['prediction_length']*2, 
                            use_feat_dynamic_real = True,
                            trainer=Trainer(epochs=10))
predictor = estimator.train(training_data=train_ds)

# save the trained model in tmp/
from pathlib import Path
predictor.serialize(Path("tmp"))

#load
#from gluonts.model.predictor import Predictor
#predictor_deserialized = Predictor.deserialize(Path("tmp"))

from gluonts.evaluation.backtest import make_evaluation_predictions

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds_all,  # test dataset
    predictor=predictor,  # predictor
    num_eval_samples=100,  # number of sample paths we want for evaluation
)

plot_length = 150
prediction_intervals = (50.0, 90.0)
legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]

for i, (x, y) in enumerate(zip(test_ds_all, forecast_it)):
    plt.subplot(2,1,i+1)
    to_pandas(x)[-plot_length:].plot()
    y.plot(color='g', prediction_intervals=prediction_intervals)
    plt.grid(which='both')
    plt.legend(legend, loc='upper left')

plt.show()

Results

f:id:touch-sp:20190913010912p:plain

Predicting the Nikkei 225 with GluonTS

What I tried this time

Using stock price data up to one year ago, I predicted the year that followed.
The model uses the preceding two years of data (24 monthly values) to make its predictions.

Downloading the data

Download the monthly data from the download center of the Nikkei Stock Average Profile site.
ダウンロードセンター - 日経平均プロフィル
(I used the monthly data because the daily data has no entries for weekends and holidays and is therefore not an evenly spaced time series.)

Preprocessing the data

  • Rename the headers in the first row

「データ日付」 → "date"
「終値」 → "value"

  • Delete the last row and save the file as "nikkei.csv", converting it to UTF-8 at the same time (a pandas sketch of these steps follows below)
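
A minimal pandas sketch of the preprocessing above (the raw file name and its Shift-JIS encoding are assumptions on my part; adjust them to the actual downloaded file):

import pandas as pd

raw = pd.read_csv('nikkei_monthly.csv', encoding='cp932')  # assumed name/encoding
raw = raw.rename(columns={'データ日付': 'date', '終値': 'value'})
raw = raw.iloc[:-1]  # drop the final (footer) row
raw.to_csv('nikkei.csv', index=False, encoding='utf-8')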

Code

import pandas as pd
import matplotlib
matplotlib.use('Agg') 
from matplotlib import pyplot as plt

df = pd.read_csv('nikkei.csv',index_col=0)
# the most recent year of data (12 values) is excluded from the training data
df_all = df[['value']]
df_train = df[['value']][:-12]

from gluonts.dataset.common import ListDataset
training_data = ListDataset(
    [{"start": df_train.index[0], "target": df_train.value}],
    freq = "1M")

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# predict the next prediction_length values from the preceding context_length values
estimator = DeepAREstimator(freq="1M", 
                            prediction_length=12, 
                            context_length=24, 
                            trainer=Trainer(epochs=10))
predictor = estimator.train(training_data=training_data)

df_all[100:].plot(linewidth=2)
plt.grid(which='both')
plt.savefig('real.png') 

plt.figure()

from gluonts.dataset.util import to_pandas
for test_entry, forecast in zip(training_data, predictor.predict(training_data)):
    to_pandas(test_entry)[100:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.savefig('prediction.png')

Results

The top graph shows the actual movement; the bottom one shows the prediction.
f:id:touch-sp:20190911124408p:plain
f:id:touch-sp:20190911124440p:plain
It may look like this worked, but that was pure luck; in reality there is no way the next year of stock prices can be predicted this easily. All sorts of factors are at play, such as foreign economic conditions, Japanese monetary policy, and natural disasters, and the real question is how to reflect them in the prediction.
I have no intention of pursuing stock price prediction itself, but I do want to learn the technical side of how to feed such influencing factors into a model.

Environment

Windows 10 Pro
Python 3.6.8
boto3==1.9.226
botocore==1.12.226
certifi==2019.6.16
chardet==3.0.4
cycler==0.10.0
dataclasses==0.6
docutils==0.15.2
gluonts==0.3.3
graphviz==0.8.4
holidays==0.9.11
idna==2.6
jmespath==0.9.4
kiwisolver==1.1.0
matplotlib==3.1.1
mxnet==1.4.1
numpy==1.14.6
pandas==0.25.1
pydantic==0.28
pyparsing==2.4.2
python-dateutil==2.8.0
pytz==2019.2
requests==2.18.4
s3transfer==0.2.1
six==1.12.0
tqdm==4.35.0
ujson==1.35
urllib3==1.22

An action recognition demo in under 20 lines (GluonCV), part 2

Introduction

GluonCV 0.5.0 has been released.
The pretrained "inceptionv3_kinetics400" model is now available.
This time I only ran the still-image demo.

Environment

Windows 10 Pro 
GPUなし
Python 3.6.8

Checking versions (pip freeze)

You only need to install "mxnet-mkl" and "gluoncv" via pip (pip install mxnet-mkl gluoncv).
Everything else was pulled in automatically as dependencies.

certifi==2019.6.16
chardet==3.0.4
cycler==0.10.0
gluoncv==0.5.0
graphviz==0.8.4
idna==2.6
kiwisolver==1.1.0
matplotlib==3.1.1
mxnet-mkl==1.5.0
numpy==1.16.5
Pillow==6.1.0
pyparsing==2.4.2
python-dateutil==2.8.0
requests==2.18.4
scipy==1.3.1
six==1.12.0
tqdm==4.35.0
urllib3==1.22

Photo ("sample.jpg")

f:id:touch-sp:20190909191638j:plain

Code

from mxnet import nd, image
from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model

transform_fn = transforms.Compose([
    transforms.Resize((400,300)),
    transforms.CenterCrop(size=299),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

img = transform_fn(image.imread('sample.jpg'))
net = get_model('inceptionv3_kinetics400', pretrained=True)
pred = net(img.expand_dims(axis=0))
classes = net.classes
id = nd.argmax(pred, axis=1).astype('int').asscalar()
prob = nd.softmax(pred)[0][id].asscalar()
print('[%s]  probability=%f'%(classes[id], prob))

Result

[snowboarding]  probability=0.996021

Thoughts

The model has been trained on 400 action classes.
With this, image captioning for pictures of people might work well.

Anomaly detection

Investigate how to classify well given only a small number of images (normal: 500, anomalous: 10).
FashionMNIST images are used:
500 sneaker images as the normal class,
10 boot images as the anomalous class.

Preparing the data

This time, the grayscale images are converted to color before use.

import cv2
import random
import mxnet as mx
from mxnet.gluon import data

train_data = data.vision.datasets.FashionMNIST(train=True)

Sneaker = []
for each_data, label in train_data:
    if label==7:  # FashionMNIST label 7 = Sneaker
        img = cv2.cvtColor(each_data.asnumpy(), cv2.COLOR_GRAY2RGB)
        Sneaker.append(mx.nd.array(img))
Sneaker = random.sample(Sneaker, 500)

mx.nd.save('Sneaker', Sneaker)

Boot = []
for each_data, label in train_data:
    if label==9:  # FashionMNIST label 9 = Ankle boot
        img = cv2.cvtColor(each_data.asnumpy(), cv2.COLOR_GRAY2RGB)
        Boot.append(mx.nd.array(img))
Boot = random.sample(Boot, 10)

mx.nd.save('Boot', Boot)


test_data = data.vision.datasets.FashionMNIST(train=False)

test_Sneaker = []
for each_data, label in test_data:
    if label==7:
        img = cv2.cvtColor(each_data.asnumpy(), cv2.COLOR_GRAY2RGB)
        test_Sneaker.append(mx.nd.array(img))

mx.nd.save('test_Sneaker', test_Sneaker)

test_Boot = []
for each_data, label in test_data:
    if label==9:
        img = cv2.cvtColor(each_data.asnumpy(), cv2.COLOR_GRAY2RGB)
        test_Boot.append(mx.nd.array(img))

mx.nd.save('test_Boot', test_Boot)

Trying a simple classifier first

  • Code
import numpy as np 
import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn, data
from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model

jitter_param = 0.4
lighting_param = 0.1
transform_train = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomLighting(lighting_param),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_data = mx.nd.load('Sneaker') + mx.nd.load('Boot')
train_label = [0]*500 + [1]*10

train_dataset = data.dataset.ArrayDataset(train_data, train_label)
train_data = data.DataLoader(
    train_dataset.transform_first(transform_train), 
    batch_size=32,
    shuffle=True)

test_data = mx.nd.load('test_Sneaker') + mx.nd.load('test_Boot')
test_label = [0]*1000 + [1]*1000

test_dataset = data.dataset.ArrayDataset(test_data, test_label)
test_data = data.DataLoader(
    test_dataset.transform_first(transform_test), 
    batch_size=32,
    shuffle=False)

finetune_net = get_model('mobilenet1.0', pretrained=True, root='MobileNet')

with finetune_net.name_scope():
    finetune_net.output = nn.Dense(2)
finetune_net.output.initialize(mx.init.Xavier())
finetune_net.hybridize()

def evaluate_accuracy(dataloader, net):
    sample_n = 0
    acc = 0
    for batch in dataloader:
        data = batch[0]
        label = batch[1].asnumpy().astype('int32')
        output = net(data)
        predictions = np.argmax(output.asnumpy(), axis=1).astype('int32')
        sample_n += data.shape[0]
        acc += sum(predictions==label)
    return acc / sample_n

trainer = gluon.Trainer(finetune_net.collect_params(), 'adam')
Loss = gluon.loss.SoftmaxCrossEntropyLoss()

num_epochs = 10

print('start training...')

for epoch in range(1, num_epochs + 1):
    for batch in train_data:
        data = batch[0]
        label = batch[1]
        with autograd.record():
            output = finetune_net(data)
            loss = Loss(output, label)
        loss.backward()  # backward outside the record scope
        trainer.step(data.shape[0])

    train_acc = evaluate_accuracy(train_data, finetune_net)
    test_acc = evaluate_accuracy(test_data, finetune_net)
    
    print('{:<2} epoch train_acc = {:<10,.5f} test_acc = {:<10,.5f}'.format(epoch, train_acc, test_acc))

    finetune_net.save_parameters('finetune_%d.params'%epoch)
  • Results
1  epoch train_acc = 0.21765    test_acc = 0.56150
2  epoch train_acc = 0.99608    test_acc = 0.79000
3  epoch train_acc = 0.99804    test_acc = 0.88500
4  epoch train_acc = 1.00000    test_acc = 0.81750
5  epoch train_acc = 0.99608    test_acc = 0.89800
6  epoch train_acc = 1.00000    test_acc = 0.87300
7  epoch train_acc = 1.00000    test_acc = 0.87100
8  epoch train_acc = 1.00000    test_acc = 0.87100
9  epoch train_acc = 1.00000    test_acc = 0.76400
10 epoch train_acc = 1.00000    test_acc = 0.78600
  • Code to visualize the results
import numpy as np 
import mxnet as mx
from mxnet.gluon import nn, data
from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model

transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_data = mx.nd.load('Sneaker') + mx.nd.load('Boot')
train_label = [0]*500 + [1]*10

train_dataset = data.dataset.ArrayDataset(train_data, train_label)
train_data = data.DataLoader(
    train_dataset.transform_first(transform_test), 
    batch_size=32,
    shuffle=False)

test_data = mx.nd.load('test_Sneaker') + mx.nd.load('test_Boot')
test_label = [0]*1000 + [1]*1000

test_dataset = data.dataset.ArrayDataset(test_data, test_label)
test_data = data.DataLoader(
    test_dataset.transform_first(transform_test), 
    batch_size=32,
    shuffle=False)

finetune_net = get_model('mobilenet1.0', pretrained=False, root='MobileNet')

with finetune_net.name_scope():
    finetune_net.output = nn.Dense(2)
finetune_net.load_parameters('finetune_5.params')
finetune_net.hybridize()

train_result = []
for batch in train_data:
    output = finetune_net.features(batch[0])
    train_result.append(output)

train_x = mx.nd.concat(*train_result, dim=0)

test_result = []
for batch in test_data:
    output = finetune_net.features(batch[0])
    test_result.append(output)

test_x = mx.nd.concat(*test_result, dim=0)

from sklearn.manifold import TSNE
train_result = TSNE(n_components=2).fit_transform(train_x.asnumpy())
test_result = TSNE(n_components=2).fit_transform(test_x.asnumpy())

# display the results as scatter plots
import matplotlib.pyplot as plt

plt.scatter(train_result[:,0], train_result[:,1], c = train_label, cmap='bwr')
plt.show()

plt.scatter(test_result[:,0], test_result[:,1], c = test_label, cmap='bwr')
plt.show()
  • Visualization of the results

f:id:touch-sp:20190903172409p:plain
f:id:touch-sp:20190903172425p:plain
Top: training data; bottom: test data.
In the training data the anomalous images are already well separated, so no further improvement can be expected there.

Now for the main topic

(Work in progress)