GluonTSを使ってみる (1)

やったこと

  1. 50個のシリーズを作成した
  2. そのうちの48個を訓練データ、残りの2個をテストデータとした
  3. 訓練後にテストデータで予測を行い、実際の値と比べてみた

コード

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from gluonts.dataset.common import ListDataset
from gluonts.transform import FieldName
from gluonts.dataset.util import to_pandas

def create_dataset(num_series, num_steps, period=24, mu=1, sigma=0.3):
    """Generate noisy sinusoidal series with a lagged feature and a category.

    Each series is Gaussian noise plus a sinusoid that repeats every
    ``period`` steps. The first ``ceil(num_series / 2)`` series use a sinusoid
    sampled over ``[-pi, pi]``; the remaining ``floor(num_series / 2)`` use
    one sampled over ``[0, 2 * pi]``.

    Args:
        num_series: Number of series (rows) to generate.
        num_steps: Number of time steps (columns) per series.
        period: Number of steps in one sinusoid cycle; also the lag used for
            the dynamic feature.
        mu: Mean of the Gaussian noise.
        sigma: Standard deviation of the Gaussian noise.

    Returns:
        A tuple ``(target, feat_dynamic_real, feat_static_cat)`` where
        ``target`` and ``feat_dynamic_real`` have shape
        ``(num_series, num_steps)`` and ``feat_static_cat`` has shape
        ``(num_series,)`` holding 0.0 for the first group and 1.0 for the
        second.
    """
    num_first = int(np.ceil(num_series / 2))
    num_second = int(np.floor(num_series / 2))

    # Noise component, drawn first so the random stream is consumed exactly
    # once per call.
    noise = np.random.normal(mu, sigma, size=(num_series, num_steps))

    # Tile enough whole cycles to cover num_steps and trim the excess, so a
    # num_steps that is not a multiple of period still yields full-length rows.
    num_cycles = int(np.ceil(num_steps / period))

    def _cycle(start, stop):
        # Both phases must be sampled with `period` points per cycle;
        # otherwise the two groups end up with different lengths.
        one_cycle = np.linspace(start, stop, period)
        return np.sin(np.tile(one_cycle, num_cycles))[:num_steps]

    sin_minus_pi_pi = _cycle(-np.pi, np.pi)
    sin_zero_2pi = _cycle(0, 2 * np.pi)
    pattern = np.concatenate(
        (
            np.tile(sin_minus_pi_pi.reshape(1, -1), (num_first, 1)),
            np.tile(sin_zero_2pi.reshape(1, -1), (num_second, 1)),
        ),
        axis=0,
    )
    target = noise + pattern

    # Dynamic feature: the target shifted one period later, with zeros filling
    # the first `period` steps where no earlier value exists.
    feat_dynamic_real = np.zeros_like(target)
    feat_dynamic_real[:, period:] = target[:, :-period]

    # Static categorical feature: which sinusoid type the series uses.
    feat_static_cat = np.concatenate(
        (np.zeros(num_first), np.ones(num_second)),
        axis=0,
    )
    return target, feat_dynamic_real, feat_static_cat

# Dataset configuration: 50 series of 24 * 7 hourly steps, forecasting 24
# steps ahead. The start list holds 100 identical timestamps, more than the
# number of series; the zip() calls that consume it stop at the shortest input.
custom_ds_metadata = dict(
    num_series=50,
    num_steps=24 * 7,
    prediction_length=24,
    freq='1H',
    start=[pd.Timestamp("01-01-2019", freq='1H') for _ in range(100)],
)

# The prediction length is passed as the third positional argument, i.e. it
# doubles as the sinusoid period.
data_out = create_dataset(
    *(custom_ds_metadata[key]
      for key in ('num_series', 'num_steps', 'prediction_length'))
)
# Series 1 through 48 are the training data (feat_static_cat is not used).
target, feat_dynamic_real = (features[1:49] for features in data_out[:2])

# One dataset entry per series: its target values, start timestamp and
# lagged dynamic feature.
train_ds = ListDataset(
    [
        {
            FieldName.TARGET: series,
            FieldName.START: start,
            FieldName.FEAT_DYNAMIC_REAL: fdr,
        }
        for series, start, fdr in zip(
            target, custom_ds_metadata['start'], feat_dynamic_real
        )
    ],
    freq=custom_ds_metadata['freq'],
)

# The first and last series are the test data (feat_static_cat is not used).
# Stack rows 0 and 49 into two-row arrays.
target = np.vstack((data_out[0][0], data_out[0][49]))
feat_dynamic_real = np.vstack((data_out[1][0], data_out[1][49]))

test_ds_all = ListDataset(
    [
        {
            FieldName.TARGET: series,
            FieldName.START: start,
            FieldName.FEAT_DYNAMIC_REAL: fdr,
        }
        for series, start, fdr in zip(
            target, custom_ds_metadata['start'], feat_dynamic_real
        )
    ],
    freq=custom_ds_metadata['freq'],
)

from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
# Predict the next prediction_length values from the context_length values
# that precede them; the context window here is twice the prediction length.
estimator = DeepAREstimator(
    freq=custom_ds_metadata['freq'],
    context_length=2 * custom_ds_metadata['prediction_length'],
    prediction_length=custom_ds_metadata['prediction_length'],
    use_feat_dynamic_real=True,
    trainer=Trainer(epochs=10),
)

# Fit on the training dataset to obtain a predictor.
predictor = estimator.train(training_data=train_ds)

# Save the trained model under tmp/ (relative to the working directory).
from pathlib import Path
model_dir = Path("tmp")
# Create the target directory first so serialization does not fail on a
# fresh checkout where tmp/ does not exist yet.
model_dir.mkdir(parents=True, exist_ok=True)
predictor.serialize(model_dir)

# To load the saved model back later:
#from gluonts.model.predictor import Predictor
#predictor_deserialized = Predictor.deserialize(Path("tmp"))

from gluonts.evaluation.backtest import make_evaluation_predictions

# Produce forecasts for the test dataset, requesting 100 sample paths per
# series for evaluation.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds_all,
    predictor=predictor,
    num_eval_samples=100,
)

plot_length = 150
prediction_intervals = (50.0, 90.0)

# Legend entries: observations, median, then the intervals widest first.
legend = ["observations", "median prediction"]
legend.extend(
    f"{k}% prediction interval" for k in reversed(prediction_intervals)
)

# One subplot per test series: the last plot_length observations with the
# forecast drawn over them.
for panel, (entry, forecast) in enumerate(zip(test_ds_all, forecast_it), start=1):
    plt.subplot(2, 1, panel)
    to_pandas(entry)[-plot_length:].plot()
    forecast.plot(color='g', prediction_intervals=prediction_intervals)
    plt.grid(which='both')
    plt.legend(legend, loc='upper left')

plt.show()

結果

[f:id:touch-sp:20190913010912p:plain]