Gluonでの移転学習の時にパラメータを固定する方法を調べてみた

GluonCVのpretrained modelは「features」部分と「output」部分からできている。
「features」部分のパラメータを固定する方法についていろいろ調べてみた。

①「setattr('grad_req', 'null')」を使用する(おそらく推奨される方法)

# Pattern 1: freeze the "features" parameters via setattr('grad_req', 'null').
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
# Freeze the feature extractor: grad_req='null' means no gradient is
# computed or stored for these parameters.
my_net.features.collect_params().setattr('grad_req', 'null')
my_net.hybridize()

trainer = gluon.Trainer(my_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

# One dummy training step on random data (batch of 4, two classes).
data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output = my_net(data)
    loss = loss_func(output, label)
loss.backward()
trainer.step(data.shape[0])

# Verify the frozen features still produce the same output as the original.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
True

②「Trainer」部分のパラメータ指定を限定する→失敗

# Pattern 2: restrict the Trainer to the "output" parameters only -> fails.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# Only the new head's parameters are handed to the optimizer, so
# trainer.step() never touches the feature weights themselves.
trainer = gluon.Trainer(my_net.output.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

# One dummy training step on random data.
data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output = my_net(data)
    loss = loss_func(output, label)
loss.backward()
trainer.step(data.shape[0])

# The features output no longer matches the original (prints False).
# NOTE(review): presumably the training-mode forward pass still updates
# auxiliary state such as BatchNorm running statistics -- verify.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
False

③ 単純に「detach()」を使う→条件付きで成功

# Pattern 3 (first attempt): simply detach() the feature output.
# Raises a UserWarning at trainer.step() -- see the traceback below.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# The Trainer still holds ALL parameters, including the feature weights.
trainer = gluon.Trainer(my_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output0 = my_net.features(data)
    # detach() cuts the autograd graph, so no gradients flow into features.
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
# Raises UserWarning: the feature parameters never received gradients,
# so the Trainer sees them as "stale".
trainer.step(data.shape[0])

x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))

実行すると、以下のようなエラー(UserWarning が例外として送出される)が発生する。

Traceback (most recent call last):
  File "pattern4.py", line 24, in <module>
    trainer.step(data.shape[0])
  File "E:\python\mxnet0808\lib\site-packages\mxnet\gluon\trainer.py", line 332, in step
    self._update(ignore_stale_grad)
  File "E:\python\mxnet0808\lib\site-packages\mxnet\gluon\trainer.py", line 416, in _update
    %(param.name, str(data.context)))
UserWarning: Gradient of Parameter `resnetv21_conv0_weight` on context cpu(0) has not
been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient

「trainer.step」内に「ignore_stale_grad=True」を記述するとうまくいく。

# Pattern 3 (fixed): detach() plus ignore_stale_grad=True on trainer.step().
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# The Trainer still holds ALL parameters, including the feature weights.
trainer = gluon.Trainer(my_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output0 = my_net.features(data)
    # detach() cuts the autograd graph, so no gradients flow into features.
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
# ignore_stale_grad=True tells the Trainer to skip parameters whose
# gradients were not refreshed by this backward pass.
trainer.step(data.shape[0], ignore_stale_grad=True)

# Verify the features still produce the same output as the original.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
True

④ ②と③を組み合わせる→成功

# Pattern 4: combine pattern 2 and pattern 3 -> works.
# Trainer only holds the head's parameters AND the graph is cut by detach(),
# so there is no stale-gradient warning and the features stay frozen.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# Only the new head's parameters are handed to the optimizer.
trainer = gluon.Trainer(my_net.output.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output0 = my_net.features(data)
    # detach() cuts the autograd graph, so no gradients flow into features.
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
trainer.step(data.shape[0])

# Verify the features still produce the same output as the original.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
True

⑤「features」部分の計算を「autograd.record()」外に記述する→条件付きで成功

# Pattern 5 (first attempt): run the "features" forward OUTSIDE
# autograd.record(). Raises a UserWarning at trainer.step() -- see below.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# The Trainer still holds ALL parameters, including the feature weights.
trainer = gluon.Trainer(my_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
# Feature forward outside record(): nothing is recorded for it, so no
# gradients can reach the feature parameters.
output0 = my_net.features(data)
with autograd.record():
    # detach() is redundant here (output0 carries no recorded graph),
    # but it is harmless.
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
# Raises UserWarning: the feature parameters never received gradients,
# so the Trainer sees them as "stale".
trainer.step(data.shape[0])

x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))

実行すると、以下のようなエラー(UserWarning が例外として送出される)が発生する。

Traceback (most recent call last):
  File "pattern4.py", line 24, in <module>
    trainer.step(data.shape[0])
  File "E:\python\mxnet0808\lib\site-packages\mxnet\gluon\trainer.py", line 332, in step
    self._update(ignore_stale_grad)
  File "E:\python\mxnet0808\lib\site-packages\mxnet\gluon\trainer.py", line 416, in _update
    %(param.name, str(data.context)))
UserWarning: Gradient of Parameter `resnetv21_conv0_weight` on context cpu(0) has not
been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient

「trainer.step」内に「ignore_stale_grad=True」を記述するとうまくいく。

# Pattern 5 (fixed): features outside record() plus ignore_stale_grad=True.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# The Trainer still holds ALL parameters, including the feature weights.
trainer = gluon.Trainer(my_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
# Feature forward outside record(): nothing is recorded for it, so no
# gradients can reach the feature parameters.
output0 = my_net.features(data)
with autograd.record():
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
# ignore_stale_grad=True tells the Trainer to skip parameters whose
# gradients were not refreshed by this backward pass.
trainer.step(data.shape[0], ignore_stale_grad=True)

# Verify the features still produce the same output as the original.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
True

⑥ ②と⑤を組み合わせる→成功

# Pattern 6: combine pattern 2 and pattern 5 -> works.
# The Trainer only holds the head's parameters and the feature forward
# happens outside record(), so no stale-gradient warning occurs.
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

# Untouched reference copy of the pretrained network, for comparison later.
original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# Network to fine-tune: replace the classifier head with a 2-class Dense layer.
my_net = get_model('ResNet50_v2', pretrained=True)
with my_net.name_scope():
    my_net.output = gluon.nn.Dense(2)
my_net.output.initialize(mx.init.Xavier())
my_net.hybridize()

# Only the new head's parameters are handed to the optimizer.
trainer = gluon.Trainer(my_net.output.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
# Feature forward outside record(): nothing is recorded for it.
output0 = my_net.features(data)
with autograd.record():
    output1 = my_net.output(output0.detach())
    loss = loss_func(output1, label)
loss.backward()
trainer.step(data.shape[0])

# Verify the features still produce the same output as the original.
x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = my_net.features(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))

⑦ 全く別のネットワークとして定義しなおす(最もわかりやすい)

# Pattern 7: define the body and the head as completely separate networks
# (the most explicit approach).
import mxnet as mx
import numpy as np
from mxnet import gluon, autograd
from gluoncv.model_zoo import get_model

original_net = get_model('ResNet50_v2', pretrained=True)
original_net.hybridize()

# NOTE(review): features_net is the very same Block as original_net.features
# (shared parameters), so the final comparison below is trivially True.
features_net = original_net.features
features_net.hybridize()

# Brand-new 2-class head, trained in isolation from the body.
output_net = gluon.nn.Dense(2)
output_net.initialize(mx.init.Xavier())
output_net.hybridize()

# Only the head's parameters are given to the optimizer.
trainer = gluon.Trainer(output_net.collect_params(), 'adam')
loss_func = gluon.loss.SoftmaxCrossEntropyLoss()

data = mx.random.uniform(-1, 1, shape=(4, 3, 244, 244))
label = mx.nd.array((0, 1, 0, 1))
with autograd.record():
    output0 = features_net(data)
    output1 = output_net(output0)
    loss = loss_func(output1, label)
loss.backward()
trainer.step(data.shape[0])

x = mx.random.uniform(-1, 1, shape=(1, 3, 244, 244))
out1 = original_net.features(x)
out2 = features_net(x)
print(np.array_equal(out1.asnumpy(), out2.asnumpy()))
  • 結果
True