Deep Learning in R (6)

Environment: Windows 10 & R-3.3.3

A two-layer neural network trained with backpropagation

library(dplyr)
library(R6)

# Initialize parameters
W1 <- matrix(rnorm(784*50),nrow=784)*0.01
W2 <- matrix(rnorm(50*10),nrow=50)*0.01
b1 <- matrix(numeric(50),nrow=1)
b2 <- matrix(numeric(10),nrow=1)

# Load data
x_train <- readRDS("x_train")
x_train <- x_train/255
t_train <- readRDS("t_train")
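
x_train and t_train are the RDS files presumably created in the earlier posts of this series. The code below assumes x_train is a 60000 x 784 matrix of pixel values and t_train is a 60000 x 10 one-hot label matrix; if your labels are still stored as integers 0-9, a conversion along these lines (a sketch with my own variable names, not from the post) gives the one-hot form:

# Sketch: turn integer labels 0-9 into one-hot rows
labels_int <- c(5, 0, 4)   # hypothetical example labels
onehot <- matrix(0, nrow=length(labels_int), ncol=10)
onehot[cbind(seq_along(labels_int), labels_int + 1)] <- 1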

ReLU layer

Relu <- R6Class("Relu",

	public = list(
		mask = NULL,
		
		forward = function(x){
			out <- x
			self$mask <- x <= 0          # remember where the input was non-positive
			out[self$mask] <- 0
			return(out)
		},
		
		backward = function(dout){
			dout[self$mask] <- 0         # no gradient flows where the input was <= 0
			return(dout)
		}
	)
)
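
A quick sanity check of the layer (my own example, not in the original post): forward() zeroes out the non-positive entries and stores their positions in mask, and backward() blocks the gradient at those same positions.

relu_test <- Relu$new()
relu_test$forward(matrix(c(1, -0.5, -2, 3), nrow=2))       # negatives become 0
relu_test$backward(matrix(1, nrow=2, ncol=2))              # 0 where the input was <= 0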

Affine layer

Affine <- R6Class("Affine",

	public = list(
		W = NULL,
		b = NULL,
		x = NULL,
		dW = NULL,
		db = NULL,
		
		initialize = function(W,b){
			self$W <- W
			self$b <- b
		},

		forward = function(x){
			# x may be a single sample (vector) or a mini-batch (matrix)
			if(x %>% is.vector()){
				batch <- 1
			}else{
				batch <- nrow(x)
			}
			self$x <- x
			# x %*% W plus the bias replicated across the rows of the batch
			out <- (x %*% self$W) + matrix(rep(self$b,batch),nrow=batch,byrow=T)
			return(out)
		},
		
		backward = function(dout){
			dx <- dout %*% (self$W %>% t())
			self$dW <- (self$x %>% t()) %*% dout
			self$db <- apply(dout,2,sum)   # bias gradient: sum over the batch
			return(dx)
		}
	)
)
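
A small shape check (my own sketch, assuming x_train has been loaded as above): with a mini-batch of 5 images the first Affine layer maps 5 x 784 to 5 x 50, backward() returns a 5 x 784 gradient for the input, and dW ends up with the same shape as W1.

affine_test <- Affine$new(W1, b1)
dim(affine_test$forward(x_train[1:5, ]))                    # 5 50
dim(affine_test$backward(matrix(1, nrow=5, ncol=50)))       # 5 784
dim(affine_test$dW)                                         # 784 50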

SoftmaxWithLoss layer

# softmax function (applied to each row; the row maximum is subtracted for numerical stability)
softmax <- function(x){
	sm_in <- function(a){
		m <- max(a)
		exp_x <- exp(a-m)
		sum_exp_x <- sum(exp_x)
		b <- exp_x/sum_exp_x
		return(b)
	}
	y <- apply(x,1,sm_in) %>% t()
	return(y)
}
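
A quick check of the softmax (my own example): every row of the output is a probability distribution, i.e. it sums to 1.

softmax(matrix(c(1, 2, 3,
                 1, 1, 1), nrow=2, byrow=TRUE))
# row 1: roughly 0.09 0.24 0.67, row 2: 1/3 each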

# Cross-entropy error (t is assumed to be one-hot encoded)
cross_entropy_error <- function(y,t){

	batch_size <- nrow(y)

	# a tiny constant inside log() guards against log(0)
	temp <- (-1)*sum(t*log(y + 1e-7))/batch_size
	return(temp)
}
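
A tiny worked example (not in the original post): with one-hot t, only the probability assigned to the correct class of each sample contributes, averaged over the batch.

y_test <- matrix(c(0.7, 0.2, 0.1,
                   0.1, 0.8, 0.1), nrow=2, byrow=TRUE)
t_test <- matrix(c(1, 0, 0,
                   0, 1, 0), nrow=2, byrow=TRUE)
cross_entropy_error(y_test, t_test)   # -(log(0.7) + log(0.8)) / 2, roughly 0.29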

SoftmaxWithLoss <- R6Class("SoftmaxWithLoss",

	public = list(
		loss = NULL,
		y = NULL,
		t = NULL,

		forward = function(x,t){
			self$t <- t
			self$y <- softmax(x)
			self$loss <- cross_entropy_error(self$y,self$t)
			return(self$loss) 
		},
		
		backward = function(dout){
			# gradient of softmax + cross-entropy with respect to the input scores
			batch_size <- self$t %>% nrow()
			dx <- (self$y - self$t)/batch_size
			return(dx)
		}
	)
)
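
A short usage sketch (my own, not from the post): forward() returns the scalar loss for a batch of scores and one-hot labels, and backward(1) returns (y - t) / batch_size, which is the starting point of the backward pass.

swl_test <- SoftmaxWithLoss$new()
score_test <- matrix(rnorm(6), nrow=2)            # raw scores: 2 samples, 3 classes
label_test <- matrix(c(1, 0, 0,
                       0, 0, 1), nrow=2, byrow=TRUE)
swl_test$forward(score_test, label_test)          # scalar loss
swl_test$backward(1)                              # 2 x 3 matrix, (y - t) / 2
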
# Assemble the network: Affine -> ReLU -> Affine, with SoftmaxWithLoss as the last layer
layers <- list()
layers[[1]] <- Affine$new(W1,b1)
layers[[2]] <- Relu$new()
layers[[3]] <- Affine$new(W2,b2)
lastlayer <- SoftmaxWithLoss$new()

# Forward pass through the three layers (the loss layer is applied separately)
predict <- function(x){
	for(i in 1:3){
		x <- layers[[i]]$forward(x)
	}
	return(x)
}

loss <- function(x,t){
	y <- predict(x)
	z <- lastlayer$forward(y,t)
	return(z)
}

gradient <- function(x,t){

	#forward
	loss_vector <<- c(loss_vector,loss(x,t))

	#backward
	dout <- lastlayer$backward(1)
	for(i in 3:1){
		dout <- layers[[i]]$backward(dout)
	}
	
	grads <- list(
		W1 = layers[[1]]$dW,
		b1 = layers[[1]]$db,
		W2 = layers[[3]]$dW,
		b2 = layers[[3]]$db
	)
	return(grads)
}
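
As a rough check that the backpropagation above is correct (my own sketch, not part of the post), the gradient returned for a single weight can be compared with a central-difference numerical gradient:

loss_vector <- NULL                      # gradient() appends to this via <<-
x_check <- x_train[1:3, ]
t_check <- t_train[1:3, ]
grads_check <- gradient(x_check, t_check)

h <- 1e-4
w_orig <- layers[[1]]$W[400, 1]          # a pixel near the middle of the image
layers[[1]]$W[400, 1] <- w_orig + h
loss_plus <- loss(x_check, t_check)
layers[[1]]$W[400, 1] <- w_orig - h
loss_minus <- loss(x_check, t_check)
layers[[1]]$W[400, 1] <- w_orig          # restore the weight

(loss_plus - loss_minus) / (2 * h)       # should be close to grads_check$W1[400, 1]
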
# Hyperparameters
iters_num <- 10000
train_size <- 60000      # number of rows in x_train
batch_size <- 100
learning_rate <- 0.1

loss_vector <- NULL      # reset; gradient() appends the loss at every iteration

for(i in 1:iters_num){

	# Sample a mini-batch
	batch_mask <- sample(train_size,batch_size)
	x_batch <- x_train[batch_mask,]
	t_batch <- t_train[batch_mask,]

	# Compute the gradients by backpropagation
	grads <- gradient(x_batch,t_batch)

	# Update the parameters (plain SGD)
	layers[[1]]$W <- layers[[1]]$W - (grads$W1 * learning_rate)
	layers[[3]]$W <- layers[[3]]$W - (grads$W2 * learning_rate)
	layers[[1]]$b <- layers[[1]]$b - (grads$b1 * learning_rate)
	layers[[3]]$b <- layers[[3]]$b - (grads$b2 * learning_rate)
}
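
To look at the result, the recorded losses can simply be plotted, and a rough training-set accuracy can be computed with predict() (both are my own sketches, not from the post); the figure below presumably shows this loss curve.

plot(loss_vector, type="l", xlab="iteration", ylab="loss")

pred <- predict(x_train[1:1000, ])                      # scores for the first 1000 images
mean(max.col(pred) == max.col(t_train[1:1000, ]))       # fraction classified correctly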

f:id:touch-sp:20170428023009p:plain:w350