[Rust] Layers and Backpropagation - 'Deep Learning from Scratch' ('밑바닥부터 시작하는 딥러닝'), Chapter 5
use std::default::Default;

use rulinalg::matrix::{Matrix, BaseMatrix, BaseMatrixMut};
use rand;

use common::matrix_utils::PartialMatrix;
use common::utils;
use ch03::activation;
use ch04::loss;

/// Common interface for every layer: a forward pass, and a backward pass that
/// takes the upstream gradient `dout` and returns the gradient with respect to
/// the layer's input.
pub trait Layer {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32>;
    fn backward(&mut self, dout: &Matrix<f32>) -> Matrix<f32>;
}
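Every concrete layer only has to provide these two methods, and the network further down can drive them without knowing what each layer does. As a quick illustration of the contract (not from the original post; `Scale` is purely hypothetical), a layer that multiplies its input by a constant would forward `factor * x` and, by the chain rule, backward `factor * dout`:

// Hypothetical layer, used only to illustrate the Layer trait.
pub struct Scale {
    factor: f32,
}

impl Layer for Scale {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        // y = factor * x
        x * self.factor
    }

    fn backward(&mut self, dout: &Matrix<f32>) -> Matrix<f32> {
        // dL/dx = factor * dL/dy
        dout * self.factor
    }
}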
pub struct Relu {
    mask: PartialMatrix,
}

impl Relu {
    pub fn new() -> Self {
        Relu { mask: Default::default(), }
    }
}

impl Layer for Relu {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        // Remember which elements were <= 0 and clamp them to 0.
        self.mask = PartialMatrix::le(x, 0.0);
        let mut out = utils::copy_matrix(x);
        self.mask.set(&mut out, 0.0);
        out
    }

    fn backward(&mut self, dout: &Matrix<f32>) -> Matrix<f32> {
        // The gradient flows through only where the input was positive.
        let mut dx = utils::copy_matrix(dout);
        self.mask.set(&mut dx, 0.0);
        dx
    }
}
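ReLU's backward pass reuses the mask recorded during forward: wherever the input was non-positive the output was 0, so the gradient at that position is also 0. The same idea on a plain Vec<f32>, as a self-contained sketch independent of rulinalg and PartialMatrix:

fn main() {
    let x = vec![-2.0_f32, -0.5, 0.0, 1.5, 3.0];
    let dout = vec![1.0_f32; 5];

    // Forward: remember where x <= 0 and clamp those positions to 0.
    let mask: Vec<bool> = x.iter().map(|&v| v <= 0.0).collect();
    let out: Vec<f32> = x.iter().map(|&v| if v <= 0.0 { 0.0 } else { v }).collect();

    // Backward: the gradient flows only where the mask is false.
    let dx: Vec<f32> = dout.iter().zip(&mask)
        .map(|(&d, &m)| if m { 0.0 } else { d })
        .collect();

    println!("out = {:?}", out); // [0.0, 0.0, 0.0, 1.5, 3.0]
    println!("dx  = {:?}", dx);  // [0.0, 0.0, 0.0, 1.0, 1.0]
}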
pub struct Sigmoid {
    out: Matrix<f32>,
}

impl Sigmoid {
    pub fn new() -> Self {
        Sigmoid { out: matrix![0.0], }
    }
}

impl Layer for Sigmoid {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        // Keep the output; the backward pass only needs y, not x.
        self.out = activation::sigmoid(utils::copy_matrix(x));
        utils::copy_matrix(&self.out)
    }

    fn backward(&mut self, dout: &Matrix<f32>) -> Matrix<f32> {
        // dL/dx = dout * (1 - y) * y
        let dx = dout.elemul(&(-&self.out + 1.0)).elemul(&self.out);
        utils::copy_matrix(&dx)
    }
}
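The backward line relies on the identity σ'(x) = σ(x)(1 − σ(x)), which is why only the stored output is needed. A self-contained numerical check of that identity against a central difference:

fn sigmoid(x: f64) -> f64 {
    1.0 / (1.0 + (-x).exp())
}

fn main() {
    let x = 0.7;
    let y = sigmoid(x);

    // Analytic derivative, using only the forward output.
    let analytic = y * (1.0 - y);

    // Numerical derivative via central difference.
    let h = 1e-4;
    let numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2.0 * h);

    println!("analytic = {}, numeric = {}", analytic, numeric);
    assert!((analytic - numeric).abs() < 1e-6);
}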
pub struct Affine {
    w: Matrix<f32>,
    b: Matrix<f32>,
    x: Matrix<f32>,
    dw: Matrix<f32>,
    db: Matrix<f32>,
}

impl Affine {
    pub fn new(input_size: usize, output_size: usize, w_init_std: f32) -> Self {
        Affine {
            // Small random weights, zero biases.
            w: Matrix::from_fn(input_size, output_size, |_, _| rand::random::<f32>() * w_init_std),
            b: Matrix::zeros(1, output_size),
            x: matrix![0.0],
            dw: matrix![0.0],
            db: matrix![0.0],
        }
    }

    pub fn learn(&mut self, lr: f32) {
        // Plain SGD step using the gradients stored by backward().
        self.w -= &self.dw * lr;
        self.b -= &self.db * lr;
    }
}

impl Layer for Affine {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        self.x = utils::copy_matrix(x);
        // out = x * W + b, with the bias broadcast over every row of the batch.
        let mut out = x * &self.w;
        for mut row in out.row_iter_mut() {
            *row += &self.b;
        }
        out
    }

    fn backward(&mut self, dout: &Matrix<f32>) -> Matrix<f32> {
        // dx = dout * W^T, dW = x^T * dout, db = dout summed over the batch rows.
        let dx = dout * self.w.transpose();
        self.dw = self.x.transpose() * dout;
        self.db = Matrix::new(1, dout.cols(), dout.sum_rows().into_iter().collect::<Vec<_>>());
        dx
    }
}
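With x of shape (N, in), W of shape (in, out) and b of shape (1, out), each gradient keeps the shape of the value it belongs to: dx = dout·Wᵀ is (N, in), dW = xᵀ·dout is (in, out), and db is dout summed over the N batch rows, i.e. (1, out), which is exactly what rulinalg's sum_rows() produces (per-column sums). A self-contained toy check of those shapes using plain nested Vecs instead of rulinalg:

fn matmul(a: &[Vec<f64>], b: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let (n, k, m) = (a.len(), b.len(), b[0].len());
    let mut out = vec![vec![0.0; m]; n];
    for i in 0..n {
        for j in 0..m {
            for p in 0..k {
                out[i][j] += a[i][p] * b[p][j];
            }
        }
    }
    out
}

fn transpose(a: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let (n, m) = (a.len(), a[0].len());
    (0..m).map(|j| (0..n).map(|i| a[i][j]).collect()).collect()
}

fn main() {
    // Batch of 2 samples, 3 inputs, 2 outputs.
    let x = vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]];        // (2, 3)
    let w = vec![vec![0.1, 0.2], vec![0.3, 0.4], vec![0.5, 0.6]];  // (3, 2)
    let dout = vec![vec![1.0, 1.0], vec![1.0, 1.0]];               // (2, 2)

    let dx = matmul(&dout, &transpose(&w)); // (2, 3), same shape as x
    let dw = matmul(&transpose(&x), &dout); // (3, 2), same shape as w
    // db: sum dout over the batch (row) dimension -> (1, 2)
    let db: Vec<f64> = (0..2).map(|j| dout.iter().map(|r| r[j]).sum::<f64>()).collect();

    println!("dx = {:?}", dx);
    println!("dw = {:?}", dw);
    println!("db = {:?}", db);
}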
pub struct SoftmaxWithLoss {
    y: Matrix<f32>,
    t: Matrix<f32>,
}

impl SoftmaxWithLoss {
    pub fn new() -> Self {
        SoftmaxWithLoss { y: matrix![0.0], t: matrix![0.0], }
    }

    pub fn set_label(&mut self, t: &Matrix<f32>) {
        self.t = utils::copy_matrix(t);
    }
}

impl Layer for SoftmaxWithLoss {
    fn forward(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        self.y = activation::softmax(utils::copy_matrix(x));
        let loss_val = loss::cross_entropy_error(&self.y, &self.t);
        matrix![loss_val]
    }

    fn backward(&mut self, _: &Matrix<f32>) -> Matrix<f32> {
        // Softmax combined with cross-entropy backpropagates simply (y - t) / batch_size.
        (&self.y - &self.t) / (self.t.rows() as f32)
    }
}
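The reason backward can ignore dout is the well-known simplification: when softmax is followed by cross-entropy loss, the gradient with respect to the logits collapses to y − t (divided by the batch size when the loss is averaged). A self-contained single-sample demonstration:

fn softmax(x: &[f64]) -> Vec<f64> {
    // Subtract the max for numerical stability.
    let max = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    let exps: Vec<f64> = x.iter().map(|v| (v - max).exp()).collect();
    let sum: f64 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

fn main() {
    // One sample with 3 classes; the true class is index 1 (one-hot label).
    let logits = [2.0, 1.0, 0.1];
    let t = [0.0, 1.0, 0.0];

    let y = softmax(&logits);
    let loss: f64 = -t.iter().zip(&y).map(|(&ti, &yi)| ti * yi.ln()).sum::<f64>();

    // The combined softmax + cross-entropy gradient w.r.t. the logits is simply y - t.
    let grad: Vec<f64> = y.iter().zip(&t).map(|(&yi, &ti)| yi - ti).collect();

    println!("y    = {:?}", y);
    println!("loss = {}", loss);
    println!("grad = {:?}", grad);
}

The next listing wires these layers into a two-layer network: Affine → ReLU → Affine, with SoftmaxWithLoss attached only for training.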
use rulinalg::matrix::{Matrix, BaseMatrix};

use common::utils;
use super::layers::{self, Layer};

pub struct MultiLayerNet {
    affine1: layers::Affine,
    relu1: layers::Relu,
    affine2: layers::Affine,
    last: layers::SoftmaxWithLoss,
}

impl MultiLayerNet {
    pub fn new(input_size: usize, hidden_size: usize, output_size: usize) -> Self {
        MultiLayerNet {
            affine1: layers::Affine::new(input_size, hidden_size, 0.01),
            relu1: layers::Relu::new(),
            affine2: layers::Affine::new(hidden_size, output_size, 0.01),
            last: layers::SoftmaxWithLoss::new(),
        }
    }

    pub fn predict(&mut self, x: &Matrix<f32>) -> Matrix<f32> {
        // Forward pass through every layer except the loss layer.
        let layers: Vec<&mut Layer> = vec![&mut self.affine1, &mut self.relu1, &mut self.affine2];

        let mut prev_out = utils::copy_matrix(x);
        for layer in layers {
            prev_out = layer.forward(&prev_out);
        }

        prev_out
    }

    pub fn loss(&mut self, x: &Matrix<f32>, t: &Matrix<f32>) -> f32 {
        let y = self.predict(x);
        self.last.set_label(t);

        // The loss layer returns a 1x1 matrix; take its single element.
        for v in self.last.forward(&y).iter() {
            return *v;
        }

        0.0
    }

    pub fn accuracy(&mut self, x: &Matrix<f32>, t: &Matrix<f32>) -> f32 {
        let y = self.predict(x);
        let y = utils::argmax(&y);
        let t = utils::argmax(t);

        // Count how many predicted classes match the labels.
        let mut cnt = 0;
        for (a, b) in y.iter().zip(t.iter()) {
            if (a - b).abs() < 0.000001 {
                cnt += 1;
            }
        }

        cnt as f32 / t.rows() as f32
    }

    pub fn learn(&mut self, x: &Matrix<f32>, t: &Matrix<f32>, lr: f32) -> f32 {
        // Forward
        let loss_val = self.loss(x, t);

        // Backward: push the gradient through the layers in reverse order.
        {
            let layers: Vec<&mut Layer> = vec![&mut self.affine1, &mut self.relu1, &mut self.affine2];

            let mut dout = matrix![1.0];
            dout = self.last.backward(&dout);

            for layer in layers.into_iter().rev() {
                dout = layer.backward(&dout);
            }
        }

        // Learn: apply SGD with the gradients each Affine layer stored during backward.
        self.affine1.learn(lr);
        self.affine2.learn(lr);

        loss_val
    }
}
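learn() therefore does everything in one call: forward to get the loss, backward in reverse order to fill each layer's stored gradients, then an SGD step on both affine layers. A minimal usage sketch on dummy data (hypothetical, not from the original post; it assumes MultiLayerNet is in scope and rulinalg is available):

use rulinalg::matrix::Matrix;

fn demo() {
    // 4 inputs, 8 hidden units, 3 output classes.
    let mut net = MultiLayerNet::new(4, 8, 3);

    // A dummy batch of 2 samples with one-hot labels.
    let x: Matrix<f32> = Matrix::new(2, 4, vec![0.1, 0.2, 0.3, 0.4,
                                                0.5, 0.6, 0.7, 0.8]);
    let t: Matrix<f32> = Matrix::new(2, 3, vec![1.0, 0.0, 0.0,
                                                0.0, 1.0, 0.0]);

    // One forward + backward + SGD step; learn() returns the loss for this batch.
    let loss = net.learn(&x, &t, 0.1);
    println!("loss = {}, acc = {}", loss, net.accuracy(&x, &t));
}

The MNIST training loop below is the same pattern scaled up: sweep the training set in mini-batches each epoch and track the accuracy.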
pub fn test_layered_net() {
    // Mnist is the MNIST loader used in the earlier posts (not shown here).
    let mnist = Mnist::new();

    let mut net = MultiLayerNet::new(784, 100, 10);

    let iters_num = 20;
    let train_size = mnist.train_x.rows();
    let batch_size = 100;
    let learning_rate = 0.1;

    for _ in 0..iters_num {
        // One epoch: sweep the whole training set in mini-batches.
        let mut batch_offset = 0;
        let mut loss = 0.0;

        while batch_offset < train_size {
            let (batch_x, batch_y) = mnist.get_train_batch(batch_offset, batch_size);
            loss += net.learn(&batch_x, &batch_y, learning_rate);
            batch_offset += batch_size;
        }

        // Evaluate on 1,000 training and 1,000 validation samples after each epoch.
        let (train_x, train_y) = mnist.get_train_batch(0, 1000);
        let (val_x, val_y) = mnist.get_validation_batch(0, 1000);
        let acc_train = net.accuracy(&train_x, &train_y);
        let acc_val = net.accuracy(&val_x, &val_y);

        println!("Loss: {}, Acc: {}, Test Acc: {}", loss, acc_train, acc_val);
    }

    let (test_x, test_y) = mnist.get_test_batch(0, 1000);
    let acc_test = net.accuracy(&test_x, &test_y);
    println!("Final test acc: {}", acc_test);
}
Loss: 182.46555, Acc: 0.875, Test Acc: 0.858
Loss: 72.98125, Acc: 0.906, Test Acc: 0.891
Loss: 60.6465, Acc: 0.919, Test Acc: 0.911
Loss: 51.66449, Acc: 0.942, Test Acc: 0.927
Loss: 44.759678, Acc: 0.951, Test Acc: 0.929
Loss: 39.429985, Acc: 0.957, Test Acc: 0.933
Loss: 35.239746, Acc: 0.961, Test Acc: 0.941
Loss: 31.797348, Acc: 0.967, Test Acc: 0.947
Loss: 28.916616, Acc: 0.969, Test Acc: 0.953
Loss: 26.458937, Acc: 0.972, Test Acc: 0.954
Loss: 24.360697, Acc: 0.972, Test Acc: 0.956
Loss: 22.553885, Acc: 0.973, Test Acc: 0.96
Loss: 20.985374, Acc: 0.976, Test Acc: 0.965
Loss: 19.583195, Acc: 0.977, Test Acc: 0.965
Loss: 18.327232, Acc: 0.978, Test Acc: 0.968
Loss: 17.196884, Acc: 0.978, Test Acc: 0.968
Loss: 16.17377, Acc: 0.978, Test Acc: 0.971
Loss: 15.246964, Acc: 0.978, Test Acc: 0.973
Loss: 14.399693, Acc: 0.978, Test Acc: 0.975
Loss: 13.61666, Acc: 0.979, Test Acc: 0.975
Final test acc: 0.977