张一极
WeChat official account: 视觉迷航
This article is about 1516 characters long; the math involved is only at a first-year-undergraduate level, so you can read it with confidence.
Prerequisites: familiarity with the forward-propagation rule and basic linear algebra / matrix operations.
The goal is the gradient of the loss at the last layer. Viewed element by element, this gradient is

$$\delta^L_j = \frac{\partial C}{\partial z^L_j},$$

i.e. the partial derivative of the last layer's loss with respect to its $j$-th neuron. By the chain rule,

$$\delta^L_j = \frac{\partial C}{\partial a^L_j}\,\frac{\partial a^L_j}{\partial z^L_j},$$

where $z^L$ is the last layer's output before the activation function and $a^L = \sigma(z^L)$ is the last layer's activated output, so

$$\frac{\partial a^L_j}{\partial z^L_j} = \sigma'(z^L_j).$$

For a single neuron of the last layer the gradient is therefore

$$\delta^L_j = \frac{\partial C}{\partial a^L_j}\,\sigma'(z^L_j),$$

and extended to the whole layer:

$$\delta^L = \nabla_a C \odot \sigma'(z^L)$$

($\odot$ denotes element-wise multiplication of the corresponding entries of two matrices).
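For concreteness (an added illustration, not from the original text): assuming the mean-squared-error loss $C = \frac{1}{2}(a^L_j - y_j)^2$ and the sigmoid activation $\sigma(z) = 1/(1+e^{-z})$, which is exactly the sigmoid hard-coded in the function below, the formula specializes to

$$\delta^L_j = (a^L_j - y_j)\,\sigma(z^L_j)\bigl(1 - \sigma(z^L_j)\bigr).$$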
```cpp
Matrix end_layer_backward(Matrix label, Matrix acti_val, Node loss_fun(Node,Node), Node act_fun(Node))
{
    Matrix loss_act = CreateMatrix(acti_val.row, acti_val.col);   // dC/da for each output neuron
    Matrix act_output = CreateMatrix(acti_val.row, acti_val.col); // sigma'(z) for each output neuron
    int index_x, index_y;
    for(index_x = 0; index_x < loss_act.row; index_x++)
    {
        Node t1 = label.matrix[index_x][0], z31 = acti_val.matrix[index_x][0];
        Node a13 = 1/(1+(1/exp(z31)));                        // sigmoid: 1/(1+e^{-z})
        Node loss = loss_fun(t1, a13);
        Node act = act_fun(z31);
        act_output.matrix[index_x][0] = act.gradient(z31);    // sigma'(z^L_j)
        loss_act.matrix[index_x][0] = loss.gradient(a13);     // dC/da^L_j
    }
    // element-wise product of the two columns gives delta^L
    Matrix mid_grad_end = mul_simple(loss_act, act_output);
    cout_mat(mid_grad_end);
    return mid_grad_end;
}
```
Parameters:
Matrix label: the label used to compute the final loss
Matrix acti_val: the output of the previous layer
Node loss_fun(Node,Node): the loss function
Node act_fun(Node): the activation function
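Before moving on to the earlier layers, here is a minimal, self-contained sketch of the last-layer gradient that does not depend on the framework (Matrix, Node and mul_simple above are Edge-Computing-Engine types). It assumes the MSE loss and sigmoid activation from the illustration above; every name in it is purely illustrative.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch only (not framework code): delta^L = (a - y) ⊙ sigma'(z),
// assuming MSE loss C = 1/2 (a - y)^2 and sigmoid activation.
std::vector<double> end_layer_delta(const std::vector<double>& z,
                                    const std::vector<double>& y)
{
    std::vector<double> delta(z.size());
    for (size_t j = 0; j < z.size(); ++j)
    {
        double a = 1.0 / (1.0 + std::exp(-z[j])); // a^L_j = sigma(z^L_j)
        double dC_da = a - y[j];                  // dC/da^L_j for MSE
        double dsig  = a * (1.0 - a);             // sigma'(z^L_j)
        delta[j] = dC_da * dsig;                  // element-wise product
    }
    return delta;
}

int main()
{
    std::vector<double> z = {0.0, 1.0}, y = {1.0, 0.0};
    std::vector<double> d = end_layer_delta(z, y);
    std::printf("%f %f\n", d[0], d[1]); // -0.125 and about 0.1437
    return 0;
}
```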
Goal: compute the gradient of every earlier layer.

Again we start from a single element. Let $z$ be the un-activated output, i.e. the result of $W \cdot \text{input}$ for that layer. Then $\delta^l_j$ denotes the gradient of the loss with respect to layer $l$; $C$ denotes the cost, and the partial derivative of $C$ with respect to $z^l_j$ is the gradient of the $j$-th neuron of layer $l$:

$$\delta^l_j = \frac{\partial C}{\partial z^l_j} = \sum_k \frac{\partial C}{\partial z^{l+1}_k}\,\frac{\partial z^{l+1}_k}{\partial z^l_j} = \sum_k \delta^{l+1}_k\,\frac{\partial z^{l+1}_k}{\partial z^l_j}.$$

A few points deserve attention. First, the sum over $\delta^{l+1}_k$ runs over all neurons of layer $l+1$, because the $j$-th neuron of layer $l$ is connected to every neuron of the next layer. Second, the factor $\partial z^{l+1}_k / \partial z^l_j$ is the gradient of the next layer's output with respect to this layer's output; since $z^{l+1}_k = \sum_j w^{l+1}_{kj}\,\sigma(z^l_j) + b^{l+1}_k$, it can be simplified further:

$$\frac{\partial z^{l+1}_k}{\partial z^l_j} = w^{l+1}_{kj}\,\sigma'(z^l_j).$$

The result is the weight of layer $l+1$ times the final factor $\sigma'(z^l_j)$. Combining the three parts gives the result of equation 2:

$$\delta^l_j = \sum_k w^{l+1}_{kj}\,\delta^{l+1}_k\,\sigma'(z^l_j).$$

Generalized to the whole layer, this is

$$\delta^l = \bigl((W^{l+1})^T\,\delta^{l+1}\bigr) \odot \sigma'(z^l).$$
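A quick numeric check of the matrix form (arbitrary example values, not from the article): take

$$W^{l+1} = \begin{pmatrix}1 & 2\\ 3 & 4\end{pmatrix},\quad \delta^{l+1} = \begin{pmatrix}0.1\\ 0.2\end{pmatrix},\quad \sigma'(z^l) = \begin{pmatrix}0.25\\ 0.25\end{pmatrix};$$

then $(W^{l+1})^T\delta^{l+1} = \begin{pmatrix}0.7\\ 1.0\end{pmatrix}$ and $\delta^l = \begin{pmatrix}0.175\\ 0.25\end{pmatrix}$.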
Code:
```cpp
Matrix backward(Matrix grad_next, Matrix output_before, Matrix weights, Node p_(Node))
{
    // replace each pre-activation value z with sigma'(z)
    for(int index = 0; index < output_before.row; index++)
    {
        Node z = output_before.matrix[index][0];
        Node anyone = p_(z);
        change_va(output_before, index, 0, anyone.gradient(z));
    }
    // propagate the next layer's gradient back through the weights,
    // then take the element-wise product with sigma'(z)
    return mul_simple(mul(weights, grad_next), output_before);
}
```
Parameters:
Matrix grad_next: the gradient of the next layer
Matrix output_before: the output of the previous layer
Matrix weights: the weight matrix
Node p_(Node): the activation function

Example:

```cpp
Matrix output_end = sequaltial.end_layer_backward(label, output2_without_act, *loss, *act);
Matrix backward3 = sequaltial.backward(output_end, output1_without_act, weight2, *act); //BP2
Matrix weight_1_grad = mul(backward3, get_T(input));
```

Update:

```cpp
weight1 = subtract(weight1, times_mat(0.001, weight_1_grad));
```
Explanation:
For a single neuron, consider the weight $w^l_{jk}$ connecting neuron $k$ of the previous layer to neuron $j$ of the current layer. Its gradient is the previous layer's output (in other words, this layer's input) multiplied by the gradient of neuron $j$ of the current layer:

$$\frac{\partial C}{\partial w^l_{jk}} = a^{l-1}_k\,\delta^l_j,$$

which for the whole layer is the layer's gradient times the transposed input, exactly the line below.
```cpp
Matrix weight_1_grad = mul(backward3, get_T(input)); //BP3
```
For the bias, the gradient is simply the current layer's gradient: $\partial C / \partial b^l_j = \delta^l_j$.
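To make the weight and bias gradients concrete, here is a small standalone sketch (plain C++, not framework code; the values reuse the arbitrary example numbers from the check above). It forms the outer product $\delta^l (a^{l-1})^T$ for the weights, copies $\delta^l$ for the biases, and applies the same 0.001 gradient-descent update used in the article.

```cpp
#include <cstdio>

// Illustrative sketch: gradients and update for one 2x2 layer.
int main()
{
    double delta[2]  = {0.175, 0.25};   // example delta^l
    double a_prev[2] = {0.5, 1.0};      // example a^{l-1} (this layer's input)
    double dW[2][2], db[2];
    for (int j = 0; j < 2; ++j)
    {
        db[j] = delta[j];                          // dC/db^l_j = delta^l_j
        for (int k = 0; k < 2; ++k)
            dW[j][k] = delta[j] * a_prev[k];       // dC/dw^l_{jk} = a^{l-1}_k * delta^l_j
    }
    // gradient-descent update with learning rate 0.001, as in the article
    double lr = 0.001, W[2][2] = {{0.1, 0.2}, {0.3, 0.4}}, b[2] = {1.0, 1.0};
    for (int j = 0; j < 2; ++j)
    {
        b[j] -= lr * db[j];
        for (int k = 0; k < 2; ++k)
            W[j][k] -= lr * dW[j][k];
    }
    std::printf("dW[0][0]=%f  db[0]=%f\n", dW[0][0], db[0]); // 0.0875 and 0.175
    return 0;
}
```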
Below is backpropagation implemented with my own framework, which may help with understanding:
```cpp
//author: 张一极
//github repo: https://github.com/AllenZYJ/Edge-Computing-Engine
Matrix data_mine = CreateMatrix(2,1);
Matrix label = CreateRandMat(2,1);
Matrix weight1 = CreateRandMat(2,2);
Matrix bais1 = ones(2,1);
Matrix weight2 = CreateRandMat(2,2);
Matrix bais2 = ones(2,1);
for(int epoch = 0; epoch < 5; epoch++)
{
    cout<<"---------epoch: "<<epoch<<"------------"<<endl;
    cout_mat(weight1);
    int input_dim = 2;
    int output_dim = 2;
    edge_network sequaltial(input_dim, output_dim);
    // forward pass, with and without the activation function
    Matrix output1 = sequaltial.forward(data_mine, weight1, bais1);
    Matrix output1_without_act = sequaltial.forward_without_act(data_mine, weight1, bais1);
    Matrix output2 = sequaltial.forward(output1, weight2, bais2);
    Matrix output2_without_act = sequaltial.forward_without_act(output1, weight2, bais2);
    // backward pass: last-layer gradient, then the hidden-layer gradient
    Matrix output_end = sequaltial.end_layer_backward(label, output2_without_act, *loss, *act);
    Matrix backward3 = sequaltial.backward(output_end, output1_without_act, weight2, *act);
    // weight gradients: layer gradient times the transposed layer input
    Matrix weight_2_grad = mul(output_end, get_T(output1));
    Matrix weight_1_grad = mul(backward3, get_T(data_mine));
    // gradient-descent update, learning rate 0.001
    weight1 = subtract(weight1, times_mat(0.001, weight_1_grad));
    bais1 = subtract(bais1, times_mat(0.001, backward3));
    weight2 = subtract(weight2, times_mat(0.001, weight_2_grad));
    bais2 = subtract(bais2, times_mat(0.001, output_end));
    cout<<"neraul end;"<<endl;
}
return 0;
```
result:

```
---------epoch: 0------------
0.0073,0.3658,
1.893,1.1272,
0.0056014,
-0.0142086,
neraul end;
---------epoch: 1------------
0.0073,0.3658,
1.893,1.1272,
0.00560088,
-0.0142082,
neraul end;
---------epoch: 2------------
0.0073,0.3658,
1.893,1.1272,
0.00560037,
-0.0142077,
neraul end;
---------epoch: 3------------
0.0073,0.3658,
1.893,1.1272,
0.00559986,
-0.0142073,
neraul end;
---------epoch: 4------------
0.0073,0.3658,
1.893,1.1272,
0.00559935,
-0.0142069,
neraul end;
```