张一极
WeChat official account: 视觉迷航
This article runs about 1,516 characters; the math involved is only first-year undergraduate level, so feel free to dig in. Prerequisites:
Familiarity with the forward-propagation rules and basic linear-algebra matrix operations.
The goal is to find the gradient of the loss at the last layer. Broken down to a single element, that gradient is:

$$\delta^L_j = \frac{\partial C}{\partial z^L_j}$$

$\delta^L_j$ is the partial derivative of the last layer's loss with respect to its $j$-th neuron. By the chain rule,

$$\delta^L_j = \frac{\partial C}{\partial a^L_j}\cdot\frac{\partial a^L_j}{\partial z^L_j}$$

where $z^L$ is the last layer's output before the activation function and $a^L = \sigma(z^L)$ is the last layer's output after activation, so:

$$\frac{\partial a^L_j}{\partial z^L_j} = \sigma'(z^L_j)$$

For a single neuron of the last layer, the gradient is therefore (BP1):

$$\delta^L_j = \frac{\partial C}{\partial a^L_j}\,\sigma'(z^L_j)$$

Extended to the whole layer, the gradient is:

$$\delta^L = \nabla_a C \odot \sigma'(z^L)$$

($\odot$ denotes element-wise multiplication of the two matrices.)
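To make BP1 concrete before the framework code, here is a minimal standalone sketch in plain C++ (the squared-error loss and sigmoid activation are my own assumptions for illustration): it computes $\delta^L = (a^L - y)\odot\sigma'(z^L)$ element by element.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Minimal BP1 sketch. Assumptions: sigmoid activation, squared-error loss
// C = 1/2 * sum_j (a_j - y_j)^2, so dC/da_j = (a_j - y_j).
std::vector<double> bp1(const std::vector<double>& z_L, const std::vector<double>& y)
{
    std::vector<double> delta(z_L.size());
    for (size_t j = 0; j < z_L.size(); ++j)
    {
        double a = 1.0 / (1.0 + std::exp(-z_L[j])); // a_j = sigma(z_j)
        double sigma_prime = a * (1.0 - a);         // sigma'(z_j) = sigma(z_j)(1 - sigma(z_j))
        delta[j] = (a - y[j]) * sigma_prime;        // dC/da_j * sigma'(z_j)
    }
    return delta;
}

int main()
{
    std::vector<double> z = {0.5, -1.2}, y = {1.0, 0.0};
    for (double d : bp1(z, y)) std::printf("%f\n", d);
    return 0;
}
```

The framework implementation of the same step, end_layer_backward, is below: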
```cpp
Matrix end_layer_backward(Matrix label, Matrix acti_val, Node loss_fun(Node, Node), Node act_fun(Node))
{
    Matrix loss_act = CreateMatrix(acti_val.row, acti_val.col);   // holds dC/da^L_j
    Matrix act_output = CreateMatrix(acti_val.row, acti_val.col); // holds sigma'(z^L_j)
    for (int index_x = 0; index_x < loss_act.row; index_x++)
    {
        Node t1 = label.matrix[index_x][0], z31 = acti_val.matrix[index_x][0];
        Node a13 = 1 / (1 + (1 / exp(z31)));                      // sigmoid: a = 1 / (1 + e^-z)
        Node loss = loss_fun(t1, a13);
        Node act = act_fun(z31);
        act_output.matrix[index_x][0] = act.gradient(z31);        // sigma'(z^L_j)
        loss_act.matrix[index_x][0] = loss.gradient(a13);         // dC/da^L_j
    }
    Matrix mid_grad_end = mul_simple(loss_act, act_output);       // element-wise product: BP1
    cout_mat(mid_grad_end);
    return mid_grad_end;
}
```

Parameters:
- Matrix label: the label used to compute the final loss
- Matrix acti_val: the previous layer's output
- Node loss_fun(Node, Node): the loss function
- Node act_fun(Node): the activation function
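As a quick sanity check on the per-element factor that act.gradient(z31) returns (the check itself is mine, not framework code): for the sigmoid used above, $\sigma'(z) = \sigma(z)(1 - \sigma(z))$, and a central finite difference confirms the identity.

```cpp
#include <cmath>
#include <cstdio>

// Standalone check: the analytic sigmoid derivative sigma(z)(1 - sigma(z))
// should match a central finite difference of sigma at the same point.
double sigmoid(double z) { return 1.0 / (1.0 + std::exp(-z)); }

int main()
{
    double z = 0.7, h = 1e-6;
    double analytic = sigmoid(z) * (1.0 - sigmoid(z));
    double numeric  = (sigmoid(z + h) - sigmoid(z - h)) / (2.0 * h);
    std::printf("analytic=%.8f numeric=%.8f\n", analytic, numeric);
    return 0;
}
```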
Goal: compute the gradients of all the earlier layers.
As before, we start from a single element. Let $z$ be the pre-activation output, i.e. the result of $W\cdot\text{input}$ for that layer. Then $\delta^l_j$ denotes the gradient of the loss at layer $l$: $C$ is the cost, and the partial derivative of $C$ with respect to $z^l_j$ is the gradient of the $j$-th neuron of layer $l$:

$$\delta^l_j = \frac{\partial C}{\partial z^l_j} = \sum_k \frac{\partial C}{\partial z^{l+1}_k}\,\frac{\partial z^{l+1}_k}{\partial z^l_j} = \sum_k \delta^{l+1}_k\,\frac{\partial z^{l+1}_k}{\partial z^l_j}$$

A few things deserve attention here. The first is the $\delta^{l+1}_k$ term under the sum: it stands for the gradients of all the neurons of layer $l+1$, because the $j$-th neuron of layer $l$ is connected to every neuron of the next layer. The $\frac{\partial z^{l+1}_k}{\partial z^l_j}$ part is the gradient of layer $l+1$'s output with respect to the previous layer's output, and it can be simplified further: since

$$z^{l+1}_k = \sum_i w^{l+1}_{ki}\,\sigma(z^l_i) + b^{l+1}_k,$$

differentiating with respect to $z^l_j$ gives

$$\frac{\partial z^{l+1}_k}{\partial z^l_j} = w^{l+1}_{kj}\,\sigma'(z^l_j).$$

The result is the weight of layer $l+1$ times the last part, $\sigma'(z^l_j)$. Putting the three parts together gives formula 2 (BP2):

$$\delta^l_j = \sum_k w^{l+1}_{kj}\,\delta^{l+1}_k\,\sigma'(z^l_j)$$

Generalized to the whole layer, this is:

$$\delta^l = \left((W^{l+1})^T\,\delta^{l+1}\right) \odot \sigma'(z^l)$$
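Here is a minimal standalone sketch of BP2 in plain C++ (the container types, names, and the sigmoid activation are my assumptions, not the framework's API): form $(W^{l+1})^T\delta^{l+1}$ first, then multiply elementwise by $\sigma'(z^l)$.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// BP2 sketch (assumption: sigmoid activation).
// W_next is (n_next x n_l); W_next[k][j] connects neuron j of layer l to
// neuron k of layer l+1, so summing over k implements (W^{l+1})^T delta^{l+1}.
std::vector<double> bp2(const std::vector<std::vector<double>>& W_next,
                        const std::vector<double>& delta_next,
                        const std::vector<double>& z_l)
{
    std::vector<double> delta(z_l.size(), 0.0);
    for (size_t j = 0; j < z_l.size(); ++j)
    {
        double sum = 0.0;
        for (size_t k = 0; k < delta_next.size(); ++k)
            sum += W_next[k][j] * delta_next[k];    // ((W^{l+1})^T delta^{l+1})_j
        double a = 1.0 / (1.0 + std::exp(-z_l[j]));
        delta[j] = sum * a * (1.0 - a);             // ... * sigma'(z^l_j)
    }
    return delta;
}

int main()
{
    std::vector<std::vector<double>> W = {{0.1, 0.4}, {0.7, 0.2}};
    std::vector<double> delta_next = {0.05, -0.01}, z = {0.3, -0.6};
    for (double d : bp2(W, delta_next, z)) std::printf("%f\n", d);
    return 0;
}
```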
Code:
```cpp
Matrix backward(Matrix grad_next, Matrix output_before, Matrix weights, Node p_(Node))
{
    for (int index = 0; index < output_before.row; index++)
    {
        Node z = output_before.matrix[index][0];
        Node anyone = p_(z);
        change_va(output_before, index, 0, anyone.gradient(z)); // overwrite with sigma'(z^l_j)
    }
    return mul_simple(mul(weights, grad_next), output_before);  // weight-propagated gradient, elementwise * sigma'(z^l)
}
```

Parameters:

- Matrix grad_next: the next layer's gradient
- Matrix output_before: the previous layer's output
- Matrix weights: the weight matrix
- Node p_(Node): the activation function

Example:

```cpp
Matrix output_end = sequaltial.end_layer_backward(label, output2_without_act, *loss, *act);
Matrix backward3 = sequaltial.backward(output_end, output1_without_act, weight2, *act); // BP2
Matrix weight_1_grad = mul(backward3, get_T(input));
```

Update:

```cpp
weight1 = subtract(weight1, times_mat(0.001, weight_1_grad));
```

Explanation:
For a single neuron: take the weight $w$ connecting neuron $j$ of layer $l-1$ to neuron $i$ of the next layer. Its gradient is the previous layer's output (in other words, this layer's input) multiplied by the gradient of neuron $i$ in the next layer:

$$\frac{\partial C}{\partial w^l_{ij}} = a^{l-1}_j\,\delta^l_i$$

In matrix form this is $\nabla_{W^l} C = \delta^l\,(a^{l-1})^T$, which is exactly what the line below computes.
```cpp
Matrix weight_1_grad = mul(backward3, get_T(input)); // BP3: delta^l * (a^{l-1})^T
```

The gradient of the bias is simply the current layer's gradient $\delta^l$, so it can be used directly.
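For intuition, here is a minimal sketch of these two rules with plain arrays (the names and types are mine, not the framework's): the weight gradient is the outer product $\delta^l (a^{l-1})^T$, and the bias gradient is $\delta^l$ itself.

```cpp
#include <cstdio>
#include <vector>

// dC/dW[j][k] = delta_l[j] * a_prev[k]  (outer product delta^l * (a^{l-1})^T)
// dC/db[j]    = delta_l[j]              (bias gradient is the layer's delta)
void weight_bias_grads(const std::vector<double>& delta_l,
                       const std::vector<double>& a_prev,
                       std::vector<std::vector<double>>& W_grad,
                       std::vector<double>& b_grad)
{
    W_grad.assign(delta_l.size(), std::vector<double>(a_prev.size()));
    b_grad = delta_l;                              // bias gradient: just delta^l
    for (size_t j = 0; j < delta_l.size(); ++j)
        for (size_t k = 0; k < a_prev.size(); ++k)
            W_grad[j][k] = delta_l[j] * a_prev[k]; // one outer-product entry
}

int main()
{
    std::vector<double> delta = {0.05, -0.01}, a_prev = {0.3, 0.6};
    std::vector<std::vector<double>> W_grad;
    std::vector<double> b_grad;
    weight_bias_grads(delta, a_prev, W_grad, b_grad);
    std::printf("W_grad[0][1]=%f b_grad[0]=%f\n", W_grad[0][1], b_grad[0]);
    return 0;
}
```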
Below is backpropagation as implemented in my own framework, which may help with understanding:
```cpp
// author: 张一极
// github repo: https://github.com/AllenZYJ/Edge-Computing-Engine
Matrix data_mine = CreateMatrix(2, 1);
Matrix label = CreateRandMat(2, 1);
Matrix weight1 = CreateRandMat(2, 2);
Matrix bais1 = ones(2, 1);
Matrix weight2 = CreateRandMat(2, 2);
Matrix bais2 = ones(2, 1);
for (int epoch = 0; epoch < 5; epoch++) // 5 epochs, matching the printed result
{
    cout << "---------epoch: " << epoch << "------------" << endl;
    cout_mat(weight1);
    int input_dim = 2;
    int output_dim = 2;
    edge_network sequaltial(input_dim, output_dim);
    // forward pass, keeping both the activated and the pre-activation outputs
    Matrix output1 = sequaltial.forward(data_mine, weight1, bais1);
    Matrix output1_without_act = sequaltial.forward_without_act(data_mine, weight1, bais1);
    Matrix output2 = sequaltial.forward(output1, weight2, bais2);
    Matrix output2_without_act = sequaltial.forward_without_act(output1, weight2, bais2);
    // backward pass: BP1 at the output layer, BP2 for the hidden layer
    Matrix output_end = sequaltial.end_layer_backward(label, output2_without_act, *loss, *act);
    Matrix backward3 = sequaltial.backward(output_end, output1_without_act, weight2, *act);
    // weight gradients: delta * (previous activation)^T
    Matrix weight_2_grad = mul(output_end, get_T(output1));
    Matrix weight_1_grad = mul(backward3, get_T(data_mine));
    // gradient-descent update, learning rate 0.001
    weight1 = subtract(weight1, times_mat(0.001, weight_1_grad));
    bais1 = subtract(bais1, times_mat(0.001, backward3));
    weight2 = subtract(weight2, times_mat(0.001, weight_2_grad));
    bais2 = subtract(bais2, times_mat(0.001, output_end));
    cout << "neraul end;" << endl;
}
return 0;
```

result:

```
---------epoch: 0------------
0.0073,0.3658,
1.893,1.1272,
0.0056014,
-0.0142086,
neraul end;
---------epoch: 1------------
0.0073,0.3658,
1.893,1.1272,
0.00560088,
-0.0142082,
neraul end;
---------epoch: 2------------
0.0073,0.3658,
1.893,1.1272,
0.00560037,
-0.0142077,
neraul end;
---------epoch: 3------------
0.0073,0.3658,
1.893,1.1272,
0.00559986,
-0.0142073,
neraul end;
---------epoch: 4------------
0.0073,0.3658,
1.893,1.1272,
0.00559935,
-0.0142069,
neraul end;
```
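When debugging a backward pass like this, a numerical gradient check is a common sanity test. The sketch below is my own toy example, not part of Edge-Computing-Engine: it compares the analytic chain-rule gradient of a single sigmoid neuron under squared-error loss against a central finite difference; the two numbers should agree to several decimal places.

```cpp
#include <cmath>
#include <cstdio>

// Toy gradient check: one sigmoid neuron a = sigma(w*x), loss C = 1/2 (a - y)^2.
double loss(double w, double x, double y)
{
    double a = 1.0 / (1.0 + std::exp(-w * x)); // forward pass
    return 0.5 * (a - y) * (a - y);
}

int main()
{
    double w = 0.8, x = 1.5, y = 1.0, h = 1e-6;
    double a = 1.0 / (1.0 + std::exp(-w * x));
    double analytic = (a - y) * a * (1.0 - a) * x; // chain rule: dC/da * da/dz * dz/dw
    double numeric  = (loss(w + h, x, y) - loss(w - h, x, y)) / (2.0 * h);
    std::printf("analytic=%.8f numeric=%.8f\n", analytic, numeric);
    return 0;
}
```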