Author: 张一极
date:20231021-16:45
很早就把卷积的单例C++做了实现,但是一直没来得及填这个坑,现在来回顾一下这个函数部分,顺便捋顺一下思路,方便后面进一步做更多算法的实现,完整的算法在最后面,仅供交流,请勿转载。
可以结合这几篇文章食用:
卷积前向的图解:http://likedge.top/test_summary/Edge/conv/conv_test_mode0.html
padding的C++实现:http://likedge.top/test_summary/C++/padding/padding.html
计算padding大小:
输入图片矩阵大小:(mid1.wid, mid1.high)
卷积核大小:kernel_size 步长:stride
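计算水平和垂直方向上的padding大小公式:
$$r_w = (\text{mid1.wid} - \text{kernel\_size}) \bmod \text{stride},\qquad \text{padding\_wid} = \begin{cases} 0, & r_w = 0 \\ \text{stride} - r_w, & r_w \neq 0 \end{cases}$$
$$r_h = (\text{mid1.high} - \text{kernel\_size}) \bmod \text{stride},\qquad \text{padding\_high} = \begin{cases} 0, & r_h = 0 \\ \text{stride} - r_h, & r_h \neq 0 \end{cases}$$
其含义是:mid1 的宽度(高度)减去第一个窗口占用的 kernel_size 之后,剩余长度对 stride 取余;若余数不为 0,就补上 stride 与该余数的差值,使窗口恰好还能再完成最后一次滑动;若余数为 0,则不需要 padding。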
对每个RGB通道进行padding:
for rgb_idx in [0, 1, 2]:
    mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx], mid1.matrix3d[rgb_idx].row + padding_high, mid1.matrix3d[rgb_idx].col + padding_wid)
创建滤波器kernels:对每个RGB通道和每个输出通道创建全1滤波器:
input_dim代表输入维度,output_channels代表输出维度,例如一个卷积层,输入维度3个通道,压缩为1个通道,就需要在外层维度为3,内层维度为1x3x3:

4D矩阵画不出来,所以只画了后三个维度,可以理解为多三个这样的3D矩阵。
实现如下:
xMatrix filters[input_dim][output_channels];for (int channel_index = 0; channel_index < input_dim; channel_index++){for (int filter_index = 0; filter_index < output_channels; filter_index++){Matrix kernel = ones(kernel_size, kernel_size);filters[channel_index][filter_index] = kernel;}}
单个元素的卷积如下,其实最核心的操作就是conv_element:把crop_pic(卷积核窗口从输入上切下来的区域)与kernel(卷积核)用mul_simple做逐元素相乘(对应位置相乘,而不是矩阵乘法),再用matrix_sum对乘积矩阵的所有元素求和,得到输出特征图上的一个元素:
mul_simple的实现如下:
/**
 * Element-wise product of two matrices of identical shape.
 *
 * @param mid1 Left operand.
 * @param mid2 Right operand; must have the same row and col counts as mid1.
 * @return A new matrix whose (i, j) entry is mid1(i, j) * mid2(i, j). When the
 *         shapes differ, an error line is printed to cout and mid1 is returned
 *         unchanged.
 */
Matrix mul_simple(Matrix mid1, Matrix mid2)
{
    const bool same_shape = (mid1.row == mid2.row) && (mid1.col == mid2.col);
    if (!same_shape)
    {
        cout << "Error: shape A&B" << endl;
        return mid1;
    }

    Matrix result = CreateMatrix(mid1.row, mid1.col);
    for (int r = 0; r < mid1.row; ++r)
    {
        for (int c = 0; c < mid1.col; ++c)
        {
            result.matrix[r][c] = mid1.matrix[r][c] * mid2.matrix[r][c];
        }
    }
    return result;
}
conv_element实现如下
xxxxxxxxxxMatrix conv_element(Matrix mid1, Matrix kernel, int kernel_size = 2, int stride = 1){ Matrix conv_result = CreateMatrix(((mid1.row - kernel_size) / stride) + 1, ((mid1.col - kernel_size) / stride) + 1); for (int x_ = 0; x_ <= (mid1.row - kernel_size) / stride; x_ += stride) { for (int y_ = 0; y_ <= (mid1.col - kernel_size) / stride; y_ += stride) { Matrix crop_pic = iloc(mid1, x_, x_ + kernel.col, y_, y_ + kernel.row); conv_result[x][y] = matrix_sum(mul_simple(crop_pic, kernel)) } } // cout<<"row: "<<conv_result.row<<" , "<<"col: "<<conv_result.col<<endl; // cout_mat(conv_result); return conv_result;}再用这个函数,实现对所有通道进行卷积:
x
for filter_idx in [0, 1, ..., output_channels-1]:
    sum_rgb = 0
    for channel_idx in [0, 1, 2]:
        element = conv_element(mid_rgb[channel_idx], filters[channel_idx][filter_idx], kernel_size, stride)
        sum_rgb += element
    feature_maps[filter_idx] = sum_rgb
上面的遍历是简化版,只保留了具体逻辑,实际上的实现如下,可以对照着看:
// Compute convolution results for each filter Matrix kernel = ones(kernel_size, kernel_size); Matrix feature_maps[output_channels]; for (int filter_idx = 0; filter_idx < output_channels; filter_idx++) { Matrix sum_rgb = CreateMatrix(((mid1.wid - kernel_size + 2*padding_wid) / stride) + 1, ((mid1.high - kernel_size + 2*padding_high) / stride) + 1); for (int channel_idx = 0; channel_idx < input_dim; channel_idx++) { // Compute convolution result for a single RGB channel and a single filter Matrix element = conv_element(mid_rgb[channel_idx], filters[channel_idx][filter_idx], kernel_size, stride); if (verbose) { cout << "Convolution of RGB[" << channel_idx << "] channel with Filter[" << filter_idx << "] : " << endl; cout_mat(mid_rgb[channel_idx]); cout << " * " << endl; cout_mat(filters[channel_idx][filter_idx]); cout << " = " << endl; cout_mat(element); cout << endl; } // Sum convolution results for each RGB channel sum_rgb = add(sum_rgb, element, 0); } feature_maps[filter_idx] = sum_rgb; if (verbose) { cout << "Feature map [" << filter_idx << "] : " << endl; cout_mat(feature_maps[filter_idx]); }计算每个位置的卷积结果。
构造输出3D矩阵,每个深度通道存储一个特征
这个就不用多说了,就是构造一个3D矩阵存储这些特征值:
xxxxxxxxxxMatrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col); for (int i = 0; i < output_channels; i++) { output3d.matrix3d[i] = feature_maps[i]; } if (verbose) { cout << "Output Matrix3d: " << endl; cout_mat3d(output3d); } return output3d;
完整的代码
代码实现了一个输入3d矩阵,对这个3d矩阵进行卷积操作,并返回一个新的3d矩阵
代码如下:
x
/**
 * Convolve a multi-channel 3D matrix with a bank of all-ones kernels and
 * return the resulting feature maps as a new 3D matrix.
 *
 * Steps: compute the padding needed so the last window fits, pad every input
 * channel with edge_padding(), build one all-ones kernel per (input channel,
 * output channel) pair, convolve each channel with conv_element(), and sum the
 * per-channel results into one feature map per output channel.
 *
 * @param mid1            Input 3D matrix; channels mid1.matrix3d[0 .. input_dim-1] are read.
 * @param input_dim       Number of input channels to convolve (must be >= 1).
 * @param output_channels Number of feature maps to produce (must be >= 1,
 *                        because the output shape is taken from feature map 0).
 * @param stride          Sliding-window step; used as a divisor, so it must be non-zero.
 * @param kernel_size     Side length of each square kernel.
 * @param mode            Accepted for interface compatibility; not used in this function.
 * @param verbose         When true, prints inputs, intermediates and the output to cout.
 * @return 3D matrix holding output_channels feature maps.
 */
Matrix3d conv_test_with_output(Matrix3d mid1,
                               int input_dim = 3,
                               int output_channels = 3,
                               int stride = 1,
                               int kernel_size = 2,
                               int mode = 0,
                               bool verbose = false)
{
    if (verbose)
    {
        cout << "Input Matrix3d: " << endl;
        cout_mat3d(mid1);
        cout << "Parameters: input_dim = " << input_dim
             << ", output_channels = " << output_channels
             << ", stride = " << stride
             << ", kernel_size = " << kernel_size
             << ", mode = " << mode << endl;
    }

    // Padding = distance still missing for one final full stride; zero when
    // the windows already tile the input exactly.
    int padding_wid = stride - (mid1.wid - kernel_size) % stride;
    if (padding_wid == stride)
    {
        padding_wid = 0;
    }
    int padding_high = stride - (mid1.high - kernel_size) % stride;
    if (padding_high == stride)
    {
        padding_high = 0;
    }
    if (verbose)
    {
        cout << "Padding widths: " << padding_wid << ", padding heights: " << padding_high << endl;
    }

    // Pad each input channel.
    // NOTE(review): padding_high (derived from mid1.high) is added to .row and
    // padding_wid (derived from mid1.wid) to .col - confirm that this matches
    // how Matrix3d maps wid/high onto rows/columns.
    // NOTE(review): arrays sized by runtime values rely on a compiler extension.
    Matrix mid_rgb[input_dim];
    for (int rgb_idx = 0; rgb_idx < input_dim; rgb_idx++)
    {
        mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx],
                                        mid1.matrix3d[rgb_idx].row + padding_high,
                                        mid1.matrix3d[rgb_idx].col + padding_wid);
        if (verbose)
        {
            cout << "RGB[" << rgb_idx << "] channel after padding: " << endl;
            cout_mat(mid_rgb[rgb_idx]);
        }
    }

    // Construct filters: one all-ones kernel per (input channel, output channel).
    Matrix filters[input_dim][output_channels];
    for (int channel_index = 0; channel_index < input_dim; channel_index++)
    {
        for (int filter_index = 0; filter_index < output_channels; filter_index++)
        {
            filters[channel_index][filter_index] = ones(kernel_size, kernel_size);
        }
    }

    // The accumulator must match the shape conv_element() returns for the
    // padded channels, so derive it from the padded channel that is actually
    // convolved rather than from the unpadded size plus twice the padding.
    const int out_rows = ((mid_rgb[0].row - kernel_size) / stride) + 1;
    const int out_cols = ((mid_rgb[0].col - kernel_size) / stride) + 1;

    // Compute convolution results for each filter.
    Matrix feature_maps[output_channels];
    for (int filter_idx = 0; filter_idx < output_channels; filter_idx++)
    {
        Matrix sum_rgb = CreateMatrix(out_rows, out_cols);
        for (int channel_idx = 0; channel_idx < input_dim; channel_idx++)
        {
            // Convolution result for a single input channel and a single filter.
            Matrix element = conv_element(mid_rgb[channel_idx], filters[channel_idx][filter_idx], kernel_size, stride);
            if (verbose)
            {
                cout << "Convolution of RGB[" << channel_idx << "] channel with Filter[" << filter_idx << "] : " << endl;
                cout_mat(mid_rgb[channel_idx]);
                cout << " * " << endl;
                cout_mat(filters[channel_idx][filter_idx]);
                cout << " = " << endl;
                cout_mat(element);
                cout << endl;
            }
            // Accumulate the per-channel results into this filter's feature map.
            sum_rgb = add(sum_rgb, element, 0);
        }
        feature_maps[filter_idx] = sum_rgb;
        if (verbose)
        {
            cout << "Feature map [" << filter_idx << "] : " << endl;
            cout_mat(feature_maps[filter_idx]);
        }
    }

    // Store the feature maps at successive depths of one 3D matrix.
    Matrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col);
    for (int i = 0; i < output_channels; i++)
    {
        output3d.matrix3d[i] = feature_maps[i];
    }
    if (verbose)
    {
        cout << "Output Matrix3d: " << endl;
        cout_mat3d(output3d);
    }
    return output3d;
}