Author: 张一极
date:20231021-16:45
很早就把卷积的单例C++做了实现,但是一直没来得及填这个坑,现在来回顾一下这个函数部分,顺便捋顺一下思路,方便后面进一步做更多算法的实现,完整的算法在最后面,仅供交流,请勿转载。
可以结合这几篇文章食用:
卷积前向的图解:http://likedge.top/test_summary/Edge/conv/conv_test_mode0.html
padding的C++实现:http://likedge.top/test_summary/C++/padding/padding.html
计算padding大小:
输入图片矩阵大小:(mid1.wid, mid1.high)
卷积核大小:kernel_size 步长:stride
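计算水平和垂直方向上的padding大小公式:
$$\text{padding\_wid} = \big(\text{stride} - (\text{mid1.wid} - \text{kernel\_size}) \bmod \text{stride}\big) \bmod \text{stride}$$
$$\text{padding\_high} = \big(\text{stride} - (\text{mid1.high} - \text{kernel\_size}) \bmod \text{stride}\big) \bmod \text{stride}$$
其含义是:mid1 的宽度(或高度)先减去第一个窗口占用的 kernel_size,剩余长度对 stride 取余,再用 stride 减去这个余数,得到的就是还需要补齐多少才能完成最后一次窗口滑动;如果余数为 0(恰好整除),则不需要 padding。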
对每个RGB通道进行padding:
for rgb_idx in [0, 1, 2]:
mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx],
mid1.matrix3d[rgb_idx].row + padding_high,
mid1.matrix3d[rgb_idx].col + padding_wid)
创建滤波器kernels:对每个RGB通道和每个输出通道创建全1滤波器:
input_dim代表输入维度,output_channels代表输出维度,例如一个卷积层,输入维度3个通道,压缩为1个通道,就需要在外层维度为3,内层维度为1x3x3:
4D矩阵画不出来,所以只画了后三个维度,可以理解为多三个这样的3D矩阵。
实现如下:
xMatrix filters[input_dim][output_channels];
for (int channel_index = 0; channel_index < input_dim; channel_index++)
{
for (int filter_index = 0; filter_index < output_channels; filter_index++)
{
Matrix kernel = ones(kernel_size, kernel_size);
filters[channel_index][filter_index] = kernel;
}
}
单个元素的卷积如下,其实最核心操作就是conv_element,这里的crop_pic(卷积核窗口切下来的区域)与kernel(卷积核)做mul_simple点乘:
mul_simple的实现如下:
// Element-wise product of two matrices of identical shape.
//
// mid1, mid2: operands; both must have the same row and col counts.
// Returns a new matrix whose entry (r, c) is mid1(r, c) * mid2(r, c).
// On a shape mismatch it prints "Error: shape A&B" and returns mid1 unchanged.
Matrix mul_simple(Matrix mid1, Matrix mid2)
{
    const bool shapes_match = (mid1.row == mid2.row) && (mid1.col == mid2.col);
    if (!shapes_match)
    {
        cout << "Error: shape A&B" << endl;
        return mid1;
    }

    Matrix product = CreateMatrix(mid1.row, mid1.col);
    for (int r = 0; r < mid1.row; ++r)
    {
        for (int c = 0; c < mid1.col; ++c)
        {
            product.matrix[r][c] = mid1.matrix[r][c] * mid2.matrix[r][c];
        }
    }
    return product;
}
conv_element实现如下:
// Single-channel 2D convolution without padding.
//
// Slides `kernel` over `mid1` in steps of `stride`; each output entry is the
// sum of the element-wise product of the kernel and the window it covers.
//
// mid1:        input matrix (already padded by the caller if needed).
// kernel:      convolution kernel; expected to be kernel_size x kernel_size.
// kernel_size: kernel side length, used to derive the output shape.
// stride:      step between neighbouring windows; must be >= 1.
// Returns a matrix of shape
//   ((mid1.row - kernel_size) / stride + 1) x ((mid1.col - kernel_size) / stride + 1).
Matrix conv_element(Matrix mid1, Matrix kernel, int kernel_size = 2, int stride = 1)
{
    const int out_rows = ((mid1.row - kernel_size) / stride) + 1;
    const int out_cols = ((mid1.col - kernel_size) / stride) + 1;
    Matrix conv_result = CreateMatrix(out_rows, out_cols);

    // Iterate over OUTPUT coordinates and scale by stride to get the window
    // origin; stepping the output index by stride would skip output cells
    // and read windows at the wrong offsets whenever stride > 1.
    for (int out_x = 0; out_x < out_rows; out_x++)
    {
        const int x_ = out_x * stride;
        for (int out_y = 0; out_y < out_cols; out_y++)
        {
            const int y_ = out_y * stride;
            // Row range pairs with kernel.row and column range with kernel.col
            // so the crop has the kernel's shape, as mul_simple requires.
            // NOTE(review): assumes iloc takes (matrix, row_begin, row_end,
            // col_begin, col_end) with exclusive ends - confirm against iloc.
            Matrix crop_pic = iloc(mid1, x_, x_ + kernel.row, y_, y_ + kernel.col);
            conv_result.matrix[out_x][out_y] = matrix_sum(mul_simple(crop_pic, kernel));
        }
    }
    return conv_result;
}
再用这个函数,实现对所有通道进行卷积:
for filter_idx in [0, 1, ..., output_channels-1]:
sum_rgb = 0
for channel_idx in [0, 1, 2]:
element = conv_element(mid_rgb[channel_idx],
filters[channel_idx][filter_idx],
kernel_size, stride)
sum_rgb += element
feature_maps[filter_idx] = sum_rgb
上面的遍历是简化版,只保留了具体逻辑,实际上的实现如下,可以对照着看:
// Compute convolution results for each filter
Matrix kernel = ones(kernel_size, kernel_size);
Matrix feature_maps[output_channels];
for (int filter_idx = 0; filter_idx < output_channels; filter_idx++)
{
Matrix sum_rgb = CreateMatrix(((mid1.wid - kernel_size + 2*padding_wid) / stride) + 1,
((mid1.high - kernel_size + 2*padding_high) / stride) + 1);
for (int channel_idx = 0; channel_idx < input_dim; channel_idx++)
{
// Compute convolution result for a single RGB channel and a single filter
Matrix element = conv_element(mid_rgb[channel_idx],
filters[channel_idx][filter_idx],
kernel_size, stride);
if (verbose) {
cout << "Convolution of RGB[" << channel_idx << "] channel with Filter["
<< filter_idx << "] : " << endl;
cout_mat(mid_rgb[channel_idx]);
cout << " * " << endl;
cout_mat(filters[channel_idx][filter_idx]);
cout << " = " << endl;
cout_mat(element);
cout << endl;
}
// Sum convolution results for each RGB channel
sum_rgb = add(sum_rgb, element, 0);
}
feature_maps[filter_idx] = sum_rgb;
if (verbose) {
cout << "Feature map [" << filter_idx << "] : " << endl;
cout_mat(feature_maps[filter_idx]);
}
计算每个位置的卷积结果。
构造输出3D矩阵,每个深度通道存储一个特征
这个就不用多说了,就是构造一个3D矩阵存储这些特征值:
Matrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col);
for (int i = 0; i < output_channels; i++)
{
output3d.matrix3d[i] = feature_maps[i];
}
if (verbose) {
cout << "Output Matrix3d: " << endl;
cout_mat3d(output3d);
}
return output3d;
完整的代码
代码实现了一个输入3d矩阵,对这个3d矩阵进行卷积操作,并返回一个新的3d矩阵
代码如下:
// Convolve a 3D input (one Matrix per channel) with all-ones kernels and
// return the resulting feature maps as a new 3D matrix.
//
// Steps:
//   1. Edge-pad every input channel so the sliding window covers it exactly.
//   2. Build one all-ones kernel per (input channel, output filter) pair.
//   3. For each output filter, sum conv_element over all input channels.
//   4. Pack the feature maps into a Matrix3d.
//
// mid1:            input volume; mid1.matrix3d[c] is channel c. All channels
//                  are assumed to share the shape of channel 0.
// input_dim:       number of input channels read from mid1 (must be >= 1).
// output_channels: number of feature maps to produce (must be >= 1).
// stride:          window step (must be >= 1).
// kernel_size:     side length of the square kernels; expected not to exceed
//                  the channel dimensions.
// mode:            currently unused; only echoed in verbose output.
// verbose:         when true, prints inputs, intermediates and the result.
// Returns a Matrix3d holding output_channels feature maps.
Matrix3d conv_test_with_output(Matrix3d mid1,
                               int input_dim = 3,
                               int output_channels = 3,
                               int stride = 1,
                               int kernel_size = 2,
                               int mode = 0,
                               bool verbose = false)
{
    if (verbose) {
        cout << "Input Matrix3d: " << endl;
        cout_mat3d(mid1);
        // Terminate the line so the next message does not run onto it.
        cout << "Parameters: input_dim = " << input_dim
             << ", output_channels = " << output_channels
             << ", stride = " << stride
             << ", kernel_size = " << kernel_size
             << ", mode = " << mode << endl;
    }

    // Compute padding from the channel matrices' own row/col counts, the same
    // fields the padded size below and conv_element use, so rows are always
    // paired with padding_high and columns with padding_wid.
    const int in_rows = mid1.matrix3d[0].row;
    const int in_cols = mid1.matrix3d[0].col;
    int padding_wid = stride - (in_cols - kernel_size) % stride;
    if (padding_wid == stride) {
        padding_wid = 0; // already an exact fit horizontally
    }
    int padding_high = stride - (in_rows - kernel_size) % stride;
    if (padding_high == stride) {
        padding_high = 0; // already an exact fit vertically
    }
    if (verbose) {
        cout << "Padding widths: " << padding_wid << ", padding heights: " << padding_high << endl;
    }

    // Pad each RGB channel in the 3D matrix.
    // NOTE: runtime-sized arrays are a compiler extension (GCC/Clang), not ISO C++.
    Matrix mid_rgb[input_dim];
    for (int rgb_idx = 0; rgb_idx < input_dim; rgb_idx++)
    {
        mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx],
                                        mid1.matrix3d[rgb_idx].row + padding_high,
                                        mid1.matrix3d[rgb_idx].col + padding_wid);
        if (verbose) {
            cout << "RGB[" << rgb_idx << "] channel after padding: " << endl;
            cout_mat(mid_rgb[rgb_idx]);
        }
    }

    // Construct filters: one all-ones kernel per (input channel, output filter).
    Matrix filters[input_dim][output_channels];
    for (int channel_index = 0; channel_index < input_dim; channel_index++)
    {
        for (int filter_index = 0; filter_index < output_channels; filter_index++)
        {
            filters[channel_index][filter_index] = ones(kernel_size, kernel_size);
        }
    }

    // Shape of every feature map: exactly what conv_element returns for a
    // padded channel, so the accumulator and each partial result always match.
    const int out_rows = ((mid_rgb[0].row - kernel_size) / stride) + 1;
    const int out_cols = ((mid_rgb[0].col - kernel_size) / stride) + 1;

    // Compute convolution results for each filter
    Matrix feature_maps[output_channels];
    for (int filter_idx = 0; filter_idx < output_channels; filter_idx++)
    {
        // NOTE(review): relies on CreateMatrix returning a zero-filled matrix - confirm.
        Matrix sum_rgb = CreateMatrix(out_rows, out_cols);
        for (int channel_idx = 0; channel_idx < input_dim; channel_idx++)
        {
            // Compute convolution result for a single RGB channel and a single filter
            Matrix element = conv_element(mid_rgb[channel_idx],
                                          filters[channel_idx][filter_idx],
                                          kernel_size, stride);
            if (verbose) {
                cout << "Convolution of RGB[" << channel_idx << "] channel with Filter["
                     << filter_idx << "] : " << endl;
                cout_mat(mid_rgb[channel_idx]);
                cout << " * " << endl;
                cout_mat(filters[channel_idx][filter_idx]);
                cout << " = " << endl;
                cout_mat(element);
                cout << endl;
            }
            // Sum convolution results for each RGB channel
            sum_rgb = add(sum_rgb, element, 0);
        }
        feature_maps[filter_idx] = sum_rgb;
        if (verbose) {
            cout << "Feature map [" << filter_idx << "] : " << endl;
            cout_mat(feature_maps[filter_idx]);
        }
    }

    // Construct 3D matrix to store different feature maps at different depths
    Matrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col);
    for (int i = 0; i < output_channels; i++)
    {
        output3d.matrix3d[i] = feature_maps[i];
    }
    if (verbose) {
        cout << "Output Matrix3d: " << endl;
        cout_mat3d(output3d);
    }
    return output3d;
}