单例说清楚卷积前向传播

Author: 张一极
date：20231021-16:45

很早就把卷积的单例C++做了实现，但是一直没来得及填这个坑，现在来回顾一下这个函数部分，顺便捋顺一下思路，方便后面进一步做更多算法的实现，完整的算法在最后面，仅供交流，请勿转载。

可以结合这几篇文章食用：

卷积前向的图解：http://likedge.top/test_summary/Edge/conv/conv_test_mode0.html
padding的C++实现：http://likedge.top/test_summary/C++/padding/padding.html

计算padding大小:
输入图片矩阵大小:(mid1.wid, mid1.high)
卷积核大小:kernel_size 步长:stride
计算水平和垂直方向上的padding大小公式:
其含义是：先用 mid1 的宽度（或高度）减去 kernel_size，得到卷积窗口放好第一步之后还能滑动的总距离；再对 stride 取余，得到最后不足一个步长的余量；stride 与该余量的差值就是需要补充的 padding，即补足到恰好能再完成最后一次窗口滑动的距离。
$padding\_wid = stride - (mid1.wid - kernel\_size) \bmod stride$，$padding\_high = stride - (mid1.high - kernel\_size) \bmod stride$（若结果恰好等于 stride，即已经能够整除，则 padding 取 0）
对每个RGB通道进行padding:


x
# Pseudocode: pad each of the three input channels before convolving.
for rgb_idx in [0, 1, 2]:
    # edge_padding receives the channel together with its row/col counts
    # enlarged by padding_high / padding_wid.
    mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx],  
                                    mid1.matrix3d[rgb_idx].row + padding_high,
                                    mid1.matrix3d[rgb_idx].col + padding_wid)

创建滤波器kernels:对每个RGB通道和每个输出通道创建全1滤波器:
input_dim代表输入通道数，output_channels代表输出通道数。例如一个卷积层把3个输入通道压缩为1个输出通道，就需要外层维度为3（对应输入通道）、内层维度为1x3x3（1个输出通道，每个卷积核大小为3x3）的滤波器组:
4D矩阵画不出来，所以只画了后三个维度，可以理解为多三个这样的3D矩阵。
实现如下：


x
  Matrix filters[input_dim][output_channels];
  for (int channel_index = 0; channel_index < input_dim; channel_index++)
  {
      
      for (int filter_index = 0; filter_index < output_channels; filter_index++)
      {
          Matrix kernel = ones(kernel_size, kernel_size);
          filters[channel_index][filter_index] = kernel;
      }
  }

进行卷积:对每个滤波器,在每个RGB通道上进行卷积,并求和得到特征图:

单个元素的卷积如下，其实最核心的操作就是conv_element：这里的crop_pic（卷积核窗口切下来的区域）与kernel（卷积核）先用mul_simple逐元素相乘，再用matrix_sum求和，得到该位置的输出值：

mul_simple的实现如下：


// Element-wise product of two matrices of identical shape.
// When the shapes differ, an error line is printed and the first operand is
// returned unchanged.
Matrix mul_simple(Matrix mid1, Matrix mid2)
{
  const bool same_shape = (mid1.row == mid2.row) && (mid1.col == mid2.col);
  if (!same_shape)
  {
    cout << "Error: shape A&B" << endl;
    return mid1;
  }

  Matrix product = CreateMatrix(mid1.row, mid1.col);
  for (int r = 0; r < mid1.row; ++r)
  {
    for (int c = 0; c < mid1.col; ++c)
    {
      product.matrix[r][c] = mid1.matrix[r][c] * mid2.matrix[r][c];
    }
  }
  return product;
}

conv_element实现如下


xxxxxxxxxx
/**
 * @brief Slide `kernel` over `mid1` and collect one response per window.
 *
 * For every output position the window of `mid1` under the kernel is cropped
 * with iloc, multiplied element-wise with the kernel (mul_simple) and reduced
 * with matrix_sum.
 *
 * @param mid1        Input matrix (a single channel).
 * @param kernel      Convolution kernel; its side is expected to equal
 *                    kernel_size, because mul_simple rejects operands whose
 *                    shapes differ.
 * @param kernel_size Kernel side length used to size the output.
 * @param stride      Step between neighbouring windows; must be positive.
 * @return Matrix with ((row - kernel_size) / stride + 1) rows and
 *         ((col - kernel_size) / stride + 1) columns. On invalid arguments an
 *         error is printed and `mid1` is returned unchanged, mirroring the
 *         error handling of mul_simple.
 */
Matrix conv_element(Matrix mid1, Matrix kernel, int kernel_size = 2, int stride = 1)
{
  // A non-positive stride would divide by zero; an oversized kernel would
  // yield a non-positive output size.
  if (stride <= 0 || kernel_size <= 0 || kernel_size > mid1.row || kernel_size > mid1.col)
  {
    cout << "Error: conv_element stride/kernel_size" << endl;
    return mid1;
  }

  const int out_rows = ((mid1.row - kernel_size) / stride) + 1;
  const int out_cols = ((mid1.col - kernel_size) / stride) + 1;
  Matrix conv_result = CreateMatrix(out_rows, out_cols);

  // Iterate over OUTPUT coordinates; the window origin in the input is the
  // output coordinate scaled by the stride. (Stepping the output index itself
  // by `stride` skips windows whenever stride > 1.)
  for (int out_x = 0; out_x < out_rows; out_x++)
  {
    const int x_ = out_x * stride;
    for (int out_y = 0; out_y < out_cols; out_y++)
    {
      const int y_ = out_y * stride;
      // Row extent follows kernel.row and column extent follows kernel.col so
      // the crop has the kernel's shape, as mul_simple requires.
      // NOTE(review): assumes iloc takes (matrix, row_begin, row_end,
      // col_begin, col_end) - confirm against its definition.
      Matrix crop_pic = iloc(mid1, x_, x_ + kernel.row, y_, y_ + kernel.col);
      conv_result.matrix[out_x][out_y] = matrix_sum(mul_simple(crop_pic, kernel));
    }
  }
  return conv_result;
}

再用这个函数，实现对所有通道进行卷积：


x
# Pseudocode: build one feature map per output filter.
for filter_idx in [0, 1, ..., output_channels-1]:

    # Accumulator for the per-channel results of this filter.
    sum_rgb = 0
    for channel_idx in [0, 1, 2]:
        
        # Convolve one padded channel with the kernel stored for this
        # (channel, filter) pair.
        element = conv_element(mid_rgb[channel_idx],
                               filters[channel_idx][filter_idx],
                               kernel_size, stride)
                               
        # Add this channel's result to the running total.
        sum_rgb += element
        
    feature_maps[filter_idx] = sum_rgb

上面的遍历是简化版，只保留了具体逻辑，实际上的实现如下，可以对照着看：


// Compute convolution results for each filter
    Matrix kernel = ones(kernel_size, kernel_size);
    Matrix feature_maps[output_channels];
    for (int filter_idx = 0; filter_idx < output_channels; filter_idx++)
    {
        Matrix sum_rgb = CreateMatrix(((mid1.wid - kernel_size + 2*padding_wid) / stride) + 1, 
                                      ((mid1.high - kernel_size + 2*padding_high) / stride) + 1);
        for (int channel_idx = 0; channel_idx < input_dim; channel_idx++)
        {
            // Compute convolution result for a single RGB channel and a single filter
            Matrix element = conv_element(mid_rgb[channel_idx], 
                                          filters[channel_idx][filter_idx], 
                                          kernel_size, stride);
            if (verbose) {
                cout << "Convolution of RGB[" << channel_idx << "] channel with Filter[" 
                     << filter_idx << "] : " << endl;
                cout_mat(mid_rgb[channel_idx]);
                cout << " * " << endl;
                cout_mat(filters[channel_idx][filter_idx]);
                cout << " = " << endl;
                cout_mat(element);
                cout << endl;
            }
            // Sum convolution results for each RGB channel
        sum_rgb = add(sum_rgb, element, 0);
    }
    feature_maps[filter_idx] = sum_rgb;
    if (verbose) {
        cout << "Feature map [" << filter_idx << "] : " << endl;
        cout_mat(feature_maps[filter_idx]);
    }

计算每个位置的卷积结果。

构造输出3D矩阵,每个深度通道存储一个特征

这个就不用多说了，就是构造一个3D矩阵存储这些特征值：


xxxxxxxxxx
Matrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col);
  for (int i = 0; i < output_channels; i++)
  {
      output3d.matrix3d[i] = feature_maps[i];
  }
  if (verbose) {
      cout << "Output Matrix3d: " << endl;
      cout_mat3d(output3d);
  }
  return output3d;

完整的代码

代码仓库：https://github.com/AllenZYJ/Edge-Computing-Engine/blob/053ad935260f7fd120bb90561df5fec5b4f868ea/matrix/matrix_pro.h#L598

这段代码实现的功能是：输入一个3d矩阵，对它进行卷积操作，并返回一个新的3d矩阵

代码如下：


x

Matrix3d conv_test_with_output(Matrix3d mid1, 
                                int input_dim = 3, 
                                int output_channels = 3, 
                                int stride = 1, 
                                int kernel_size = 2, 
                                int mode = 0, 
                                bool verbose = false)
    // padding 暂未实现
  {
    if (verbose) {
        cout << "Input Matrix3d: " << endl;
        cout_mat3d(mid1);
        cout << "Parameters: input_dim = " << input_dim 
             << ", output_channels = " << output_channels 
             << ", stride = " << stride 
             << ", kernel_size = " << kernel_size 
             << ", mode = " << mode;
    }

    // Compute padding widths and heights
    int padding_wid = stride - (mid1.wid - kernel_size) % stride;
    if (padding_wid == stride) {
        padding_wid = 0;
    }
    int padding_high = stride - (mid1.high - kernel_size) % stride;
    if (padding_high == stride) {
        padding_high = 0;
    }
    if (verbose) {
        cout << "Padding widths: " << padding_wid << ", padding heights: " << padding_high << endl;
    }

    // Pad each RGB channel in the 3D matrix
    Matrix mid_rgb[input_dim];
    for (int rgb_idx = 0; rgb_idx < input_dim; rgb_idx++)
    {   
        mid_rgb[rgb_idx] = edge_padding(mid1.matrix3d[rgb_idx], 
                                         mid1.matrix3d[rgb_idx].row + padding_high, 
                                         mid1.matrix3d[rgb_idx].col + padding_wid);
        if (verbose) {
            cout << "RGB[" << rgb_idx << "] channel after padding: " << endl;
            cout_mat(mid_rgb[rgb_idx]);
        }
    }

    // Construct filters
    Matrix filters[input_dim][output_channels];
  for (int channel_index = 0; channel_index < input_dim; channel_index++)
  {
      
      for (int filter_index = 0; filter_index < output_channels; filter_index++)
      {
          Matrix kernel = ones(kernel_size, kernel_size);
          filters[channel_index][filter_index] = kernel;
      }
  }


    // Compute convolution results for each filter
    Matrix kernel = ones(kernel_size, kernel_size);
    Matrix feature_maps[output_channels];
    for (int filter_idx = 0; filter_idx < output_channels; filter_idx++)
    {
        Matrix sum_rgb = CreateMatrix(((mid1.wid - kernel_size + 2*padding_wid) / stride) + 1, 
                                      ((mid1.high - kernel_size + 2*padding_high) / stride) + 1);
        for (int channel_idx = 0; channel_idx < input_dim; channel_idx++)
        {
            // Compute convolution result for a single RGB channel and a single filter
            Matrix element = conv_element(mid_rgb[channel_idx], 
                                          filters[channel_idx][filter_idx], 
                                          kernel_size, stride);
            if (verbose) {
                cout << "Convolution of RGB[" << channel_idx << "] channel with Filter[" 
                     << filter_idx << "] : " << endl;
                cout_mat(mid_rgb[channel_idx]);
                cout << " * " << endl;
                cout_mat(filters[channel_idx][filter_idx]);
                cout << " = " << endl;
                cout_mat(element);
                cout << endl;
            }
            // Sum convolution results for each RGB channel
        sum_rgb = add(sum_rgb, element, 0);
    }
    feature_maps[filter_idx] = sum_rgb;
    if (verbose) {
        cout << "Feature map [" << filter_idx << "] : " << endl;
        cout_mat(feature_maps[filter_idx]);
    }
  }
  // Construct 3D matrix to store different feature maps at different depths
  Matrix3d output3d = CreateMatrix3d(output_channels, feature_maps[0].row, feature_maps[0].col);
  for (int i = 0; i < output_channels; i++)
  {
      output3d.matrix3d[i] = feature_maps[i];
  }
  if (verbose) {
      cout << "Output Matrix3d: " << endl;
      cout_mat3d(output3d);
  }
  return output3d;
  }