简体   繁体   English


[英]convolution result different from that of caffe

I tried writing the convolution function myself to compare the result with caffe result. 我尝试自己编写卷积函数,以将结果与caffe结果进行比较。 I used bottom and top blob data and weights extracted while running a demo program. 我使用了运行演示程序时提取的底部和顶部blob数据以及权重。 I'm very sure the extracted data are correct. 我非常确定提取的数据是正确的。 Here is the convolution test code that I wrote but the result is different. 这是我编写的卷积测试代码,但结果有所不同。 In this example, I have 64 input feature maps and 64 output feature maps and use 3x3 kernel. 在此示例中,我有64个输入要素图和64个输出要素图,并使用3x3内核。 This program reads the bottom data from 64 files, reads weights and bias for 64x64 3x3 kernel and weights and use it to do convolution and saves the top data into 64 files. 该程序从64个文件中读取底层数据,读取64x64 3x3内核的权重和偏差,然后使用权重进行卷积并将顶层数据保存到64个文件中。 very simple program.. I would appreciate if any one can notice any mistake here and let me know. 非常简单的程序

// written by Chan Kim for simple convolution test

layer 2 type Convolution  num_bottoms 1 num_tops 1
layer 2 bottom 0 shape "1 64 600 800 (30720000)"
layer 2 top 0 shape = "1 64 600 800 (30720000)"
layer 2 kernel_size = 3
bottom size : 4*64*600*800 = 0x7530000 Bytes
top    size : 4*64*600*800 = 0x7530000 Bytes
kernel size : 4*3*3*64*64 = 0x24000 Bytes
in word size, bottom : 0x1d4c000 kernel : 0x9000 Words

#include <stdio.h>
#define NUM_IFM 64
#define NUM_OFM 64
#define HEIGHT 600
#define WIDTH 800
#define K 3 // 3x3 kernel
float bottom[1][NUM_IFM][HEIGHT][WIDTH];
float top[1][NUM_IFM][HEIGHT][WIDTH];
float weights[NUM_OFM][NUM_IFM][K][K];
float bias[NUM_OFM];
float conv[1][NUM_IFM][HEIGHT][WIDTH]; // result
char str[80];
float kern[K][K];
float in_square[K][K];
float sum;

#define layer 2 // for test


FILE *file;
int ifm_idx;
int ofm_idx;
int orix;
int ocix;
char c;
int r;
int rix, cix;
char line[80];
char *ll;
float v0, v1, v2, v3, v4, v5, v6, v7;
int kyi, kxi;
int orixm, orixp;
int ocixm, ocixp;
int kx, ky;
int i;

// --------------------------------------------------------
// reading blob data from files into blob memory
// --------------------------------------------------------
for(ifm_idx=0;ifm_idx<NUM_IFM;ifm_idx++) {
    sprintf(str, "/home/ckim/Neuro/convhw/ext-z840/L%02d_Convolution_B0_FN%03d.txt", layer, ifm_idx);
    file = fopen(str, "r");
    c = fgetc(file);
    rix = -1;
    while (c != EOF) {
        if (c == '#') {
            ll = fgets(line, 80, file);
            c = fgetc(file);
            cix = 0;
        else {
            r = ungetc(c, file);
            r = fscanf(file, "%f %f %f %f %f %f %f %f", &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
            bottom[0][ifm_idx][rix][cix++] = v0;
            bottom[0][ifm_idx][rix][cix++] = v1;
            bottom[0][ifm_idx][rix][cix++] = v2;
            bottom[0][ifm_idx][rix][cix++] = v3;
            bottom[0][ifm_idx][rix][cix++] = v4;
            bottom[0][ifm_idx][rix][cix++] = v5;
            bottom[0][ifm_idx][rix][cix++] = v6;
            bottom[0][ifm_idx][rix][cix++] = v7;
            c = fgetc(file);
    printf("file %s read.. \n",str);

// --------------------------------------------------------
// reading weights from files into blob memory
// --------------------------------------------------------
sprintf(str, "/home/ckim/Neuro/convhw/ext1/L%02d_Convolution_Weights.txt", layer);
file = fopen(str, "r");
ll = fgets(line, 80, file); // read the layer config line
for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
//for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
    ll = fgets(line, 80, file); // read the line '## For output map ofm_idx'
    for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
    //for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
        ll = fgets(line, 80, file); // read the line '## for input map ifm_idx'
        for(kyi = 0; kyi < K; kyi++) {
         fscanf(file, "%f %f %f", &v0, &v1, &v2); // K=3 always
            weights[ofm_idx][ifm_idx][kyi][0] = v0;
            weights[ofm_idx][ifm_idx][kyi][1] = v1;
            weights[ofm_idx][ifm_idx][kyi][2] = v2;
         ll = fgets(line, 80, file); // read off remaining line
    // read bias values
for(i = 0, ofm_idx = 0; i < NUM_OFM/8; i++){
    ll = fgets(line, 80, file);
    r = fscanf(file, "%f %f %f %f %f %f %f %f", &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7);
    bias[ofm_idx++] = v0;
    bias[ofm_idx++] = v1;
    bias[ofm_idx++] = v2;
    bias[ofm_idx++] = v3;
    bias[ofm_idx++] = v4;
    bias[ofm_idx++] = v5;
    bias[ofm_idx++] = v6;
    bias[ofm_idx++] = v7;

// --------------------------------------------------------
// perform convolution
// --------------------------------------------------------
printf("starting convolution..\n");
for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
    printf("making OFM %d\n", ofm_idx);
    for(orix = 0; orix < HEIGHT; orix++) {
        for(ocix = 0; ocix < WIDTH; ocix++) {

            sum = 0.; // for single point

            // for each input maps
            for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {

                // get kernel
                kern[0][0] = weights[ofm_idx][ifm_idx][0][0];
                kern[0][1] = weights[ofm_idx][ifm_idx][0][1];
                kern[0][2] = weights[ofm_idx][ifm_idx][0][2];
                kern[1][0] = weights[ofm_idx][ifm_idx][1][0];
                kern[1][1] = weights[ofm_idx][ifm_idx][1][1];
                kern[1][2] = weights[ofm_idx][ifm_idx][1][2];
                kern[2][0] = weights[ofm_idx][ifm_idx][2][0];
                kern[2][1] = weights[ofm_idx][ifm_idx][2][1];
                kern[2][2] = weights[ofm_idx][ifm_idx][2][2];

                // zero-value padding (used in caffe)
                in_square[0][0] = (orix == 0 ||ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[0][1] = (orix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[0][2] = (orix == 0 || ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[1][0] = (ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[1][1] = bottom[0][ifm_idx][orix][ocix];
                in_square[1][2] = (ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[2][0] = (orix == HEIGHT-1 || ocix == 0) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[2][1] = (orix == HEIGHT-1) ? 0 : bottom[0][ifm_idx][orix][ocix];
                in_square[2][2] = (orix == HEIGHT-1 || ocix == WIDTH-1) ? 0 : bottom[0][ifm_idx][orix][ocix];

                // apply kernel
                for(ky = 0; ky < K; ky++) {
                    for(kx = 0; kx < K; kx++) {
                        sum += in_square[ky][kx]*kern[ky][kx];

                //// add bias
                //sum += bias[ifm_idx];
            } // ifm_idx

            // add bias
            sum += bias[ofm_idx];
            // store result
            conv[0][ofm_idx][orix][ocix] = sum;
        } // ocix
    } // orix

    printf("OFM %d\n", ofm_idx);
    sprintf(str, "./result/L%02d_Convolution_T0_FN%03d.txt", layer, ofm_idx);
    file = fopen(str, "w");
    printf("writing convolution result to file %s..\n", str);
    for (rix = 0; rix < HEIGHT; rix++) {
        fprintf(file, "### kr = %d ##\n", rix);
        for (cix = 0; cix < WIDTH; cix++) {
            fprintf(file, "%f ",conv[0][ofm_idx][rix][cix]);
            if (cix %8 == 7) fprintf(file, "\n");
printf("Convolution finished\n");

UPDATE : I tried transposing kernel and tried reversing input-output relationship for the extract kernel, all 4 combinations, but it doesn't match. 更新:我尝试换位内核,并尝试反转提取内核(所有4种组合)的输入-输出关系,但是不匹配。 maybe there's something wrong in the way I extracted the values from caffe execution. 也许我从caffe执行中提取值的方式出了问题。 I'm adding how I extracted bottom, top and weight data from convolution layer in caffe/src/caffe/net.cpp file. 我要添加我如何从caffe / src / caffe / net.cpp文件中的卷积层提取底部,顶部和重量数据。 They are extracted after the layer processing is done. 它们在图层处理完成后被提取。

//#define LayerShapeExtract
//#define BlobExtract
//#define WeightExtract
#define LayerExtNum 2
#define EXT_ALL_LAYERS 0
int ccc; // for debug
template <typename Dtype>
Dtype Net<Dtype>::ForwardFromTo(int start, int end) {
  CHECK_GE(start, 0);
  CHECK_LT(end, layers_.size());
  Dtype loss = 0;
  LOG(INFO) << "## : net_input_blobs_.size() : " << net_input_blobs_.size() << std::endl;
  if (debug_info_) {
    for (int i = 0; i < net_input_blobs_.size(); ++i) {
  for (int i = start; i <= end; ++i) {
    ccc = i; printf("ccc = %d\n", ccc); // LOG(ERROR) << "Forwarding " << layer_names_[i];
    Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); // Layer::Forward
    loss += layer_loss;
    if (debug_info_) { ForwardDebugInfo(i); }

#ifdef LayerShapeExtract
// -------------------------------- start of shape extract ---------------------
  FILE *lsfp;
  lsfp = fopen("ext/layers.txt","w");
  fprintf(lsfp, "####### Layer Shape ########\n");
  fprintf(lsfp, "start = %d, end = %d\n", start,end);
  for (int li = start; li <= end; ++li) {
    fprintf(lsfp, "layer %d type %s  num_bottoms %zu num_tops %zu\n", li, layers_[li]->type(), bottom_vecs_[li].size(), top_vecs_[li].size());
    for (int bi = 0; bi < bottom_vecs_[li].size(); bi++) {
       fprintf(lsfp, "layer %d bottom %d shape \"%s\"\n", li, bi, bottom_vecs_[li][bi]->shape_string().c_str()); 
    for (int ti = 0; ti < top_vecs_[li].size(); ti++) {
       fprintf(lsfp, "layer %d top %d shape = \"%s\"\n", li, ti, top_vecs_[li][ti]->shape_string().c_str()); 
    if (strcmp(layers_[li]->type(),"Convolution") == 0) {
        fprintf(lsfp, "layer %d kernel_size = %d\n", li, layers_[li]->layer_param().convolution_param().kernel_size());
  //printf("##Net::ForwardFromTo ==> ####### End of Layer Shape ########\n");
// -------------------------------- end of shape extract ---------------------

#ifdef BlobExtract
// -------------------------------- start of blob extract ---------------------

if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
    char fname[50];
    FILE *extfp1;
    for (int j = 0; j < bottom_vecs_[i].size(); j++) {

        Blob<Dtype>* bp = bottom_vecs_[i][j];
        const Dtype *dptr = bp->cpu_data();
    if (bp->shape().size() == 4) {
       for (int kn = 0; kn < bp->shape()[0]; kn++) { // Blob
          for (int kd = 0; kd < bp->shape()[1]; kd++) { // Ni
             sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt",
             extfp1 = fopen(fname,"w");
             for (int kr = 0; kr < bp->shape()[2]; kr++) { // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < bp->shape()[3]; kc++) { // Nx
                   fprintf(extfp1, "%f ",*dptr++);
                   if (kc %8 == 7 || kc == bp->shape()[3]-1) fprintf(extfp1, "\n");
    } // if size 4
    else if (bp->shape().size() == 2) {
       for (int kn = 0; kn < bp->shape()[0]; kn++) { // Blob
          sprintf(fname, "ext/L%02d_%s_B%d_FN%03d.txt",
          extfp1 = fopen(fname,"w");
          for (int kd = 0; kd < bp->shape()[1]; kd++) { // Ni
                   fprintf(extfp1, "%f ",*dptr++);
                   if (kd %8 == 7 || kd == bp->shape()[1]-1) fprintf(extfp1, "\n");
    } // if size 2
    else {
       printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n",i,j,bp->shape().size());
    } // for j

    for (int j = 0; j < top_vecs_[i].size(); j++) {

        Blob<Dtype>* tp = top_vecs_[i][j];
        const Dtype *dptr = tp->cpu_data();
    if (tp->shape().size() == 4) { // Blob
       for (int kn = 0; kn < tp->shape()[0]; kn++) { // Blob
          for (int kd = 0; kd < tp->shape()[1]; kd++) { // Ni
             sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt",
             extfp1 = fopen(fname,"w");
             for (int kr = 0; kr < tp->shape()[2]; kr++) { // Ny
                fprintf(extfp1, "### kr = %d ##\n", kr);
                for (int kc = 0; kc < tp->shape()[3]; kc++) { // Nx
                   fprintf(extfp1, "%f ",*dptr++);
                   if (kc %8 == 7 || kc == tp->shape()[3]-1) fprintf(extfp1, "\n");
    } // if size 4
    else if (tp->shape().size() == 2) {
       for (int kn = 0; kn < tp->shape()[0]; kn++) { // Blob
          sprintf(fname, "ext/L%02d_%s_T%d_FN%03d.txt",
          extfp1 = fopen(fname,"w");
          for (int kd = 0; kd < tp->shape()[1]; kd++) { // Ni
                   fprintf(extfp1, "%f ",*dptr++);
                   if (kd %8 == 7) fprintf(extfp1, "\n");
    } // if size 2
    else {
       printf("BSS != 4 or 2, i = %d, j = %d, sz = %zd\n",i,j,tp->shape().size());
    } // for j
} // print!!
// -------------------------------- end of blob extract ---------------------

#ifdef WeightExtract
// -------------------------------- start of weight extract ---------------------
if (i == LayerExtNum || EXT_ALL_LAYERS) { //print!!
    char fname[50];
    FILE *extfp1;
    printf("#*#* Ext layer type = %s\n", layers_[i]->type());
    if (strcmp(layers_[i]->type(),"Convolution") == 0){
        printf("It's Convolution Layer!\n");
        vector<shared_ptr<Blob<Dtype> > >& lyr_blobs = layers_[i]->blobs();
        // Layer
        printf("layer blob size = %zd\n", lyr_blobs.size());
        //const LayerParameter& lyr_param = layers()[i]->layer_param(); // vector shared_ptr Layer
        printf("lyr_blobs.size() = %zd\n", lyr_blobs.size());
        printf("lyr_blobs[0].shape = %s\n", lyr_blobs[0]->shape_string().c_str());
        printf("lyr_blobs[1].shape = %s\n", lyr_blobs[1]->shape_string().c_str()); // Blob
        Blob<Dtype> *wp = lyr_blobs[0].get(); // weight // shared_ptr
        Blob<Dtype> *bp = lyr_blobs[1].get(); // bias
        printf("No Ni ky kx = %d %d %d %d\n", wp->shape()[0], wp->shape()[1], wp->shape()[2], wp->shape()[3]); // Blob
        printf("Nb = %d\n", bp->shape()[0]);
        int No = wp->shape()[0];
        int Ni = wp->shape()[1];
        int Ky = wp->shape()[2];
        int Kx = wp->shape()[3];
        int Nb = bp->shape()[0];
        const Dtype *wptr = wp->cpu_data();
        const Dtype *bptr = bp->cpu_data();
        // save weights first
        sprintf(fname, "ext/L%02d_%s_Weights.txt",i,layers_[i]->type());
        extfp1 = fopen(fname, "w");
        fprintf(extfp1, "## Layer 0 Conv Weights (No = %d, Ni = %d, Ky = %d, Kx = %d, Nb = %d\n", No, Ni, Ky, Kx, Nb);
        for (int n = 0; n < No; n++){
            fprintf(extfp1, "## For output map %d ##\n", n);
            for (int c = 0; c < Ni; c++){
                fprintf(extfp1, "## for input map %d ##\n", c);
                for (int y = 0; y < Ky; y++) {
                    for (int x = 0; x < Kx; x++) {
                        fprintf(extfp1, "%f ", *wptr++);
                    fprintf(extfp1, "\n");
                fprintf(extfp1, "\n");
        fprintf(extfp1, "## Bias values for outputs ##\n");
        for (int n = 0; n < No; n++){
            fprintf(extfp1, "%f ",*bptr++);
            if (n % 8 == 7) fprintf(extfp1, "\n");
} // print!!
// -------------------------------- end of weight extract ---------------------
  return loss;

ADD : I've run for VGG_ILSVRC_16_layers which came from py-faster-rcnn code. 添加:我已经运行了来自py-faster-rcnn代码的VGG_ILSVRC_16_layers。 The parameter of the layer I extractedd the data from is as follows. 我从中提取数据的层的参数如下。 :

layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param { 
    num_output: 64
    pad: 1 kernel_size: 3

ADD : A moderator deleted my fixed code, so I add the fixed code in kerne weight read here. 添加:主持人删除了我的固定代码,因此我将固定代码添加到此处的kerne weight中。

for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
    ll = fgets(line, 80, file); // read the line '## For output map ofm_idx'
    for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {

        ll = fgets(line, 80, file); // read the line '## for input map ifm_idx'
        for(kyi = 0; kyi < K; kyi++) {
         fscanf(file, "%f %f %f\n", &v0, &v1, &v2); // K=3 always

            weights[ofm_idx][ifm_idx][kyi][0] = v0;
            weights[ofm_idx][ifm_idx][kyi][1] = v1;
            weights[ofm_idx][ifm_idx][kyi][2] = v2;
    // read bias values

Try transposing bottom, convolve and then compare. 尝试转置底部,进行卷积,然后进行比较。 (Or transpose the convolutional kernel) (或转置卷积核)

I think the bug lies in fetching bottom data(input) when performing convolution, and the right code should be: 我认为错误在于执行卷积时获取底部数据(输入),正确的代码应该是:

//bottom data index for convolution
int src_r, src_c;
//pads and strides along row and column
int row_pad = 1, col_pad = 1, row_stride = 1, col_stride = 1; 
for(ofm_idx = 0; ofm_idx < NUM_OFM; ofm_idx++) {
  for(orix = 0; orix < HEIGHT; orix++) {
    for(ocix = 0; ocix < WIDTH; ocix++) {  
      sum = 0.; 
      for(ifm_idx = 0; ifm_idx < NUM_IFM; ifm_idx++) {
        // zero-value padding (used in caffe)
        //The right indexing for bottom data,
        //your original code is wrong here
        src_r= orix * row_stride - row_pad;
        src_c= ocix * col_stride - col_pad;
        in_square[0][0] = (src_r < 0 || src_c < 0 ||
                           src_r >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r][src_c];
        in_square[0][1] = (src_r < 0 || src_c + 1 < 0 ||
                           src_r >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r][src_c + 1];
        in_square[0][2] = (src_r < 0 || src_c + 2 < 0 ||
                           src_r >= HEIGHT || src_c + 2 >= WIDTH) ? 
                           0 : bottom[0][ifm_idx][src_r][src_c + 2];
        in_square[1][0] = (src_r + 1 < 0 || src_c < 0 ||
                           src_r + 1 >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c];
        in_square[1][1] = (src_r + 1 < 0 || src_c + 1 < 0 ||
                           src_r + 1 >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c + 1];
        in_square[1][2] = (src_r + 1 < 0 || src_c + 2 < 0 ||
                           src_r + 1 >= HEIGHT || src_c + 2 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 1][src_c + 2];
        in_square[2][0] = (src_r + 2 < 0 || src_c < 0 ||
                           src_r + 2 >= HEIGHT || src_c >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c];
        in_square[2][1] = (src_r + 2 < 0 || src_c + 1 < 0 ||
                           src_r + 2 >= HEIGHT || src_c + 1 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c + 1];
        in_square[2][2] = (src_r + 2 < 0 || src_c + 2 < 0 ||
                           src_r + 2 >= HEIGHT || src_c + 2 >= WIDTH) ?
                           0 : bottom[0][ifm_idx][src_r + 2][src_c + 2];

        //apply kernel

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

相关问题 英特尔IPP卷积已弃用 - 是否有不同的IPP 2D卷积方法? - Intel IPP convolution deprecated — is there a different IPP 2D convolution method? ppm文件的图像卷积 - image convolution from ppm file 不同操作系统的不同结果 - Different result in different OS 是否是 gcc -O2 优化错误(与 -O1 的结果不同)? - Is it a gcc -O2 optimization bug (different result from -O1)? 与Mac和Linux上的exp函数略有不同的结果 - Slightly different result from exp function on Mac and Linux 为什么这个显式演员的结果与隐式演员的结果不同? - Why is the result of this explicit cast different from the implicit one? syscall(SYS_getuid) 从 getuid() 返回不同的结果 - syscall(SYS_getuid) returns different result from getuid() 如何用 2 个不同范围内的 2 个随机数相除但结果在特定范围内的结果填充二维数组? - How to fill a 2D array with a result from division of 2 random numbers in 2 different ranges but the result being in certain range? 为什么 mbedtls 的 aes 加密结果与 java 和在线工具的结果不同? - why aes encryption result from mbedtls is different than java and online tool result? 每次调用时结果不同 - Different result everytime it is called
粤ICP备18138465号  © 2020-2024 STACKOOM.COM