cout 語句導致感知器准確性發生變化

Question

我在 C++ 中實現了一個感知器，可以區分 MNIST 數據集中的 2 和 6。 但是，當我在測試循環中注釋掉 cout 語句時，我發現了一些奇怪的行為。 當我讓測試循環打印出預測數字和實際數字時，感知器的最終准確度比我沒有循環打印時要高得多。

順便說一句，我在 vscode 中運行這段代碼。

#include <iostream>
#include <fstream>
#include <chrono>
#include <string>
#include <time.h>
#include "../MNISTReader/mnistreader.hpp"

using namespace std;
using namespace chrono;

const double ALPHA = 0.02; // Learning rate
int trainingPasses = 100; // Number of training passes
int testNums[2] = {2, 6}; // Numbers to classify
bool writeToFile = false; // Whether or not to write training weights to a csv file

// Read MNIST dataset
mnistreader mnist(
    "../MNISTDataset/trainImages",
    "../MNISTDataset/trainLabels",
    "../MNISTDataset/testImages",
    "../MNISTDataset/testLabels",
    2000, // Number of training data to use
    100, // Number of testing data to use
    true
);

// Define weights and bias
double* w = (double*) malloc(mnist.imgSize * sizeof(double));
double b;

// Function used to compute the dot product of the weight matrix and the pixel data
double forwardPass(double *x) {
    double sum;
    for (int i = 0; i < mnist.imgSize; ++i) {
        sum += w[i] * x[i];
    }
    sum += b;
    return sum;
}

// Function used to train the perceptron
void train() {
    for (int i = 0; i < mnist.trainSize; ++i) {
        double pred = forwardPass(mnist.trainImgData[i]) * mnist.trainLblData[i];
        if (pred <= 0) {
            for (int j = 0; j < mnist.imgSize; ++j) {
                w[j] += ALPHA * mnist.trainLblData[i] * mnist.trainImgData[i][j];
            }
            b += ALPHA * mnist.trainLblData[i];
        }
    }
}

int main() {
    // Initialize weights and bias
    srand(1);
    for (int i = 0; i < mnist.imgSize; ++i) {
        w[i] = (double) rand() / RAND_MAX;
    }
    b = (double) rand() / RAND_MAX;

    // Preprocess data
    mnist.selectData(testNums, 2);
    for (int i = 0; i < mnist.trainSize; ++i) {
        if (mnist.trainLblData[i] == testNums[0]) {
            mnist.trainLblData[i] = -1;
        }
        else {
            mnist.trainLblData[i] = 1;
        }
    }

    // Train model
    steady_clock::time_point t0 = steady_clock::now();
    if (writeToFile) {
        ofstream wData;
        wData.open("wData.csv");
        for (int i = 0; i < trainingPasses; ++i) {
            train();
            for (int j = 0; j < mnist.imgSize; ++j) {
                wData << w[j] << ",";
            }
            wData << b << endl;
        }
        wData.close();
    }
    else {
        for (int i = 0; i < trainingPasses; ++i) {
            train();
        }
    }
    steady_clock::time_point t1 = steady_clock::now();
    cout << "Training took " << duration_cast<milliseconds>(t1 - t0).count() << " ms" << endl;

    // Test model
    int truePos = 0;
    int trueNeg = 0;
    int falsePos = 0;
    int falseNeg = 0;
    double accuracy = 0;
    for (int i = 0; i < 2; ++i) {
        double pred = forwardPass(mnist.testImgData[i], i, true);
        int actual = mnist.testLblData[i];

        cout << "[" << i << "] pred: " << (pred < 0 ? testNums[0] : testNums[1])
        << " | actual: " << actual << endl; // This print statement changes the accuracy

        if (actual == testNums[0]) {
            if (pred < 0) {
                trueNeg++;
            }
            else {
                falsePos++;
            }
        }
        else {
            if (pred < 0) {
                falseNeg++;
            }
            else {
                truePos++;
            }
        }
    }
    accuracy = 100.0 * (truePos + trueNeg) / mnist.testSize;
    cout << truePos + trueNeg << "/" << mnist.testSize << " correct" << endl;
    cout << "The model is " << accuracy << "%" << " accurate" << endl;

    return 0;
}

mnist 對象來自我創建的一個類，該類讀取 mnist 數據集並將標簽存儲在一個一維整數數組中，並將圖像存儲在一個二維數組中，其中每個元素都是一個包含 784 (28x28) 個雙精度數的數組。 數組對齊，因此 trainLblData[n] 中的標簽是 trainImgData[n] 中的數字。

這是測試循環中存在 cout 語句時的輸出

Training took 114 ms
[0] pred: 2 | actual: 2
[1] pred: 2 | actual: 6
[2] pred: 6 | actual: 6
[3] pred: 6 | actual: 6
[4] pred: 2 | actual: 2
[5] pred: 2 | actual: 2
[6] pred: 2 | actual: 2
[7] pred: 2 | actual: 2
[8] pred: 6 | actual: 6
[9] pred: 2 | actual: 6
[10] pred: 2 | actual: 6
[11] pred: 2 | actual: 2
[12] pred: 2 | actual: 2
[13] pred: 6 | actual: 6
[14] pred: 2 | actual: 2
[15] pred: 6 | actual: 6
[16] pred: 6 | actual: 6
[17] pred: 6 | actual: 6
15/18 correct
The model is 83.3333% accurate

這是注釋掉 cout 語句時的輸出

Training took 162 ms
10/18 correct
The model is 55.5556% accurate

不太確定是什么導致了這種准確性的變化。 我什至放了一個斷點並查看了“pred”變量，當 cout 語句出現時它也會改變。 我不確定是否存在一些內存問題或編譯器出現問題，但它在代碼運行之間是一致的。

Answer 1

感謝@Taekhan。 我忘記在我的 forwardPass 函數中初始化“sum”，這導致我的代碼中出現未定義的行為。 初始化它實際上解決了這個問題。

cout 語句導致感知器准確性發生變化

問題描述

1 個解決方案

解決方案1
1 已采納 2022-06-18 05:10:58

cout 語句導致感知器准確性發生變化

問題描述

1 個解決方案

解決方案1 1 已采納 2022-06-18 05:10:58

解決方案1
1 已采納 2022-06-18 05:10:58