結合整數和浮點數：性能考慮因素

Question

我有一組複雜的模板函數，它們在循環中進行計算，會把浮點數和uint32_t循環索引結合在一起運算。 我很驚訝地發現，對於這種函數，我的測試代碼在使用雙精度浮點數時比使用單精度浮點數時運行得更快。

作為測試，我將索引的類型更改為uint16_t。 在此之後，程序的double和float版本都變快了（正如預期的那樣），但現在float版本明顯快於double版本。 我還用uint64_t索引測試了該程序。 在這種情況下，double和float版本同樣慢。

我想這是因為uint32_t能完整放入double的尾數，卻放不進float的尾數。 一旦索引類型縮小到uint16_t，它也能放入float的尾數，轉換就應該很簡單。 對於uint64_t，轉換為double也需要舍入，這可以解釋為什麼兩個版本的性能相同。

任何人都可以證實這個解釋嗎？

編輯：使用int或long作為索引類型時，程序運行速度與使用uint16_t時一樣快。 我想這與我最初的猜測相矛盾。

編輯：我是在Windows上針對x86架構編譯該程序的。

編輯：這是一段代碼，它重現了使用uint32_t索引時double比float快、而使用int索引時兩者同樣快的現象。 請不要評論此代碼的用途。 它是一個經過修改的代碼片段，只是用來重現這一現象，本身沒有實際意義。

主文件：

#include "stdafx.h"

// Type aliases for the benchmark. Only returnType is referenced by the code
// shown in this file; the other two are not used here.
typedef short spectraType;
typedef int intermediateValue;
// Floating-point type passed to Preprocess as Out_Type and used for the
// processed spectrum in main().
typedef double returnType;

#include "Preprocess_t.h"
#include "Windows.h"
#include <iostream>

/// Benchmark driver: fills a set of synthetic spectra and times the baseline
/// correction over all of them.
///
/// Every spectrum consists of rand() noise, one fixed peak starting at bin 200,
/// randomly placed further peaks and a damped-sine offset that is subtracted at
/// the end. The order of the rand() calls is significant for reproducing the
/// same data, so it must not be rearranged.
///
/// @return 0 after printing the elapsed time (in milliseconds) and waiting for
///         one character on stdin.
int main()
{
    const size_t numberOfBins = 10000;
    const size_t numberOfSpectra = 500;
    const size_t peakWidth = 25;

    Preprocess<short, returnType> myPreprocessor;
    std::vector<returnType> processedSpectrum;

    std::vector<std::vector<short>> spectra(numberOfSpectra, std::vector<short>(numberOfBins));

    std::vector<float> peakShape(peakWidth);

    LARGE_INTEGER freq, start, stop;

    QueryPerformanceFrequency(&freq);

    // Gaussian-shaped peak template centred on peakWidth / 2.
    for (size_t i = 0; i < peakWidth; ++i)
    {
        peakShape[i] = static_cast<float>(exp(-(i - peakWidth / 2.0) *(i - peakWidth / 2.0) / 10.0));
    }

    for (size_t i = 0; i < numberOfSpectra; ++i)
    {
        size_t j = 0;

        // Leading stretch of pure noise.
        for (; j < 200; ++j)
        {
            spectra[i][j] = rand() % 100;
        }

        // One fixed peak directly after the noise. The bound is peakWidth (not a
        // literal) because peakShape holds exactly peakWidth samples.
        for (size_t k = 0; k < peakWidth; ++k)
        {
            spectra[i][j] = static_cast<short>(16383 * peakShape[k]);
            j++;
        }

        for (; j < numberOfBins; ++j)
        {
            // rand() is never negative, so no abs() is needed; a peak starts
            // whenever the draw is a multiple of numberOfBins / 4.
            const bool startPeak = (rand() % (numberOfBins / 4)) == 0;

            if (startPeak)
            {
                const short peakHeight = static_cast<short>(rand() % 16384);

                // j is advanced here AND by the enclosing for-statement, so the
                // bin right after each random peak keeps its initial value 0.
                for (size_t k = 0; k < peakWidth && j < numberOfBins; ++k)
                {
                    spectra[i][j] = static_cast<short>(peakHeight * peakShape[k] + rand() % 100);
                    j++;
                }
            }
            else
            {
                spectra[i][j] = rand() % 100;
            }
        }

        // Subtract a damped sine from every bin.
        for (j = 0; j < numberOfBins; ++j)
        {
            spectra[i][j] -= static_cast<short>(1000.0*exp(-(static_cast<float>(j) / (numberOfBins / 3.0)))*sin(static_cast<float>(j) / (numberOfBins / 10.0)));
        }

    }

    // This is where the critical code is called

    QueryPerformanceCounter(&start);

    for (size_t i = 0; i < numberOfSpectra; ++i)
    {
        myPreprocessor.SetSpectrum(&spectra[i], 1000, &processedSpectrum);
        myPreprocessor.CorrectBaseline(30, 2.0);
    }

    QueryPerformanceCounter(&stop);

    // Ticks divided by ticks-per-second gives seconds; scale to milliseconds so
    // the value matches the unit in the variable name.
    const double time_ms = 1000.0 * static_cast<double>(stop.QuadPart - start.QuadPart) / static_cast<double>(freq.QuadPart);

    std::cout << "time spend preprocessing: " << time_ms << std::endl;

    // Keep the console window open until the user presses a key.
    std::cin.ignore();

    return 0;
}

以及它所包含的頭文件Preprocess_t.h：

#pragma once

#include <vector>

// Integer type used by Preprocess for loop indices, window sizes and the
// stored x positions (fitWave_X). Container sizes assigned to it are narrowed
// to this width. The alternative width is kept commented out so it can be
// swapped in.
//typedef unsigned int indexType;
typedef unsigned short indexType;

template<typename T, typename Out_Type>
class Preprocess
{
public:
    Preprocess() :threshold(1), sdev(1), laserPeakThreshold(500), a(0), b(0), firstPointUsedAfterLaserPeak(0) {};
    ~Preprocess() {};

    void SetSpectrum(std::vector<T>* input, T laserPeakThreshold, std::vector<Out_Type>* processedSpectrum); ///@note We need the laserPeakThresholdParameter for the baseline correction, not onla for the shift.

    void CorrectBaseline(indexType numberOfPoints, Out_Type thresholdFactor);

private:

    void LinFitValues(indexType beginPoint);
    Out_Type SumOfSquareDiffs(Out_Type x, indexType n);

    Out_Type LinResidualSumOfSquareDist(indexType beginPoint);

    std::vector<T>* input;
    std::vector<Out_Type>* processedSpectrum;

    std::vector<indexType> fitWave_X;
    std::vector<Out_Type> fitWave;
    Out_Type threshold;
    Out_Type sdev;
    T laserPeakThreshold;
    Out_Type a, b;
    indexType firstPointUsedAfterLaserPeak;
    indexType numberOfPoints;
};

/// Slides a fit window of numberOfPoints points along the spectrum attached by
/// SetSpectrum() and, for every position, writes the input value minus the
/// fitted line into the output vector.
///
/// @param numberOfPoints   Window length; also stored in the member of the
///                         same name for use by LinFitValues().
/// @param thresholdFactor  Not used by this implementation.
///
/// Does nothing (apart from storing numberOfPoints) when the spectrum has no
/// more bins than the window is long.
template<typename T, typename Out_Type>
void Preprocess<T, Out_Type>::CorrectBaseline(indexType numberOfPoints, Out_Type thresholdFactor)
{
    this->numberOfPoints = numberOfPoints;

    // The size is deliberately narrowed to the configured index type.
    const indexType numberOfBins = static_cast<indexType>(input->size());

    // Guard before subtracting: with an unsigned int indexType the difference
    // below would wrap around and LinFitValues() would index past the buffers.
    // With the unsigned short indexType the loop simply would not have run.
    if (numberOfBins <= numberOfPoints) {
        return;
    }
    const indexType windowPositions = static_cast<indexType>(numberOfBins - numberOfPoints);

    indexType positionInFitWave = 0;

    for (indexType i = 0; i < windowPositions; i++) {
        // Updates the members a (intercept) and b (slope).
        LinFitValues(positionInFitWave);
        processedSpectrum->at(i + numberOfPoints) = input->at(i + numberOfPoints) - static_cast<Out_Type>(a + b*(i + numberOfPoints));

        // Append the newest point to the fit buffers for the next window.
        positionInFitWave++;
        fitWave[positionInFitWave + numberOfPoints - 1] = input->at(i + numberOfPoints - 1);
        fitWave_X[positionInFitWave + numberOfPoints - 1] = static_cast<indexType>(i + numberOfPoints - 1);
    }
}

/// Fits a straight line to the numberOfPoints points of fitWave / fitWave_X
/// starting at beginPoint and stores the intercept in a and the slope in b.
///
/// Two paths are used: when the first and last x of the window differ by
/// exactly numberOfPoints, SSxx comes from SumOfSquareDiffs(); otherwise the
/// means and both sums are accumulated point by point.
///
/// When SSxx is exactly zero (for example when all x in the window are equal)
/// the slope is set to 0 instead of dividing by zero, so a becomes the mean of
/// the y values rather than NaN.
///
/// @param beginPoint  Index of the first point of the window.
template<typename T, typename Out_Type>
void Preprocess<T, Out_Type>::LinFitValues(indexType beginPoint)
{
    Out_Type y_mean = static_cast<Out_Type>(0);
    Out_Type x_mean = static_cast<Out_Type>(0);
    Out_Type SSxy = static_cast<Out_Type>(0);
    Out_Type SSxx = static_cast<Out_Type>(0);
    const indexType endPoint = static_cast<indexType>(beginPoint + numberOfPoints);

    if ((fitWave_X[endPoint - 1] - fitWave_X[beginPoint]) == numberOfPoints)
    {
        x_mean = (fitWave_X[endPoint - 1] - fitWave_X[beginPoint]) / static_cast<Out_Type>(2);

        for (indexType i = beginPoint; i < endPoint; i++) {
            y_mean += fitWave[i];
        }

        y_mean /= numberOfPoints;

        SSxx = SumOfSquareDiffs(x_mean, fitWave_X[endPoint - 1]) - SumOfSquareDiffs(x_mean, fitWave_X[beginPoint]);

        for (indexType i = beginPoint; i < endPoint; i++)
        {
            SSxy += (fitWave_X[i] - x_mean)*(fitWave[i] - y_mean);
        }
    }
    else
    {
        for (indexType i = beginPoint; i < endPoint; i++) {
            y_mean += fitWave[i];
            x_mean += fitWave_X[i];
        }

        y_mean /= numberOfPoints;
        x_mean /= numberOfPoints;

        for (indexType i = beginPoint; i < endPoint; i++)
        {
            const Out_Type dx = (fitWave_X[i] - x_mean);
            SSxy += dx*(fitWave[i] - y_mean);
            SSxx += dx*dx;
        }
    }

    // Exact comparison is intended: only a true zero makes the division undefined.
    if (SSxx == static_cast<Out_Type>(0))
    {
        b = static_cast<Out_Type>(0);
    }
    else
    {
        b = SSxy / SSxx;
    }
    a = y_mean - b*x_mean;
}

/// Evaluates n*x^2 + n*(n-1)*x + (n-1)*n*(2n-1)/6.
///
/// n is converted to Out_Type before any multiplication so that the products
/// n*(n-1) and (n-1)*n*(2n-1) are no longer formed in integer arithmetic,
/// where they overflow for moderately large n.
///
/// NOTE(review): the closed form of the sum of (k - x)^2 for k = 0..n-1 has
/// a minus sign on the n*(n-1)*x term; the plus sign is kept here as written
/// -- confirm the intended formula.
///
/// @param x  Value the polynomial is evaluated at.
/// @param n  Count / upper index used in the closed form.
/// @return   The value of the polynomial as Out_Type.
template<typename T, typename Out_Type>
inline Out_Type Preprocess<T, Out_Type>::SumOfSquareDiffs(Out_Type x, indexType n)
{
    const Out_Type count = static_cast<Out_Type>(n);
    const Out_Type one = static_cast<Out_Type>(1);

    return count*x*x
        + count*(count - one)*x
        + ((count - one)*count*(static_cast<Out_Type>(2)*count - one)) / static_cast<Out_Type>(6);
}

/// Sums the squared vertical distances between the stored fit points and the
/// current line a + b*x over the window of numberOfPoints points that starts
/// at beginPoint.
///
/// @param beginPoint  Index of the first point of the window.
/// @return            Sum of the squared residuals.
template<typename T, typename Out_Type>
Out_Type Preprocess<T, Out_Type>::LinResidualSumOfSquareDist(indexType beginPoint)
{
    Out_Type accumulated = 0;

    indexType offset = 0;
    while (offset < numberOfPoints) {
        // Same promoted index expression as the addition of the two indexType values.
        const auto position = offset + beginPoint;
        const Out_Type residual = fitWave[position] - (a + b*fitWave_X[position]);
        accumulated += residual*residual;
        ++offset;
    }

    return accumulated;
}


/// Attaches a spectrum and its output vector to this object.
///
/// Stores both pointers and the threshold, and resizes the two fit buffers and
/// the output vector to the length of the input. resize() keeps elements that
/// already exist, so buffer contents from an earlier spectrum of the same
/// length are not cleared.
///
/// @param input               Spectrum to process; dereferenced here.
/// @param laserPeakThreshold  Stored in the member of the same name.
/// @param processedSpectrum   Output vector; resized here.
template<typename T, typename Out_Type>
inline void Preprocess<T, Out_Type>::SetSpectrum(std::vector<T>* input, T laserPeakThreshold, std::vector<Out_Type>* processedSpectrum)
{
    // The parameters shadow the members of the same name, hence the explicit this->.
    this->input = input;

    const auto binCount = input->size();
    fitWave_X.resize(binCount);
    fitWave.resize(binCount);

    this->laserPeakThreshold = laserPeakThreshold;

    this->processedSpectrum = processedSpectrum;
    this->processedSpectrum->resize(binCount);
}

Answer 1

您正在使用MSVC嗎？ 當我實現一段基本上是矩陣乘法加向量加法的代碼時，也遇到過類似的現象。 當時我以為float會更快，因為SSE寄存器中可以打包更多的float，所以它們能更好地進行SIMD並行化。 但實際上，使用double要快得多。

經過一些調查，我從匯編代碼中發現，float需要從FPU的內部精度轉換回來，而這種舍入消耗了大部分運行時間。 您可以以降低精度為代價，將浮點模型（FP model）更改為更快的模型。 在SO的舊帖子中也有一些相關討論。

結合整數和浮點數：性能考慮因素

問題描述

1 個解決方案

解決方案1
0 2016-06-22 15:32:08

結合整數和浮點數：性能考慮因素

問題描述

1 個解決方案

解決方案1 0 2016-06-22 15:32:08

解決方案1
0 2016-06-22 15:32:08