[英]Reading then adding large number of integers from binary file fast in C/C++
我正在編寫代碼,用於在32位Linux操作系統上使用C / C ++從二進制文件中讀取無符號整數,該操作系統旨在在8核x86系統上運行。 應用程序接受一個輸入文件,該文件包含一個接一個的小端格式的無符號整數。 因此輸入文件大小(以字節為單位)是4的倍數。文件中可能有十億個整數。 讀取和添加所有整數並以64位精度返回總和的最快方法是什么?
以下是我的實施。 檢查損壞數據的錯誤不是這里的主要問題,在這種情況下輸入文件被認為沒有任何問題。
#include <iostream>
#include <fstream>
#include <pthread.h>
#include <string>
#include <string.h>
using namespace std;
// Path of the input file (shared read-only with the worker threads).
string filepath;
// Bytes read per ifstream::read call; scaled up 16x in main() for large files.
unsigned int READBLOCKSIZE = 1024*1024;
// Total input file size in bytes (the file is a sequence of little-endian uint32 values).
unsigned long long nFileLength = 0;
unsigned long long accumulator = 0; // assuming 32 bit OS running on X86-64
// Per-thread starting byte offset into the file (filled in by main before thread start).
unsigned int seekIndex[8] = {};
// Size in bytes of each thread's slice of the file (a multiple of 4).
unsigned int threadBlockSize = 0;
// Per-thread partial sums; folded into `accumulator` after all joins complete.
unsigned long long acc[8] = {};
pthread_t thread[8];
void* threadFunc(void* pThreadNum); // worker: sums one file slice into acc[threadNum]
//time_t seconds1;
//time_t seconds2;
// Sums all little-endian unsigned 32-bit integers in the file named by argv[1]
// into a 64-bit accumulator and prints the total.
// Small files (< 16 read blocks) are summed on the main thread; larger files
// are split into 8 slices processed by 8 worker threads (see threadFunc).
// Returns 0 on success, -1 on missing argument or unopenable file.
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        cout << "Please enter a file path\n";
        return -1;
    }
    string path(argv[1]);
    filepath = path;
    ifstream ifsReadFile(filepath.c_str(), ifstream::binary); // Create FileStream for the file to be read
    if (0 == ifsReadFile.is_open())
    {
        cout << "Could not find/open input file\n";
        return -1;
    }
    // Determine the file size by seeking to the end, then rewind.
    ifsReadFile.seekg(0, ios::end);
    nFileLength = ifsReadFile.tellg();
    ifsReadFile.seekg(0, ios::beg);
    if (nFileLength < 16 * READBLOCKSIZE)
    {
        // Small file: sum it on this thread, one block at a time.
        // NOTE: operator new[] throws std::bad_alloc on failure, so the
        // original null check on the result was dead code and is removed.
        char* readBuf = new char[READBLOCKSIZE];
        unsigned int startOffset = 0;
        if (nFileLength > READBLOCKSIZE)
        {
            while (startOffset + READBLOCKSIZE < nFileLength)
            {
                ifsReadFile.read(readBuf, READBLOCKSIZE); // At this point ifsReadFile is open
                // BUG FIX: the file holds *unsigned* integers. Reading through
                // int* sign-extends values >= 0x80000000 when added to the
                // unsigned long long accumulator, corrupting the 64-bit sum.
                unsigned int* num = reinterpret_cast<unsigned int*>(readBuf);
                for (unsigned int i = 0; i < (READBLOCKSIZE / 4); i++)
                {
                    accumulator += *(num + i);
                }
                startOffset += READBLOCKSIZE;
            }
        }
        // Final (possibly partial) block; always <= READBLOCKSIZE bytes.
        if (nFileLength - startOffset > 0)
        {
            ifsReadFile.read(readBuf, nFileLength - startOffset);
            unsigned int* num = reinterpret_cast<unsigned int*>(readBuf);
            for (unsigned int i = 0; i < ((nFileLength - startOffset) / 4); ++i)
            {
                accumulator += *(num + i);
            }
        }
        delete[] readBuf; readBuf = 0;
    }
    else
    {
        // Large file: split the work across 8 threads.
        unsigned int currthreadnum[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        if (nFileLength > 200000000) READBLOCKSIZE *= 16; // read larger blocks for very large files
        // Each of the first 7 threads gets 4 * (nFileLength / 28) bytes (kept a
        // multiple of 4 so integers are never split); the 8th thread picks up
        // whatever remains (threadFunc clamps its end offset to nFileLength).
        if (nFileLength % 28)
        {
            threadBlockSize = (nFileLength / 28);
            threadBlockSize *= 4;
        }
        else
        {
            // When divisible by 28 this yields the same value as the branch above.
            threadBlockSize = (nFileLength / 7);
        }
        for (int i = 0; i < 8; ++i)
        {
            seekIndex[i] = i * threadBlockSize;
        }
        // Start all workers, then wait for every one of them.
        for (int i = 0; i < 8; ++i)
        {
            pthread_create(&thread[i], NULL, threadFunc, (void*)(currthreadnum + i));
        }
        for (int i = 0; i < 8; ++i)
        {
            pthread_join(thread[i], NULL);
        }
        // Fold the per-thread partial sums into the final total.
        for (int i = 0; i < 8; ++i)
        {
            accumulator += acc[i];
        }
    }
    cout << accumulator << "\n";
    return 0;
}
// Worker thread entry point: sums the unsigned 32-bit integers in this
// thread's slice of the input file into acc[threadNum].
// pThreadNum points at this thread's index (0-7) inside main's currthreadnum.
// The slice starts at seekIndex[threadNum] and spans threadBlockSize bytes,
// clamped to nFileLength for the last thread. Always returns NULL.
void* threadFunc(void* pThreadNum)
{
    unsigned int threadNum = *reinterpret_cast<unsigned int*>(pThreadNum);
    unsigned int startOffset = seekIndex[threadNum];
    // Each thread opens its own stream so per-thread seeks/reads don't interfere.
    ifstream ifs(filepath.c_str(), ifstream::binary);
    if (0 == ifs.is_open())
    {
        cout << "Could not find/open input file\n";
        return 0;
    }
    // BUG FIX: allocate the buffer only after the file opened successfully;
    // the original allocated first and leaked it on the early-return path above.
    char* localReadBuf = new char[READBLOCKSIZE];
    ifs.seekg(startOffset, ios::beg); // Seek to the correct offset for this thread
    acc[threadNum] = 0;
    unsigned int endOffset = startOffset + threadBlockSize;
    if (endOffset > nFileLength) endOffset = nFileLength; // last thread takes the remainder
    if ((endOffset - startOffset) > READBLOCKSIZE)
    {
        while (startOffset + READBLOCKSIZE < endOffset)
        {
            ifs.read(localReadBuf, READBLOCKSIZE); // At this point ifs is open
            // BUG FIX: the data is *unsigned*; reading through int* would
            // sign-extend values >= 0x80000000 and corrupt the 64-bit partial sum.
            unsigned int* num = reinterpret_cast<unsigned int*>(localReadBuf);
            for (unsigned int i = 0; i < (READBLOCKSIZE / 4); i++)
            {
                acc[threadNum] += *(num + i);
            }
            startOffset += READBLOCKSIZE;
        }
    }
    // Final (possibly partial) chunk of this slice.
    if (endOffset - startOffset > 0)
    {
        ifs.read(localReadBuf, endOffset - startOffset);
        unsigned int* num = reinterpret_cast<unsigned int*>(localReadBuf);
        for (unsigned int i = 0; i < ((endOffset - startOffset) / 4); ++i)
        {
            acc[threadNum] += *(num + i);
        }
    }
    delete[] localReadBuf; localReadBuf = 0;
    return 0;
}
我寫了一個小的C#程序來生成用於測試的輸入二進制文件。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace BinaryNumWriter
{
    // Writes 500,000,000 random little-endian uint32 values to "test.txt"
    // and keeps a 64-bit running total for verifying the C++ reader's sum.
    class Program
    {
        static UInt64 total = 0;
        static void Main(string[] args)
        {
            // using guarantees the stream is flushed and closed even if an
            // exception is thrown mid-write.
            using (BinaryWriter bw = new BinaryWriter(File.Open("test.txt", FileMode.Create)))
            {
                Random rn = new Random();
                for (UInt32 i = 1; i <= 500000000; ++i)
                {
                    // Next(0, 0xffff) yields values in [0, 65534].
                    UInt32 num = (UInt32)rn.Next(0, 0xffff);
                    bw.Write(num);
                    total += num;
                }
            }
            // BUG FIX: the original computed `total` but never emitted it, so
            // there was nothing to compare the C++ program's output against.
            Console.WriteLine(total);
        }
    }
}
在Core i5機器上運行程序@ 3.33 Ghz(它的四核,但我現在得到的)2 GB RAM和Ubuntu 9.10 32位具有以下性能數字
100 個整數:約 0 秒(不然就太說不過去了);100,000 個整數:不到 1 秒;100,000,000 個整數:約 7 秒;500,000,000 個整數:約 29 秒(輸入文件 1.86 GB)。
我不確定硬盤是5400RPM還是7200RPM。 我嘗試了不同的緩沖區大小進行讀取,一次讀取16 MB的大輸入文件就是最佳點。
有沒有更好的方法從文件中更快地讀取以提高整體性能? 是否有更智能的方法可以更快地添加大型整數數組並重復折疊? 我編寫代碼的方式是否有任何重大的障礙/我做了一些明顯錯誤的事情,這花費了很多時間?
我該怎么做才能更快地讀取和添加數據?
謝謝。
Chinmay
以您這種方式從多個線程同時訪問機械硬盤,會造成磁頭來回移動(導致讀取變慢)。你幾乎可以肯定是受 IO 限制(IO-bound)的(1.86GB 文件約 65MB/s)。 嘗試通過以下方式更改策略:
你需要相當多的同步才能讓它完美運行,我認為通過執行順序文件訪問可以最大限度地提高你的硬盤/文件系統IO功能。 YMMV上的小文件,可以以閃電般的速度從緩存中緩存和提供。
你可以嘗試的另一件事是只啟動7個線程,為主線程和系統的其余部分留一個空閑CPU。
..或獲得SSD :)
編輯:
為簡單起見,先測量一下單線程讀完整個文件而不做任何處理(直接丟棄緩衝區)需要多長時間。這個時間再加上一點點額外開銷(epsilon),就是你完成這項工作速度的理論極限。
如果要快速讀取(或寫入)大量數據,並且不希望對該數據進行大量處理,則需要避免在緩沖區之間使用額外的數據副本。 這意味着你想要避免fstream或FILE抽象(因為它們引入了需要復制的額外緩沖區),並避免在內核和用戶緩沖區之間復制內容的讀/寫類型調用。
相反,在 Linux 上,你應該使用 mmap(2)。在 64 位操作系統上,只需將整個文件 mmap 到內存中,再用 madvise(MADV_SEQUENTIAL) 告訴內核你將主要按順序訪問它即可。對於 32 位操作系統,您需要分塊進行 mmap,每次取消映射前一個塊。採用與您當前程序非常相似的結構——每個線程每次只映射一個固定大小的塊——應該可以正常工作。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.