[英]How to increase read and write speed ifstream and ofstream for std::vector<uint8_t>
我必須在磁盤上讀取和寫入大量數據,目前是通過以下兩個函數來完成的:
// Reads the entire file at `path` in binary mode and returns its contents as bytes.
// No check is made that the file was opened successfully.
std::vector<uint8_t> read_vector_from_disk(std::string path)
{
    using ByteIterator = std::istreambuf_iterator<char>;

    std::ifstream source(path, std::ios::in | std::ios::binary);

    // The bytes are pulled one at a time from the stream buffer until it is exhausted;
    // the final size is not known to the vector up front.
    std::vector<uint8_t> bytes;
    bytes.assign(ByteIterator(source), ByteIterator());
    return bytes;
}
// Writes every byte of `data`, in order, to the file at `path` opened in binary mode.
// The result is always true; stream errors are not inspected.
bool write_vector_to_disk(std::vector<uint8_t> data, std::string path)
{
    std::ofstream sink(path, std::ios::out | std::ios::binary);

    // Each byte is inserted into the stream individually as an unsigned character.
    for (uint8_t byte : data)
        sink << byte;

    return true;
}
我對使用這種方法的讀寫時間感到失望:
File name: "1_IMGP3437.JPG"
File size: 11987566 Bytes
Average read time: 34554.9 Microseconds
Average write time: 186344 Microseconds
根據我使用的讀/寫基准測試工具,我預計讀取速度接近520 MB / s,寫入速度接近495 MB / s。
我該怎麼做才能優化這個流程? 我一直在研究內存映射I/O,但不確定這是否是正確的方向;我對內存映射I/O的了解僅來自我讀過的相關評論。
我的基准測試的完整源代碼可以在這里找到:
https://gist.github.com/looopTools/5ea6b21dfa33e70890cb486b0db222e1
和Makefile: https://gist.github.com/looopTools/ccfee5b669caf44c3fa2993c48a12332
使用內存映射文件,可以避免在讀取和寫入時在內核和用戶空間之間進行大量復制,這要快得多。
您讀入向量的代碼在讀取文件的過程中會反覆重新分配向量,這會使它變慢。 下面的基准測試預先創建與文件大小相同的向量,以避免重新分配,從而大大加快了讀取速度。
MiB/s 的數值很大,是因為內核將文件數據緩存在頁面緩存中,因此在基准測試期間不會發生實際的磁盤I/O。
結果:
Input file: /home/max/Downloads/1.tif
Output dir: /home/max/tmp
File name: "1.tif"
File size: 63106564
*** Benchmarking method IOstreams ***
Checksums: 132769358678
Average read time: 27225.9us
Average read speed: 2210.51MiB/s
Average write time: 670221us
Average write speed: 2210.51MiB/s
*** Benchmarking method mmap ***
Checksums: 132769358678
Average read time: 9554.5us
Average read speed: 6298.93MiB/s
Average write time: 28803.9us
Average write speed: 6298.93MiB/s
代碼:
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/iostreams/device/mapped_file.hpp>
// Empty tag types; passing one as an argument selects the matching overload.
struct IostreamTag {};
struct MmapTag {};

// Display label for the method identified by the tag.
inline const char* name(IostreamTag)
{
    return "IOstreams";
}

inline const char* name(MmapTag)
{
    return "mmap";
}
// Returns the argument itself when given a plain object.
template<typename Value>
inline Value& get_ref(Value& value)
{
    return value;
}

// Returns the pointee when given a std::unique_ptr, so callers can treat
// owned-by-pointer and by-value results uniformly.
template<typename Pointee>
inline Pointee& get_ref(std::unique_ptr<Pointee>& owner)
{
    return *owner;
}
// Sums all elements of `data`, each converted to an unsigned 8-bit value first,
// and returns the 64-bit total.
template<class C>
uint64_t get_checksum(C const& data) {
    uint64_t total = 0;
    auto const last = std::end(data);
    for(auto it = std::begin(data); it != last; ++it)
        total += static_cast<uint8_t>(*it);
    return total;
}
// Reads the whole file at `path` into a vector using a single bulk stream read.
//
// The vector is sized to the file's length up front so it never reallocates.
// Throws std::runtime_error if the file cannot be opened or fewer bytes than
// the reported file size could be read; std::experimental::filesystem::file_size
// may throw on its own if the size cannot be determined.
std::vector<char> read(IostreamTag, std::string const& path) {
    size_t file_size = std::experimental::filesystem::file_size(path);
    std::vector<char> data(file_size); // Avoid memory reallocations.
    std::ifstream instream(path, std::ios::in | std::ios::binary);
    if(!instream)
        throw std::runtime_error("cannot open file for reading: " + path);
    instream.read(data.data(), static_cast<std::streamsize>(file_size));
    // gcount() reports how many bytes the read actually delivered. It stays valid
    // after a short read, whereas tellg() returns -1 once the stream has failed.
    if(instream.gcount() != static_cast<std::streamsize>(file_size))
        throw std::runtime_error("short read from file: " + path);
    return data;
}
// Maps the file at `path` with Boost's mapped_file in readwrite mode and returns
// the mapping wrapped in a unique_ptr.
std::unique_ptr<boost::iostreams::mapped_file> read(MmapTag, std::string const& path) {
    namespace bio = boost::iostreams;

    std::unique_ptr<bio::mapped_file> mapping =
        std::make_unique<bio::mapped_file>(path, bio::mapped_file::readwrite);
    return mapping;
}
// Writes all of `data` to the file at `path` (binary mode) with one bulk stream write.
//
// Throws std::runtime_error if the file cannot be opened or the write/close fails.
void write(IostreamTag, std::vector<char> const& data, std::string const& path) {
    std::ofstream outstream(path, std::ios::out | std::ios::binary);
    if(!outstream)
        throw std::runtime_error("cannot open file for writing: " + path);
    // A single write() hands the whole buffer to the stream instead of inserting
    // the bytes one by one through an output iterator.
    outstream.write(data.data(), static_cast<std::streamsize>(data.size()));
    // Close explicitly so that an error while flushing is observable here rather
    // than being silently dropped in the destructor.
    outstream.close();
    if(!outstream)
        throw std::runtime_error("failed to write file: " + path);
}
// Copies the bytes of the mapped input `data` into a second mapping opened on `path`.
// The destination is opened in readwrite mode with new_file_size set to data.size()
// (presumably this makes Boost create the file at that size — see the
// Boost.Iostreams mapped_file documentation).
void write(MmapTag, boost::iostreams::mapped_file const& data, std::string const& path) {
    namespace bio = boost::iostreams;

    bio::mapped_file_params target_params;
    target_params.new_file_size = data.size();
    target_params.flags = bio::mapped_file::readwrite;
    target_params.path = path;

    bio::mapped_file target(target_params);
    std::copy(data.begin(), data.end(), target.begin());
}
// Prints the file-name component and the size of the file at `path` to std::cout,
// then returns that size.
size_t print_file_data(std::string path) {
    namespace fs = std::experimental::filesystem;

    fs::path const file(path);
    std::cout << "File name: " << file.filename() << '\n';

    // The size is queried only after the name line has been emitted.
    size_t const bytes = fs::file_size(file);
    std::cout << "File size: " << bytes << '\n';
    return bytes;
}
// Benchmarks reading `input_file` and writing a copy of it into `output_directory`
// using the I/O method selected by tag `t`, and prints average times and speeds.
//
// t                 tag selecting the read()/write()/name() overloads to use
// input_file        path of the file to read
// output_directory  directory where the copy (same file name) is written
// file_size         size of input_file in bytes, used for the MiB/s figures
//
// Each phase runs 11 times; the first run is treated as a warm-up and excluded
// from the averages.
template<class Tag>
void benchmark(Tag t, std::string const& input_file, std::string const& output_directory, size_t file_size) {
    using Clock = std::chrono::high_resolution_clock;

    // Whole microseconds elapsed between two clock readings.
    auto const elapsed_us = [](Clock::time_point from, Clock::time_point to) {
        return static_cast<uint64_t>(
            std::chrono::duration_cast<std::chrono::microseconds>(to - from).count());
    };

    uint32_t const iterations = 11;
    double const size_mib = file_size / (1024 * 1024.);

    std::cout << "\n*** Benchmarking method " << name(t) << " *** \n";

    // --- Read phase ---
    uint64_t read_time_total = 0;
    uint64_t checksums = 0;
    for(uint32_t i = 0; i < iterations; ++i) {
        auto const start = Clock::now();
        auto data = read(t, input_file);
        // The checksum is computed inside the timed region so that every byte of
        // the data is actually touched before the clock is stopped.
        checksums += get_checksum(get_ref(data));
        auto const end = Clock::now();
        if(i) // Skip the warm-up iteration.
            read_time_total += elapsed_us(start, end);
    }
    double const avg_read_time = read_time_total / static_cast<double>(iterations - 1);
    std::cout << "Checksums: " << checksums << '\n';
    std::cout << "Average read time: " << avg_read_time << "us" << '\n';
    std::cout << "Average read speed: " << size_mib / (avg_read_time / 1000000) << "MiB/s" << '\n';

    // --- Write phase ---
    std::experimental::filesystem::path in(input_file);
    std::experimental::filesystem::path out(output_directory);
    out = out / in.filename();
    auto data = read(t, input_file);
    uint64_t write_time_total = 0;
    for(uint32_t i = 0; i < iterations; ++i) {
        auto const start = Clock::now();
        // Removing the previous output is part of the timed region.
        std::experimental::filesystem::remove(out);
        write(t, get_ref(data), out.string());
        auto const end = Clock::now();
        if(i) // Skip the warm-up iteration.
            write_time_total += elapsed_us(start, end);
    }
    double const avg_write_time = write_time_total / static_cast<double>(iterations - 1);
    std::cout << "Average write time: " << avg_write_time << "us" << '\n';
    // The write speed must be derived from the write time, not the read time.
    std::cout << "Average write speed: " << size_mib / (avg_write_time / 1000000) << "MiB/s" << '\n';
}
// Entry point. Expects two arguments: the input file and the destination directory.
// Prints both, prints the input file's name and size, then runs the benchmark once
// with the IOstreams method and once with the mmap method.
int main(int argc, char* argv[]) {
    bool const have_both_args = argc >= 3;
    if (!have_both_args) {
        std::cerr << "forgot input file and destination dir" << '\n';
        return 1;
    }

    std::string const source_file(argv[1]);
    std::string const target_dir(argv[2]);
    std::cout << "Input file: " << source_file << '\n'
              << "Output dir: " << target_dir << '\n';

    size_t const bytes = print_file_data(source_file);
    benchmark(IostreamTag{}, source_file, target_dir, bytes);
    benchmark(MmapTag{}, source_file, target_dir, bytes);
    return 0;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.