简体   繁体   中英

thread_group with boost::asio

I have written a small program with the following purpose:

  1. It reads a directory, and each path found in it is assigned to its own thread, which determines the size of that directory, for example:
  2. /home/dir1
  3. /home/dir2

I used this question as a reference:

Using Boost threads and io_service to create a threadpool

It runs slower than the single-threaded application. Have I made a mistake somewhere?

#include<boost/tokenizer.hpp>
#include<boost/asio.hpp>
#include <boost/bind/bind.hpp>
#include "scan_dir.h"  //local file
using namespace std::chrono;

/*
 * Build the list of directories that will be scanned.
 *
 * Input  : proj  - project path handed to Util::traverse_dir
 * Output : dirs  - overwritten with whatever Util::traverse_dir returns
 *          count - set to the number of entries stored in dirs
 */
void process_dir(const std::string &proj, uint64_t &count, std::vector<std::string> &dirs) {
    std::cout << "Creating Directory" << std::endl;

    // The second argument is fixed at 1 (presumably a traversal depth;
    // confirm against Util). Util::get_top_dir_depth(proj, 0) was the
    // alternative tried earlier and is left disabled.
    dirs = Util::traverse_dir(proj, 1);

    // Callers size their per-directory resources from this value.
    count = dirs.size();
}

int main(int argc, char *argv[]) {
    po::options_description desc("DiskAnalyzer Tool");
    po::variables_map vm;
    std::string user, proj;
    uint64_t f_size, maxdepth=0, dir_size=0;
    bool show_dir;

    Dirs d;
    
    desc.add_options()
        ("help,h", "DiskAnalyzer option")
        ("proj,p", po::value<string>(),"provide directory path which you would like to search data")
        ("user,u", po::value<string>(), "display file which is associated/Owner with user")
        ("dirsize,ds", po::value<uint64_t>()->default_value(1000000), "display dir which dir_size>=size by default 1000000 Byte:1MB")
        ("showdir,sh", po::value<bool>()->default_value(false), "show only dir which is associated with user")
        ("maxdepth", po::value<uint64_t>()->default_value(5), "show only dir which is associated with user")
        ("filesize,fs", po::value<uint64_t>()->default_value(10000), "display file which file_size>=size by default 10000 Byte:10KB");
    
    try {
        po::store(po::parse_command_line(argc, argv, desc), vm);
        po::notify(vm);
    }catch(const std::exception &err) {
        std::cerr<<err.what()<<std::endl;
        std::cout<<desc<<std::endl;
    }
    catch(...) {
        std::cout<<"Unkown exception"<<std::endl;
    }
    if(vm.count("help")) {
        std::cout<<"scan -p <proj_name> -u <user_name> -maxdepth <maxdepth> -fs <file_size> -d <debug>\n\n";
        std::cout<<desc<<std::endl;
        return 1;
    }
    if(vm.count("user")){
        user = vm["user"].as<string>();
    }
    if(vm.count("proj")){
        proj = vm["proj"].as<string>();
    }
    if(vm.count("filesize")){
        f_size = vm["filesize"].as<uint64_t>();
    }
    if(vm.count("showdir")) {
        show_dir = vm["showdir"].as<bool>();
    }
    if(vm.count("dirsize")) {
        dir_size = vm["dirsize"].as<uint64_t>();
    }
    if(vm.count("maxdepth")){
        maxdepth = vm["maxdepth"].as<uint64_t>();
    }
    
    if(show_dir) {
        d.scan_dir_name(proj, user, dir_size, maxdepth);
        return 0;
    } else {
        uint64_t count = 0;
        std::vector<std::string> dir;
        process_dir(proj, count, dir);
        std::cout<<"createing database["<<proj<<"   "<<count<<"  ]"<<std::endl;
        std::string db_name = Command::basename(proj);
        DataBase db[count];
        for (uint64_t i = 0; i<count; i++){
            db[i].set_db_name("DiskAnalyzer_"+ std::to_string(i)+"_" +db_name);
            if(!db[i].prepare_db()){
                std::cerr<<"[Error] DataBase operation failed"<<std::endl;
                return 1;
            }
        }
        std::size_t max_thread = dir.size() > 1000 ? 1000 : dir.size();
        //max_thread = 10;
        std::cout<<dir.size()<<std::endl;
        //contain directory information
        while(dir.size()){
            std::size_t dir_traverse = 0, db_count = 0;
            boost::asio::io_service io_service;
            boost::asio::io_service::work work(io_service);
            boost::thread_group threads;
            for (std::size_t i = 0; i < max_thread; ++i)
                threads.create_thread(boost::bind(&boost::asio::io_service::run, &io_service));
            
            for(auto it = dir.begin(); it != dir.end() && dir_traverse <max_thread; ++it){
                if(db_count>=count)
                    db_count = 0;
                try {
                    //this function determine determine size of directory. I had expectation
                    // each directory will go each thread
                    io_service.post(boost::bind(&Dirs::scan_dir, boost::ref(d), *it, db[db_count], user));
                } catch(...) {
                    std::cerr<<"got error"<<std::endl;
                    continue;
                }
                dir_traverse++;
                //dir_traverse = dir_traverse + max_thread;
                db_count++;
                //boost::this_thread::sleep(boost::posix_time::seconds(1));
            }
            io_service.stop();
            threads.join_all();
            dir.erase(dir.begin(), dir.begin()+dir_traverse);
            std::cout<<" [Remaining Processing dir cout  "<<dir.size()<<std::endl;
        }
        return 0;
    }
    std::cout<<desc<<std::endl;
    return 0;
}

You're stopping the entire pool each time around the top level loop.

Not only does that incur a lot of overhead in recreating and destroying the threads, it also completely negates the concurrency goal by calling `stop`.

`stop` forces the execution context to stop even though the tasks have not completed. In fact, no tasks may have started at all before the threads are joined.

The natural fix would be to take the pool out of the loop and simply join it at the end (without `stop`-ping the service, so that all tasks complete). However, in your case it requires more changes, because the tasks you `post` take references to locals in the loop.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM