简体   繁体   中英

Non recursive Kosaraju's two pass algorithm implementation taking forever to execute on a large data set

  • I coded this for an assignment which has passed its deadline.

  • This implementation works completely fine with various smaller test cases and displays the sizes of the 5 largest Strongly Connected Components in the graph as it should.

  • But seems to execute forever when i run it on the assignment data set of about 875714 vertices. (Doesn't even come out of the first DFS pass after 60mins)

  • I've used the non recursive stack implementation of the DFS routine as i heard that the large number of vertices was causing recursion stack overflow problems.

  • It would be really helpful if anyone could point out, what in this code is making it behave this way with the large dataset.

  • The input file consists of list of edges in the graph. one edge/line.

(eg):

1 2

2 3

3 1

3 4

5 4

Download link for the Large graph test case zip file

Link to my program file

Code follows:

//Macro definitions and Global variables

#define N 875714
#define all(a) (a).begin(), (a).end()
#define tr(c,i) for(typeof((c).begin()) i = (c).begin(); i != (c).end(); i++)

vi v(N), ft, size;

//Non recursive DFS algorithm

void DFS(vvi g, int s, int flag)
{
stack<int> stk;
stk.push(s);
v[s] = 1;

int jumpOut, count;
vi::iterator i;

if(flag == 2)
     count = 1;

while(!stk.empty())
{
i = g[stk.top()].begin();
jumpOut = 0;

for(; i != g[stk.top()].end(); i++)
{
    if(v[*i] != 1)
    {
        stk.push(*i);
        v[*i] = 1;

        if(flag == 2) //Count the SCC size
            count++;

        jumpOut = 1; //Jump to the while loop's beginning
        break;
    }
 }

 if(flag == 1 && jumpOut == 0) //Record the finishing time order of vertices
    ft.push_back(stk.top());

 if(jumpOut == 0)
      stk.pop();
}

if(flag == 2)
    size.push_back(count); //Store the SCC size
}

// The 2 pass Kosaraju algorithm

void kosaraju(vvi g, vvi gr)
{
cout<<"\nInside pass 1\n";

for(int i = N - 1; i >= 0; i--)
    if(v[i] != 1)
        DFS(gr, i, 1);

cout<<"\nPass 1 completed\n";

fill(all(v), 0);

cout<<"\nInside pass 2\n";

for(int i = N - 1; i >= 0; i--)
    if(v[ ft[i] ] != 1)
        DFS(g, ft[i], 2);

cout<<"\nPass 2 completed\n";
}

.

int main()
{
vvi g(N), gr(N);
ifstream file("/home/tauseef/Desktop/DAA/SCC.txt");
int first, second;
string line;

while(getline(file,line,'\n')) //Reading from file
{
    stringstream ss(line);
    ss >> first;
    ss >> second;
    if(first == second) //Eliminating self loops
        continue;

    g[first-1].push_back(second-1); //Creating G & Grev
    gr[second-1].push_back(first-1);
}

cout<<"\nfile read successfully\n";

kosaraju(g, gr);

cout<<"\nFinishing order is: ";
tr(ft, j)
    cout<<*j+1<<" ";
cout<<"\n";

sort(size.rbegin(), size.rend()); //Sorting the SCC sizes in descending order

cout<<"\nThe largest 5 SCCs are: ";
tr(size, j)
    cout<<*j<<" ";
cout<<"\n";

file.close();
}

There are several improvements that you can apply:
1- cin is not as fast scanf for large inputs : Because your input file is huge you better use scanf to read your data.
2- It is not a good idea to pass large data to functions by value : You have two huge graphs in your code that you pass them to functions by value. It takes a lot of time because every time you are making a copy of the data.
3- There is no need to use iterator for traversing a vector : Because you are using a vector and you have random access to it via [] operator there is no need to use iterator to access data.
4- Your DFS is not efficient : This is the most important one. Every time the program go to the beginning of the while and check the adjacency list of the element on top of the stack you start from the beginning and check elements. This make the algorithm very inefficient because you are checking some things over and over again. You can simply store how many of the children you have checked and when you go back to this element you start from the next element instead of starting from start.

#include<iostream>
#include<vector>
#include<stack>
#include<algorithm>
#include<fstream>
#include<string>
#include<sstream>
using namespace std;

typedef vector<int> vi;
typedef vector<vi> vvi;

#define N 875714
#define sz(a) int((a).size())
#define all(a) (a).begin(), (a).end()
#define tr(c,i) for(typeof((c).begin()) i = (c).begin(); i != (c).end(); i++)

vi v(N), ft, size;
vi childsVisited(N);

void DFS(vvi &g, int s, int flag)
{
    stack<int> stk;
    stk.push(s);
    v[s] = 1;

    int jumpOut, count;

    if(flag == 2)
        count = 1;
    int counter = 0;
    while(!stk.empty())
    {
        jumpOut = 0;
        int cur = stk.top();
        for ( ;childsVisited[cur] < g[cur].size(); ++childsVisited[cur] )
        //for ( int i=0; i< g[cur].size(); ++i )
        //for(; i != g[stk.top()].end(); i++)
        {
            int i = childsVisited[cur];
            int next = g[cur][i];
            if(v[next] != 1)
            {
                stk.push(next);
                v[next] = 1;
                if(flag == 2) //Count the SCC size
                    count++;

                jumpOut = 1; //Jump to the while loop's beginning
                break;
            }
        }

        if(flag == 1 && jumpOut == 0) //Record the finishing time order of vertices
            ft.push_back(stk.top());

        if(jumpOut == 0)
            stk.pop();
    }

    if(flag == 2)
        size.push_back(count); //Store the SCC size
}

void kosaraju(vvi &g, vvi &gr)
{
    cout<<"\nInside pass 1\n";

    for(int i = N - 1; i >= 0; i--)
        if(v[i] != 1)
            DFS(gr, i, 1);

    cout<<"\nPass 1 completed\n";

    fill(all(v), 0);
    fill(all(childsVisited), 0);

    cout<<"\nInside pass 2\n";

    for(int i = N - 1; i >= 0; i--)
        if(v[ ft[i] ] != 1)
            DFS(g, ft[i], 2);

    cout<<"\nPass 2 completed\n";
}

int main()
{
    freopen("input.txt","r",stdin);
    vvi g(N), gr(N);
    //ifstream file("/home/tauseef/Desktop/DAA/SCC.txt");
    int first, second;
    //string line;
    unsigned long int cnt = 0;

    //while(getline(file,line,'\n')) //Reading from file
    //{
        //stringstream ss(line);
        //ss >> first;
        //ss >> second;
        //if(first == second) //Eliminating self loops
            //continue;
    for ( int i = 0; i < 5105043; ++i ){
        int first, second;
        scanf("%d %d",&first,&second);
        g[first-1].push_back(second-1); //Creating G & Grev
        gr[second-1].push_back(first-1);
    }
        //cnt++;
    //}

    cout<<"\nfile read successfully\n";


    kosaraju(g, gr);

    cout<<"\nFinishing order is: ";

    sort(size.rbegin(), size.rend()); //Sorting the SCC sizes in descending order

    cout<<"\nThe largest 5 SCCs are: ";

}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM