SEGFAULT when using shared_ptr

Question

I'm trying to implement the Lazy Concurrent List-based Set in C++ by using shared_ptr . My reasoning is that unreachable nodes will be automatically freed by the last shared_ptr . As per my understanding, increment and decrement operation on a shared_ptr's reference count is atomic. Which means only the last shared_ptr with reference to the node should call delete/free for that node. I ran the program for multiple threads , but my program is crashing with the error double free called or just Segmentation Fault(SIGSEGV) . I don't understand how this is possible. Given below is my code for the implementation, with the method names signifying their intended operation.

#include<thread>
#include<iostream>
#include<mutex>
#include<climits>

using namespace std;

class Thread
{
   public:
      std::thread t;  
};
int n=50,ki=100,kd=100,kc=100;`/*no of threads, no of inserts,deletes & searches*/`


class Node
{
public:
      int key;
      shared_ptr<Node> next;
      bool marked;
      std::mutex nodeLock;

      Node() {
         key=0;
         next = nullptr;
         marked = false;
      }

      Node(int k) {
         key = k;
         next = nullptr;
         marked = false;
      }

      void lock() {
         nodeLock.lock();
      }

      void unlock() {
         nodeLock.unlock();
      }

      ~Node()
      {
      }
};

class List {
   shared_ptr<Node> head;
   shared_ptr<Node> tail;

public:

   bool validate(shared_ptr<Node> pred, shared_ptr<Node> curr) {
      return !(pred->marked) && !(curr->marked) && ((pred->next) == curr);
   }

   List() {
      head=make_shared<Node>(INT_MIN);
      tail=make_shared<Node>(INT_MAX);
      head->next=tail;
   }

   bool add(int key)
   {
      while(true)
      {
         /*shared_ptr<Node> pred = head;
         shared_ptr<Node> curr = pred->next;*/
        auto pred = head;
        auto curr = pred->next;

         while (key>(curr->key))
         {
            pred = curr;
            curr = curr->next;
         }

         pred->lock();
         curr->lock();

         if (validate(pred,curr))
         {
            if (curr->key == key)
            {
               curr->unlock();
               pred->unlock();
               return false;
            }
            else
            {
                shared_ptr<Node> newNode(new Node(key));
               //auto newNode = make_shared<Node>(key);
                //shared_ptr<Node> newNode = make_shared<Node>(key);
                newNode->next = curr;
                pred->next = newNode;
                curr->unlock();
                pred->unlock();
                return true;
            }
         }
         curr->unlock();
         pred->unlock();
      }
   }

   bool remove(int key)
   {
      while(true)
      {
         /*shared_ptr<Node> pred = head;
         shared_ptr<Node> curr = pred->next;*/

        auto pred = head;
        auto curr = pred->next;

         while (key>(curr->key))
         {
            pred = curr;
            curr = curr->next;
         }

         pred->lock();
         curr->lock();

         if (validate(pred,curr))
         {
            if (curr->key != key)
            {
               curr->unlock();
               pred->unlock();
               return false;
            }
            else
            {
               curr->marked = true;
               pred->next = curr->next;
               curr->unlock();
               pred->unlock();
               return true;
            }
         }
         curr->unlock();
         pred->unlock();
      }
   }

   bool contains(int key) {
      //shared_ptr<Node> curr = head->next;
    auto curr = head->next;

      while (key>(curr->key)) {
         curr = curr->next;
      }
      return curr->key == key && !curr->marked;
   }
}list;

void test(int curr)
{
   bool test;
    int time;

    int val, choice;
    int total,k=0;
    total=ki+kd+kc;

    int i=0,d=0,c=0;

    while(k<total)
    {
        choice = (rand()%3)+1;

        if(choice==1)
        {
            if(i<ki)
            {
                val = (rand()%99)+1;
                test = list.add(val);
                i++;
                k++;
            }
        }
        else if(choice==2)
        {
            if(d<kd)
            {
                val = (rand()%99)+1;
                test = list.remove(val);
                d++;
                k++;
            }
        }
        else if(choice==3)
        {
            if(c<kc)
            {
                val = (rand()%99)+1;
                test = list.contains(val);
                c++;
                k++;
            }
        }
    }
}

int main()
{
   int i;

   vector<Thread>thr(n);

   for(i=0;i<n;i++)
   {
      thr[i].t = thread(test,i+1);
   }
   for(i=0;i<n;i++)
   {
      thr[i].t.join();
   }
   return 0;
}

I'm not able to figure out what's wrong with the above code. The errors differ every time, some of which are just SEGFAULTS or

pure virtual method called
terminate called without an active exception
Aborted (core dumped)

Could you please point out what I'm doing wrong in the above code? And how to fix that error?
EDIT: Added a very crude test function which randomly calls the three list methods . Also, number of threads and number of each operations are declared globally. Crude programming, but it recreates the SEGFAULT .

Answer 1

The issue is that you're not using the atomic store and load operations for your shared_ptr s.

It is true that the reference count in the control block (to which each shared_ptr participating in ownership of a particular shared object has a pointer to) of a shared_ptr is atomic, however, the data members of the shared_ptr itself aren't.

Thus it is safe to have multiple threads each with their own shared_ptr to a shared object, but it is not save to have multiple threads access the same shared_ptr as soon as at least one of them is using a non-const member function, which is what you're doing when reassigning the next pointer.

Illustrating the problem

Let's look at a the (simplified and prettified) copy-constructor of libstdc++'s shared_ptr implementation:

shared_ptr(const shared_ptr& rhs)
 : m_ptr(rhs.m_ptr),
   m_refcount(rhs.m_refcount) 
{ }

Here m_ptr is just a raw pointer to the shared object, and m_refcount is a class that does the reference counting and also handles eventual deletion of the object m_ptr points to.

Just one example of what can go wrong: Assume that currently a thread is trying to figure out whether a particular key is contained in the list. It starts with the copy-initialization auto curr = head->next in List::contains . Just after it managed to initialize curr.m_ptr the OS scheduler decides this thread has to pause and schedules in another thread.

That other thread is removing the successor of head . Since the ref-count of head->next still is 1 (after all, the ref-count of head->next wasn't modified by thread 1 yet), when the second thread is done removing the node it is being deleted.

Then some time later the first thread continues. It completes the initialization of curr , but since m_ptr was already initialized before thread 2 started the deletion, it still points to the now deleted node. When trying to compare key > curr->key thread 1 will access invalid memory.

Using std::atomic_load and std::atomic_store to prevent the issue

std::atomic_load and std::atomic_store prevent the issue from occurring by locking a mutex before the call to the copy-constructor/copy-assignment-operator of the shared_ptr that is passed in by pointer. If all reads from and writes to shared_ptr s that are shared across multiple threads are through std::atomic_load / std::atomic_store resp. it can never happen that one thread has only modified m_ptr but not the reference count at the time another thread starts reading or modifying the same shared_ptr .

With the necessary atomic loads and stores the List member functions should read as follows:

bool validate(Node const& pred, Node const& curr) {
   return !(pred.marked) && !(curr.marked) && 
          (std::atomic_load(&pred.next).get() == &curr);
}

bool add(int key) {
    while (true) {
        auto pred = std::atomic_load(&head);
        auto curr = std::atomic_load(&pred->next);

        while (key > (curr->key)) {
            pred = std::move(curr);
            curr = std::atomic_load(&pred->next);
        }

        std::scoped_lock lock{pred->nodeLock, curr->nodeLock};
        if (validate(*pred, *curr)) {
            if (curr->key == key) {
                return false;
            } else {
                auto new_node = std::make_shared<Node>(key);

                new_node->next = std::move(curr);
                std::atomic_store(&pred->next, std::move(new_node));
                return true;
            }
        }
    }
}

bool remove(int key) {
    while (true) {
        auto pred = std::atomic_load(&head);
        auto curr = std::atomic_load(&pred->next);

        while (key > (curr->key)) {
            pred = std::move(curr);
            curr = std::atomic_load(&pred->next);
        }

        std::scoped_lock lock{pred->nodeLock, curr->nodeLock};
        if (validate(*pred, *curr)) {
            if (curr->key != key) {
                return false;
            } else {
                curr->marked = true;
                std::atomic_store(&pred->next, std::atomic_load(&curr->next));
                return true;
            }
        }
    }
}

bool contains(int key) {
    auto curr = std::atomic_load(&head->next);

    while (key > (curr->key)) {
        curr = std::atomic_load(&curr->next);
    }
    return curr->key == key && !curr->marked;
}

Additionally, you should also make Node::marked a std::atomic_bool .

SEGFAULT when using shared_ptr

Question

1 answers

solution1
5 2018-02-11 01:01:43

Illustrating the problem

Using std::atomic_load and std::atomic_store to prevent the issue

SEGFAULT when using shared_ptr

Question

1 answers

solution1 5 2018-02-11 01:01:43

Illustrating the problem

Using std::atomic_load and std::atomic_store to prevent the issue

solution1
5 2018-02-11 01:01:43