简体   繁体   中英

How to implement ConcurrentHashSet in .Net

I am trying to implement a ConcurrentHashSet in the spirit of ConcurrentDictionary, approach taken is to use a internal backing ConcurrentDictionary and write small delegating methods, this is how far i got, but well the set theoretic methods are I am stuck on, esp. I am not sure if I can use a foreach and still not violate concurrency

public class ConcurrentHashSet<TElement> : ISet<TElement>
{
    private readonly ConcurrentDictionary<TElement, object> _internal;

    public ConcurrentHashSet(IEnumerable<TElement> elements = null)
    {
        _internal = new ConcurrentDictionary<TElement, object>();
        if (elements != null)
            UnionWith(elements);
    }

    public void UnionWith(IEnumerable<TElement> other)
    {
        if (other == null) throw new ArgumentNullException("other");

        foreach (var otherElement in other)
            Add(otherElement);
    }

    public void IntersectWith(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public void ExceptWith(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public void SymmetricExceptWith(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public bool IsSubsetOf(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public bool IsSupersetOf(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public bool IsProperSupersetOf(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public bool IsProperSubsetOf(IEnumerable<TElement> other)
    {
        throw new NotImplementedException();
    }

    public bool Overlaps(IEnumerable<TElement> other)
    {
        return other.Any(otherElement => _internal.ContainsKey(otherElement));
    }

    public bool SetEquals(IEnumerable<TElement> other)
    {
        int otherCount = 0;
        int thisCount = Count;
        foreach (var otherElement in other)
        {
            otherCount++;
            if (!_internal.ContainsKey(otherElement))
                return false;
        }
        return otherCount == thisCount;
    }

    public bool Add(TElement item)
    {
        return _internal.TryAdd(item, null);
    }

    public void Clear()
    {
        _internal.Clear();
    }

    // I am not sure here if that fullfills contract correctly
    void ICollection<TElement>.Add(TElement item)
    {
        Add(item);
    }

    public bool Contains(TElement item)
    {
        return _internal.ContainsKey(item);
    }

    public void CopyTo(TElement[] array, int arrayIndex)
    {
        _internal.Keys.CopyTo(array, arrayIndex);
    }

    public bool Remove(TElement item)
    {
        object ignore;
        return _internal.TryRemove(item, out ignore);
    }

    public int Count
    {
        get { return _internal.Count; }
    }

    public bool IsReadOnly
    {
        get { return false; }
    }

    public IEnumerator<TElement> GetEnumerator()
    {
        return _internal.Keys.GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

I just ran into a similar scenario ("I am interested in a fast Add and Contains and Remove") and implemented this sucker:

using System.Collections.Generic;
using System.Threading;

namespace BlahBlah.Utilities
{
    public class ConcurrentHashSet<T> : IDisposable
    {
        private readonly ReaderWriterLockSlim _lock = new ReaderWriterLockSlim(LockRecursionPolicy.SupportsRecursion);
        private readonly HashSet<T> _hashSet = new HashSet<T>();

        #region Implementation of ICollection<T> ...ish
        public bool Add(T item)
        {
            try
            {
                _lock.EnterWriteLock();
                return _hashSet.Add(item);
            }
            finally
            {
                if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            }
        }

        public void Clear()
        {
            try
            {
                _lock.EnterWriteLock();
                _hashSet.Clear();
            }
            finally
            {
                if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            }
        }

        public bool Contains(T item)
        {
            try
            {
                _lock.EnterReadLock();
                return _hashSet.Contains(item);
            }
            finally
            {
                if (_lock.IsReadLockHeld) _lock.ExitReadLock();
            }
        }

        public bool Remove(T item)
        {
            try
            {
                _lock.EnterWriteLock();
                return _hashSet.Remove(item);
            }
            finally
            {
                if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            }
        }

        public int Count
        {
            get
            {
                try
                {
                    _lock.EnterReadLock();
                    return _hashSet.Count;
                }
                finally
                {
                    if (_lock.IsReadLockHeld) _lock.ExitReadLock();
                }
            }
        }
        #endregion

        #region Dispose
        public void Dispose()
        {
            if (_lock != null) _lock.Dispose();
        }
        #endregion
    }
}

Haven't really tested it (performance- or reliability-wise). YMMV.

Here's an implementation of a concurrent set based on the ConcurrentDictionary :

public class ConcurrentSet<T> : IEnumerable<T>, ISet<T>, ICollection<T>
{
    private readonly ConcurrentDictionary<T, byte> _dictionary = new ConcurrentDictionary<T, byte>();

    /// <summary>
    /// Returns an enumerator that iterates through the collection.
    /// </summary>
    /// <returns>
    /// A <see cref="T:System.Collections.Generic.IEnumerator`1"/> that can be used to iterate through the collection.
    /// </returns>
    public IEnumerator<T> GetEnumerator()
    {
        return _dictionary.Keys.GetEnumerator();
    }

    /// <summary>
    /// Returns an enumerator that iterates through a collection.
    /// </summary>
    /// <returns>
    /// An <see cref="T:System.Collections.IEnumerator"/> object that can be used to iterate through the collection.
    /// </returns>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Removes the first occurrence of a specific object from the <see cref="T:System.Collections.Generic.ICollection`1"/>.
    /// </summary>
    /// <returns>
    /// true if <paramref name="item"/> was successfully removed from the <see cref="T:System.Collections.Generic.ICollection`1"/>; otherwise, false. This method also returns false if <paramref name="item"/> is not found in the original <see cref="T:System.Collections.Generic.ICollection`1"/>.
    /// </returns>
    /// <param name="item">The object to remove from the <see cref="T:System.Collections.Generic.ICollection`1"/>.</param><exception cref="T:System.NotSupportedException">The <see cref="T:System.Collections.Generic.ICollection`1"/> is read-only.</exception>
    public bool Remove(T item)
    {
        return TryRemove(item);
    }

    /// <summary>
    /// Gets the number of elements in the set.
    /// </summary>
    public int Count
    {
        get { return _dictionary.Count; }
    }

    /// <summary>
    /// Gets a value indicating whether the <see cref="T:System.Collections.Generic.ICollection`1"/> is read-only.
    /// </summary>
    /// <returns>
    /// true if the <see cref="T:System.Collections.Generic.ICollection`1"/> is read-only; otherwise, false.
    /// </returns>
    public bool IsReadOnly { get { return false; } }

    /// <summary>
    /// Gets a value that indicates if the set is empty.
    /// </summary>
    public bool IsEmpty
    {
        get { return _dictionary.IsEmpty; }
    }

    public ICollection<T> Values
    {
        get { return _dictionary.Keys; }
    }

    /// <summary>
    /// Adds an item to the <see cref="T:System.Collections.Generic.ICollection`1"/>.
    /// </summary>
    /// <param name="item">The object to add to the <see cref="T:System.Collections.Generic.ICollection`1"/>.</param><exception cref="T:System.NotSupportedException">The <see cref="T:System.Collections.Generic.ICollection`1"/> is read-only.</exception>
    void ICollection<T>.Add(T item)
    {
        if(!Add(item))
            throw new ArgumentException("Item already exists in set.");
    }

    /// <summary>
    /// Modifies the current set so that it contains all elements that are present in both the current set and in the specified collection.
    /// </summary>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void UnionWith(IEnumerable<T> other)
    {
        foreach (var item in other)
            TryAdd(item);
    }

    /// <summary>
    /// Modifies the current set so that it contains only elements that are also in a specified collection.
    /// </summary>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void IntersectWith(IEnumerable<T> other)
    {
        var enumerable = other as IList<T> ?? other.ToArray();
        foreach (var item in this)
        {
            if (!enumerable.Contains(item))
                TryRemove(item);
        }
    }

    /// <summary>
    /// Removes all elements in the specified collection from the current set.
    /// </summary>
    /// <param name="other">The collection of items to remove from the set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void ExceptWith(IEnumerable<T> other)
    {
        foreach (var item in other)
            TryRemove(item);
    }

    /// <summary>
    /// Modifies the current set so that it contains only elements that are present either in the current set or in the specified collection, but not both. 
    /// </summary>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public void SymmetricExceptWith(IEnumerable<T> other)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Determines whether a set is a subset of a specified collection.
    /// </summary>
    /// <returns>
    /// true if the current set is a subset of <paramref name="other"/>; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool IsSubsetOf(IEnumerable<T> other)
    {
        var enumerable = other as IList<T> ?? other.ToArray();
        return this.AsParallel().All(enumerable.Contains);
    }

    /// <summary>
    /// Determines whether the current set is a superset of a specified collection.
    /// </summary>
    /// <returns>
    /// true if the current set is a superset of <paramref name="other"/>; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool IsSupersetOf(IEnumerable<T> other)
    {
        return other.AsParallel().All(Contains);
    }

    /// <summary>
    /// Determines whether the current set is a correct superset of a specified collection.
    /// </summary>
    /// <returns>
    /// true if the <see cref="T:System.Collections.Generic.ISet`1"/> object is a correct superset of <paramref name="other"/>; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set. </param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool IsProperSupersetOf(IEnumerable<T> other)
    {
        var enumerable = other as IList<T> ?? other.ToArray();
        return this.Count != enumerable.Count && IsSupersetOf(enumerable);
    }

    /// <summary>
    /// Determines whether the current set is a property (strict) subset of a specified collection.
    /// </summary>
    /// <returns>
    /// true if the current set is a correct subset of <paramref name="other"/>; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool IsProperSubsetOf(IEnumerable<T> other)
    {
        var enumerable = other as IList<T> ?? other.ToArray();
        return Count != enumerable.Count && IsSubsetOf(enumerable);
    }

    /// <summary>
    /// Determines whether the current set overlaps with the specified collection.
    /// </summary>
    /// <returns>
    /// true if the current set and <paramref name="other"/> share at least one common element; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool Overlaps(IEnumerable<T> other)
    {
        return other.AsParallel().Any(Contains);
    }

    /// <summary>
    /// Determines whether the current set and the specified collection contain the same elements.
    /// </summary>
    /// <returns>
    /// true if the current set is equal to <paramref name="other"/>; otherwise, false.
    /// </returns>
    /// <param name="other">The collection to compare to the current set.</param><exception cref="T:System.ArgumentNullException"><paramref name="other"/> is null.</exception>
    public bool SetEquals(IEnumerable<T> other)
    {
        var enumerable = other as IList<T> ?? other.ToArray();
        return Count == enumerable.Count && enumerable.AsParallel().All(Contains);
    }

    /// <summary>
    /// Adds an element to the current set and returns a value to indicate if the element was successfully added. 
    /// </summary>
    /// <returns>
    /// true if the element is added to the set; false if the element is already in the set.
    /// </returns>
    /// <param name="item">The element to add to the set.</param>
    public bool Add(T item)
    {
        return TryAdd(item);
    }

    public void Clear()
    {
        _dictionary.Clear();
    }

    public bool Contains(T item)
    {
        return _dictionary.ContainsKey(item);
    }

    /// <summary>
    /// Copies the elements of the <see cref="T:System.Collections.Generic.ICollection`1"/> to an <see cref="T:System.Array"/>, starting at a particular <see cref="T:System.Array"/> index.
    /// </summary>
    /// <param name="array">The one-dimensional <see cref="T:System.Array"/> that is the destination of the elements copied from <see cref="T:System.Collections.Generic.ICollection`1"/>. The <see cref="T:System.Array"/> must have zero-based indexing.</param><param name="arrayIndex">The zero-based index in <paramref name="array"/> at which copying begins.</param><exception cref="T:System.ArgumentNullException"><paramref name="array"/> is null.</exception><exception cref="T:System.ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is less than 0.</exception><exception cref="T:System.ArgumentException"><paramref name="array"/> is multidimensional.-or-The number of elements in the source <see cref="T:System.Collections.Generic.ICollection`1"/> is greater than the available space from <paramref name="arrayIndex"/> to the end of the destination <paramref name="array"/>.-or-Type <paramref name="T"/> cannot be cast automatically to the type of the destination <paramref name="array"/>.</exception>
    public void CopyTo(T[] array, int arrayIndex)
    {
        Values.CopyTo(array, arrayIndex);
    }

    public T[] ToArray()
    {
        return _dictionary.Keys.ToArray();
    }

    public bool TryAdd(T item)
    {
        return _dictionary.TryAdd(item, default(byte));
    }

    public bool TryRemove(T item)
    {
        byte donotcare;
        return _dictionary.TryRemove(item, out donotcare);
    }
}

ConcurrentDictionary has better performance characteristics as it uses a lockfree manner for reads (at least in .NET 4.0+). So for performance in heavy mulithreaded scenarios a ConcurrentDictionary will probably perform better as a readerwriterlockslim wrapper. But you need to carry around an empty byte as dummyvalue (I agree, that looks awful).

What do you intend to use it for?

Even if you do get the methods to work, you could have the following scenario:

var set1 = new ConcurrentHashSet<int>();
...

if (set1.Overlaps(set2))
{
    set1.IntersectWith(set2);
    assert(! set1.IsEmpty());    // might fail
}

That may be acceptable but a Set just is much less likely to be useful in a concurrent setting than a Queue.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM