简体   繁体   中英

C# Linq full outer join on repetitive values

I have two IQueryable collections whose elements are of this type:

/// <summary>Element type of both collections described in the question.</summary>
public class Property  
{  
   /// <summary>The value on which the two collections are to be matched.</summary>
   public string Name {get; set;}  
}

Collection 1, with the following Name values:

A  
A  
A  
B  

Collection 2, with the following Name values:

A  
B  
B

What I would like to get is a third collection in which the Name values from Collections 1 and 2 are matched, with null (empty) wherever there is no match, as follows:

Result Collection:  

A     A
A     null  
A     null  
B     B  
null  B

How is it possible to achieve this with C#, LINQ?

using System;
using System.Collections.Generic;
using System.Linq;    

namespace Testing
{
    /// <summary>
    /// A named item. Two instances are considered equal when their
    /// <see cref="Name"/> values are equal.
    /// </summary>
    public class Property
    {
        /// <summary>The name used for equality and hashing; may be null.</summary>
        public string Name { get; set; }

        /// <summary>
        /// Returns true when <paramref name="obj"/> is a <see cref="Property"/>
        /// with the same <see cref="Name"/>; false for null or any other type.
        /// </summary>
        public override bool Equals(object obj)
        {
            var item = obj as Property;
            return item != null && item.Name == Name;
        }

        /// <summary>
        /// Hash code consistent with <see cref="Equals(object)"/>.
        /// </summary>
        public override int GetHashCode()
        {
            // Name is a settable auto-property and can be null; Equals already
            // treats two null names as equal, so hashing must not throw here.
            return Name == null ? 0 : Name.GetHashCode();
        }
    }

    /// <summary>
    /// One row of the joined result: an item from the first list, an item
    /// from the second list, or both. A missing side is null.
    /// </summary>
    public class JoinedProperty
    {
        /// <summary>Item from the first list, or null when there is none.</summary>
        public Property Name1 { get; set; }

        /// <summary>Item from the second list, or null when there is none.</summary>
        public Property Name2 { get; set; }

        /// <summary>
        /// Concatenates the names of both sides; a missing side contributes
        /// an empty string.
        /// </summary>
        public override string ToString()
        {
            var left = Name1 != null ? Name1.Name : "";
            var right = Name2 != null ? Name2.Name : "";
            return left + right;
        }
    }

    /// <summary>
    /// Console demo: builds the two sample lists from the question, joins them
    /// name by name (respecting how often each name occurs) and prints one line
    /// per joined row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Prints the joined rows and then waits for a line of
        /// input so the console window stays open.
        /// </summary>
        static void Main(string[] args)
        {
            var list1 = new List<Property>
            {
                new Property{ Name = "A" },
                new Property{ Name = "A" },
                new Property{ Name = "A" },
                new Property{ Name = "B" }
            };

            var list2 = new List<Property>
            {
                new Property{ Name = "A" },
                new Property{ Name = "B" },
                new Property{ Name = "B" }
            };

            // Distinct names in first-seen order (list1 first, then list2).
            // Working on the name strings avoids the redundant Union().Distinct()
            // and does not depend on Property overriding Equals/GetHashCode.
            var names = list1.Concat(list2).Select(p => p.Name).Distinct().ToList();

            var result = new List<JoinedProperty>();

            foreach (var name in names)
            {
                var left = list1.Where(p => p.Name == name).ToList();
                var right = list2.Where(p => p.Name == name).ToList();

                AddPairs(result, left, right);
            }

            foreach (var res in result)
            {
                Console.WriteLine(res.ToString());
            }
            Console.ReadLine();
        }

        /// <summary>
        /// Appends one row per position to <paramref name="result"/>: the
        /// matched pairs first, then the surplus of the longer side paired
        /// with null.
        /// </summary>
        /// <param name="result">List that receives the joined rows.</param>
        /// <param name="left">Items from the first list sharing one name.</param>
        /// <param name="right">Items from the second list sharing the same name.</param>
        private static void AddPairs(List<JoinedProperty> result, List<Property> left, List<Property> right)
        {
            var rowCount = Math.Max(left.Count, right.Count);

            for (int i = 0; i < rowCount; i++)
            {
                // Store the actual list elements rather than one representative
                // instance, so each row refers to the items it was built from.
                result.Add(new JoinedProperty
                {
                    Name1 = i < left.Count ? left[i] : null,
                    Name2 = i < right.Count ? right[i] : null
                });
            }
        }
    }
}

Run this and you get the result

AA
A
A
BB
B

and the contents of the result list are what you are after.

Edit: Updated my answer to use the Property that was specified.

Seems to be a lot of interest for this question so I have attempted to come up with a more generalised solution. I have taken inspiration from this link https://www.codeproject.com/Articles/488643/LinQ-Extended-Joins .

I've created a fullouterjoin extension method which does what the op asks for. Not sure if fullouterjoin is the right name though.

I have used my extension method to solve the OP's problem.

using System;
using System.Collections.Generic;
using System.Linq;


namespace Testing
{


    /// <summary>A named item; the join in this example matches items by <see cref="Name"/>.</summary>
    public class Property
    {
        /// <summary>The name used as the join key.</summary>
        public string Name { get; set; }
    }

    /// <summary>
    /// Result row of the join: the item from the first list and the item from
    /// the second list. Either side is null when it has no partner.
    /// </summary>
    public class JoinedProperty
    {
        /// <summary>Item from the first list, or null.</summary>
        public Property Name1 { get; set; }

        /// <summary>Item from the second list, or null.</summary>
        public Property Name2 { get; set; }

        /// <summary>
        /// Returns both names back to back; an absent side is rendered as an
        /// empty string.
        /// </summary>
        public override string ToString()
        {
            string first = "";
            if (Name1 != null)
            {
                first = Name1.Name;
            }

            string second = "";
            if (Name2 != null)
            {
                second = Name2.Name;
            }

            return string.Concat(first, second);
        }
    }

    /// <summary>
    /// Console demo: joins the two sample lists from the question with the
    /// FullOuterJoin extension method and prints one line per joined row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Prints the joined rows, then waits for a line of input.
        /// </summary>
        static void Main(string[] args)
        {
            // Sample data from the question: A, A, A, B on the left ...
            var list1 = new List<Property>();
            foreach (var name in new[] { "A", "A", "A", "B" })
            {
                list1.Add(new Property { Name = name });
            }

            // ... and A, B, B on the right.
            var list2 = new List<Property>();
            foreach (var name in new[] { "A", "B", "B" })
            {
                list2.Add(new Property { Name = name });
            }

            // Both sides are keyed by Name; each match becomes a JoinedProperty.
            Func<Property, string> byName = p => p.Name;
            Func<Property, Property, JoinedProperty> pair =
                (p1, p2) => new JoinedProperty { Name1 = p1, Name2 = p2 };

            List<JoinedProperty> result = list1.FullOuterJoin(list2, byName, byName, pair).ToList();

            result.ForEach(res => Console.WriteLine(res.ToString()));
            Console.ReadLine();
        }
    }

    /// <summary>
    /// Extension methods implementing a full outer join that pairs elements
    /// one-to-one: every element of either sequence appears in exactly one
    /// result row, matched with at most one element of the other sequence.
    /// </summary>
    public static class MyExtensions
    {
        /// <summary>
        /// Joins <paramref name="source"/> and <paramref name="inner"/> on equal keys,
        /// pairing each inner element with the first still-unpaired source element
        /// that has the same key. Unpaired elements are returned with null on the
        /// other side.
        /// </summary>
        /// <remarks>
        /// Result order: source elements in their original order (paired or not),
        /// followed by the inner elements that found no partner. Keys are compared
        /// with <see cref="EqualityComparer{T}.Default"/>, so null keys do not throw
        /// and two null keys compare equal. The result is built eagerly.
        /// </remarks>
        /// <param name="source">Left sequence.</param>
        /// <param name="inner">Right sequence.</param>
        /// <param name="pk">Key selector for <paramref name="source"/> elements.</param>
        /// <param name="fk">Key selector for <paramref name="inner"/> elements.</param>
        /// <param name="result">Projection applied to each (source, inner) row; either argument may be null.</param>
        /// <returns>The projected rows.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static IEnumerable<TResult>
            FullOuterJoin<TSource, TInner, TKey, TResult>(this IEnumerable<TSource> source,
                                IEnumerable<TInner> inner,
                                Func<TSource, TKey> pk,
                                Func<TInner, TKey> fk,
                                Func<TSource, TInner, TResult> result)
            where TSource : class where TInner : class
        {
            if (source == null) { throw new ArgumentNullException(nameof(source)); }
            if (inner == null) { throw new ArgumentNullException(nameof(inner)); }
            if (pk == null) { throw new ArgumentNullException(nameof(pk)); }
            if (fk == null) { throw new ArgumentNullException(nameof(fk)); }
            if (result == null) { throw new ArgumentNullException(nameof(result)); }

            // Start with every source element unpaired.
            var joinedList = source.Select(s => new Tuple<TSource, TInner>(s, null)).ToList();

            foreach (var innerItem in inner)
            {
                var candidate = new Tuple<TSource, TInner>(null, innerItem);
                var index = joinedList.FindIndex(existing => matches(existing, candidate, pk, fk));

                if (index >= 0)
                {
                    // Replace in place so the paired row keeps the position of
                    // its source element instead of moving to the end.
                    joinedList[index] = combinedMatchingItems(candidate, joinedList[index]);
                }
                else
                {
                    joinedList.Add(candidate);
                }
            }

            return joinedList.Select(jl => result(jl.Item1, jl.Item2)).ToList();
        }

        /// <summary>
        /// Merges a source-only tuple and an inner-only tuple (in either order)
        /// into one tuple carrying both elements.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The tuples are not one source-only and one inner-only tuple.
        /// </exception>
        private static Tuple<TSource, TInner> combinedMatchingItems<TSource, TInner>(Tuple<TSource, TInner> item1, Tuple<TSource, TInner> item2)
            where TSource : class
            where TInner : class
        {
            if (item1.Item1 == null && item2.Item2 == null && item1.Item2 != null && item2.Item1 != null)
            {
                return new Tuple<TSource, TInner>(item2.Item1, item1.Item2);
            }

            if (item1.Item2 == null && item2.Item1 == null && item1.Item1 != null && item2.Item2 != null)
            {
                return new Tuple<TSource, TInner>(item1.Item1, item2.Item2);
            }

            throw new InvalidOperationException("2 items cannot be combined");
        }

        /// <summary>
        /// Returns true when one tuple holds only a source element, the other
        /// holds only an inner element, and their keys are equal.
        /// </summary>
        public static bool matches<TSource, TInner, TKey>(Tuple<TSource, TInner> item1, Tuple<TSource, TInner> item2, Func<TSource, TKey> pk, Func<TInner, TKey> fk)
            where TSource : class
            where TInner : class
        {
            // Null-safe comparison: calling Equals directly on a key throws
            // when a key selector returns null.
            var keys = EqualityComparer<TKey>.Default;

            if (item1.Item1 != null && item1.Item2 == null && item2.Item2 != null && item2.Item1 == null)
            {
                return keys.Equals(pk(item1.Item1), fk(item2.Item2));
            }

            if (item1.Item2 != null && item1.Item1 == null && item2.Item1 != null && item2.Item2 == null)
            {
                return keys.Equals(fk(item1.Item2), pk(item2.Item1));
            }

            return false;
        }
    }
}

I think the best option is to just use a loop:

        var listA = new List<Property>
        {
            new Property{ Name = "A" },
            new Property{ Name = "A" },
            new Property{ Name = "A" },
            new Property{ Name = "B" }
        };
        var listB = new List<Property>
        {
            new Property{ Name = "A" },
            new Property{ Name = "B" },
            new Property{ Name = "B" }
        };

        // Full outer join with multiplicity: every element of listA is paired
        // with at most one not-yet-used element of listB that has the same Name.
        // Pairing by name rather than by index keeps elements of listA beyond
        // listB.Count and avoids emitting a matched pair twice.
        var joinedList = new List<JoinedProperty>();
        var unmatchedB = new List<Property>(listB);
        foreach (var a in listA)
        {
            var property = new JoinedProperty
            {
                AName = a.Name,
                BName = null
            };
            var matchIndex = unmatchedB.FindIndex(candidate => candidate.Name == a.Name);
            if (matchIndex >= 0)
            {
                property.BName = unmatchedB[matchIndex].Name;
                // Consume the partner so it cannot be matched a second time.
                unmatchedB.RemoveAt(matchIndex);
            }
            joinedList.Add(property);
        }
        // Whatever is left in listB had no partner in listA.
        foreach (var b in unmatchedB)
        {
            joinedList.Add(new JoinedProperty
            {
                AName = null,
                BName = b.Name
            });
        }

        /// <summary>
        /// One row of the joined list: a name from list A and a name from list B.
        /// A side is left null when it has no value for that row.
        /// </summary>
        public class JoinedProperty
        {
             /// <summary>Name taken from list A, or null.</summary>
             public string AName { get; set; }
             /// <summary>Name taken from list B, or null.</summary>
             public string BName { get; set; }
        }

Also, I think, your output example is missing an element;

null B

Output;

A     A
A     null  
A     null  
B     B  
null  B
null  B
/// <summary>List element used by the example below; the lists are compared by <see cref="Name"/>.</summary>
public class Property
{
    /// <summary>The value that is compared between the two lists.</summary>
    public string Name { get; set; }
}

var list1 = new List<Property>
{
    new Property { Name ="A" },
    new Property { Name ="A" },
    new Property { Name ="A" },
    new Property { Name ="B" }
};

var list2 = new List<Property>
{
    new Property { Name ="A" },
    new Property { Name ="B" },
    new Property { Name ="B" }
};

// Merge-style full outer join. Each output line is "<left>\t<right>", with
// NULL standing in for a missing side.
// Precondition: both lists are sorted by Name in ordinal order.
var r = new List<string>();
int x1 = 0, x2 = 0;
int count1 = list1.Count, count2 = list2.Count;

while (x1 < count1 || x2 < count2)
{
    // order < 0: only the left element is emitted; order > 0: only the right;
    // order == 0: both names are equal and form a pair.
    int order;
    if (x2 == count2)
    {
        order = -1;
    }
    else if (x1 == count1)
    {
        order = 1;
    }
    else
    {
        order = string.CompareOrdinal(list1[x1].Name, list2[x2].Name);
    }

    if (order == 0)
    {
        r.Add($"{list1[x1].Name}\t{list2[x2].Name}");
        ++x1; ++x2;
    }
    else if (order < 0)
    {
        r.Add($"{list1[x1].Name}\tNULL");
        ++x1;
    }
    else
    {
        // The right name sorts before the left one, so it has no partner.
        // Always advancing the left side here would skip past later matches.
        r.Add($"NULL\t{list2[x2].Name}");
        ++x2;
    }
}

Explanation

The idea is in managing positions in lists - ie should we advance the position or not. The loop exits as soon as all positions were looked up.

You asked for a LINQ function, well there is none, but you could extend it, so it can be used for any two sequences you want this trick for.

All you have to do is write an extension method for IEnumerable, similar to all the other LINQ functions.

See Extension Methods Demystified

public static class MyEnumerableExtensions
{
    /// <summary>
    /// Walks both sequences in parallel and pairs equal elements.
    /// When the current elements are equal, (a, b) is returned and both sides
    /// advance. When they differ, (a, null) is returned and only A advances,
    /// so the current B element is kept for the next comparison.
    /// Once A is exhausted, each remaining B element is returned as (null, b).
    /// Every element of either sequence therefore appears in exactly one tuple.
    /// </summary>
    /// <remarks>
    /// Elements are compared with <see cref="EqualityComparer{T}.Default"/>.
    /// Pairing is positional, not a keyed lookup: a B element only finds its
    /// partner if the equal A element has not been passed yet.
    /// </remarks>
    /// <typeparam name="T">Element type; a reference type so null can mark a missing side.</typeparam>
    /// <param name="sourceA">The first sequence.</param>
    /// <param name="sourceB">The second sequence.</param>
    /// <returns>A lazily evaluated sequence of (a, b) tuples.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sourceA"/> or <paramref name="sourceB"/> is null.
    /// </exception>
    public static IEnumerable<System.Tuple<T, T>> EqualityZip<T>(this IEnumerable<T> sourceA,
        IEnumerable<T> sourceB)
        where T : class
    {
        // Validate here, outside the iterator, so bad arguments fail at the
        // call site instead of on first enumeration.
        if (sourceA == null) { throw new ArgumentNullException(nameof(sourceA)); }
        if (sourceB == null) { throw new ArgumentNullException(nameof(sourceB)); }

        return EqualityZipIterator(sourceA, sourceB);
    }

    // Iterator body of EqualityZip; arguments are already validated.
    private static IEnumerable<System.Tuple<T, T>> EqualityZipIterator<T>(IEnumerable<T> sourceA,
        IEnumerable<T> sourceB)
        where T : class
    {
        var comparer = EqualityComparer<T>.Default;

        using (var enumeratorA = sourceA.GetEnumerator())
        using (var enumeratorB = sourceB.GetEnumerator())
        {
            bool aAvailable = enumeratorA.MoveNext();
            bool bAvailable = enumeratorB.MoveNext();

            // enumerate as long as we have elements in A and in B:
            while (aAvailable && bAvailable)
            {
                T a = enumeratorA.Current;
                T b = enumeratorB.Current;

                if (comparer.Equals(a, b))
                {   // equal: return tuple (a, b) and move both sides on
                    yield return Tuple.Create(a, b);
                    aAvailable = enumeratorA.MoveNext();
                    bAvailable = enumeratorB.MoveNext();
                }
                else
                {   // not equal: return (a, null); b stays current so it is not lost
                    yield return Tuple.Create(a, (T)null);
                    aAvailable = enumeratorA.MoveNext();
                }
            }

            // now either we are out of A or out of B
            while (aAvailable)
            {   // we still have A but no B, return (a, null)
                yield return Tuple.Create(enumeratorA.Current, (T)null);
                aAvailable = enumeratorA.MoveNext();
            }

            while (bAvailable)
            {   // we don't have A, but there are still B, return (null, b)
                yield return Tuple.Create((T)null, enumeratorB.Current);
                bAvailable = enumeratorB.MoveNext();
            }
        }
    }
}

Usage:

var sequenceA = ...
var sequenceB = ...
var result = sequenceA.EqualityZip(sequenceB);

TODO: make the function nicer, with the possibility to compare two different classes, key selectors to select the comparison key for A and for B, and an IEqualityComparer:

// Sketch of a keyed EqualityZip overload: elements of two differently typed
// sequences are compared through keys extracted by the two selectors, using
// the supplied comparer (or a default one when it is null).
// NOTE(review): this is an unfinished outline that breaks off after the `else`
// below. As written it does not compile: the return type is missing a closing
// '>', `this` appears on two parameters, `EqualityCompare` is presumably meant
// to be `EqualityComparer`, the `if` condition is missing a ')', the yield
// statement has no ';', and `Ta` / `Tb` are not declared in the visible code.
public static IEnumerable<Tuple<TA, TB> EqualityZip<TA, TB, TKey>(
    this IEnumerable<TA> sourceA,   // the first sequence
    this IEnumerable<TB> sourceB,   // the second sequence
    Func<TA, TKey> keySelectorA,    // the property of sourceA to take
    Func<TB, TKey> keySelectorB,    // the property of sourceB to take
    IEqualityComparer<TKey> comparer)
{
    // TODO: ArgumentNullException if arguments null
    // Fall back to a default comparer when the caller passes none.
    if (comparer==null) comparer = EqualityCompare<TKey>.Default;

     var enumeratorA = sourceA.GetEnumerator();
        var enumeratorB = sourceB.GetEnumerator();

        // enumerate as long as we have elements in A and in B:
        bool aAvailable = enumeratorA.MoveNext();
        bool bAvailable = enumeratorB.MoveNext();
        while (aAvailable && bAvailable)
        {   // we have an A element and a B element
            // Compare the extracted keys rather than the elements themselves.
            TKey keyA = keySelectorA(enumeratorA.Current);
            TKey keyB = keySelectorB(enumeratorB.Current);
            if (comparer.Equals(keyA, keyB)
            {
                yield return Tuple.Create(Ta, Tb)
            }
            else

etc.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM