LINQ - Full Outer Join

I don't know if this covers all cases, logically it seems correct. The idea is to take a left outer join and right outer join then take the union of the results.

var firstNames = new[]
{
    new { ID = 1, Name = "John" },
    new { ID = 2, Name = "Sue" },
};
var lastNames = new[]
{
    new { ID = 1, Name = "Doe" },
    new { ID = 3, Name = "Smith" },
};
var leftOuterJoin =
    from first in firstNames
    join last in lastNames on first.ID equals last.ID into temp
    from last in temp.DefaultIfEmpty()
    select new
    {
        first.ID,
        FirstName = first.Name,
        LastName = last?.Name,
    };
var rightOuterJoin =
    from last in lastNames
    join first in firstNames on last.ID equals first.ID into temp
    from first in temp.DefaultIfEmpty()
    select new
    {
        last.ID,
        FirstName = first?.Name,
        LastName = last.Name,
    };
var fullOuterJoin = leftOuterJoin.Union(rightOuterJoin);

This works as written since it is in LINQ to Objects. If LINQ to SQL or other, the query processor might not support safe navigation or other operations. You'd have to use the conditional operator to conditionally get the values.

i.e.,

var leftOuterJoin =
    from first in firstNames
    join last in lastNames on first.ID equals last.ID into temp
    from last in temp.DefaultIfEmpty()
    select new
    {
        first.ID,
        FirstName = first.Name,
        LastName = last != null ? last.Name : default,
    };

I think there are problems with most of these, including the accepted answer, because they don't work well with Linq over IQueryable either due to doing too many server round trips and too much data returns, or doing too much client execution.

For IEnumerable I don't like Sehe's answer or similar because it has excessive memory use (a simple 10000000 two list test ran Linqpad out of memory on my 32GB machine).

Also, most of the others don't actually implement a proper Full Outer Join because they are using a Union with a Right Join instead of Concat with a Right Anti Semi Join, which not only eliminates the duplicate inner join rows from the result, but any proper duplicates that existed originally in the left or right data.

So here are my extensions that handle all of these issues, generate SQL as well as implementing the join in LINQ to SQL directly, executing on the server, and is faster and with less memory than others on Enumerables:

public static class Ext {
    public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from left in leftItems
               join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into temp
               from right in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from right in rightItems
               join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into temp
               from left in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        var hashLK = new HashSet<TKey>(from l in leftItems select leftKeySelector(l));
        return rightItems.Where(r => !hashLK.Contains(rightKeySelector(r))).Select(r => resultSelector(default(TLeft),r));
    }

    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector)  where TLeft : class {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TRight), "c");
        var argLeft = Expression.PropertyOrField(parmP, "left");
        var newleftrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems.AsQueryable().GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg }).SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TLeft), "c");
        var argRight = Expression.PropertyOrField(parmP, "right");
        var newrightrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;

    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");
        var newrightrs = CastSBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).Where(lgr => !lgr.leftg.Any()).Select(newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }
}

The difference between a Right Anti-Semi-Join is mostly moot with Linq to Objects or in the source, but makes a difference on the server (SQL) side in the final answer, removing an unnecessary JOIN.

The hand coding of Expression to handle merging an Expression<Func<>> into a lambda could be improved with LinqKit, but it would be nice if the language/compiler had added some help for that. The FullOuterJoinDistinct and RightOuterJoin functions are included for completeness, but I did not re-implement FullOuterGroupJoin yet.

I wrote another version of a full outer join for IEnumerable for cases where the key is orderable, which is about 50% faster than combining the left outer join with the right anti semi join, at least on small collections. It goes through each collection after sorting just once.

I also added another answer for a version that works with EF by replacing the Invoke with a custom expansion.


Update 1: providing a truly generalized extension method FullOuterJoin
Update 2: optionally accepting a custom IEqualityComparer for the key type
Update 3: this implementation has recently become part of MoreLinq - Thanks guys!

Edit Added FullOuterGroupJoin (ideone). I reused the GetOuter<> implementation, making this a fraction less performant than it could be, but I'm aiming for 'highlevel' code, not bleeding-edge optimized, right now.

See it live on http://ideone.com/O36nWc

static void Main(string[] args)
{
    var ax = new[] { 
        new { id = 1, name = "John" },
        new { id = 2, name = "Sue" } };
    var bx = new[] { 
        new { id = 1, surname = "Doe" },
        new { id = 3, surname = "Smith" } };

    ax.FullOuterJoin(bx, a => a.id, b => b.id, (a, b, id) => new {a, b})
        .ToList().ForEach(Console.WriteLine);
}

Prints the output:

{ a = { id = 1, name = John }, b = { id = 1, surname = Doe } }
{ a = { id = 2, name = Sue }, b =  }
{ a = , b = { id = 3, surname = Smith } }

You could also supply defaults: http://ideone.com/kG4kqO

    ax.FullOuterJoin(
            bx, a => a.id, b => b.id, 
            (a, b, id) => new { a.name, b.surname },
            new { id = -1, name    = "(no firstname)" },
            new { id = -2, surname = "(no surname)" }
        )

Printing:

{ name = John, surname = Doe }
{ name = Sue, surname = (no surname) }
{ name = (no firstname), surname = Smith }

Explanation of terms used:

Joining is a term borrowed from relational database design:

  • A join will repeat elements from a as many times as there are elements in b with corresponding key (i.e.: nothing if b were empty). Database lingo calls this inner (equi)join.
  • An outer join includes elements from a for which no corresponding element exists in b. (i.e.: even results if b were empty). This is usually referred to as left join.
  • A full outer join includes records from a as well as b if no corresponding element exists in the other. (i.e. even results if a were empty)

Something not usually seen in RDBMS is a group join[1]:

  • A group join, does the same as described above, but instead of repeating elements from a for multiple corresponding b, it groups the records with corresponding keys. This is often more convenient when you wish to enumerate through 'joined' records, based on a common key.

See also GroupJoin which contains some general background explanations as well.


[1] (I believe Oracle and MSSQL have proprietary extensions for this)

Full code

A generalized 'drop-in' Extension class for this

internal static class MyExtensions
{
    internal static IEnumerable<TResult> FullOuterGroupJoin<TA, TB, TKey, TResult>(
        this IEnumerable<TA> a,
        IEnumerable<TB> b,
        Func<TA, TKey> selectKeyA, 
        Func<TB, TKey> selectKeyB,
        Func<IEnumerable<TA>, IEnumerable<TB>, TKey, TResult> projection,
        IEqualityComparer<TKey> cmp = null)
    {
        cmp = cmp?? EqualityComparer<TKey>.Default;
        var alookup = a.ToLookup(selectKeyA, cmp);
        var blookup = b.ToLookup(selectKeyB, cmp);

        var keys = new HashSet<TKey>(alookup.Select(p => p.Key), cmp);
        keys.UnionWith(blookup.Select(p => p.Key));

        var join = from key in keys
                   let xa = alookup[key]
                   let xb = blookup[key]
                   select projection(xa, xb, key);

        return join;
    }

    internal static IEnumerable<TResult> FullOuterJoin<TA, TB, TKey, TResult>(
        this IEnumerable<TA> a,
        IEnumerable<TB> b,
        Func<TA, TKey> selectKeyA, 
        Func<TB, TKey> selectKeyB,
        Func<TA, TB, TKey, TResult> projection,
        TA defaultA = default(TA), 
        TB defaultB = default(TB),
        IEqualityComparer<TKey> cmp = null)
    {
        cmp = cmp?? EqualityComparer<TKey>.Default;
        var alookup = a.ToLookup(selectKeyA, cmp);
        var blookup = b.ToLookup(selectKeyB, cmp);

        var keys = new HashSet<TKey>(alookup.Select(p => p.Key), cmp);
        keys.UnionWith(blookup.Select(p => p.Key));

        var join = from key in keys
                   from xa in alookup[key].DefaultIfEmpty(defaultA)
                   from xb in blookup[key].DefaultIfEmpty(defaultB)
                   select projection(xa, xb, key);

        return join;
    }
}