Benchmarkdotnet: Why is a foreach loop on an array slower than a LINQ call?

Created on 26 Dec 2017  路  13Comments  路  Source: dotnet/BenchmarkDotNet

Consider the following Benchmark class:

[CoreJob, ClrJob]
[MemoryDiagnoser]
public class IsSubsetOfGist
{
    [Params(10, 100, 1000, 10_000, 100_000, 1_000_000)]
    public int NumberOfItems;

    public int[] SubsetArray;
    public int[] SupersetArray;

    [GlobalSetup]
    public void GlobalArraySetup()
    {
        SupersetArray = new int[NumberOfItems];
        for (var i = 0; i < NumberOfItems; i++)
        {
            SupersetArray[i] = i + 1;
        }

        SubsetArray = new int[5];
        for (var i = 0; i < 5; i++)
        {
            SubsetArray[i] = SupersetArray[SupersetArray.Length - 1 - i];
        }
    }

    [Benchmark(Baseline = true)]
    public bool ArrayWithForLoops() => IsSubsetOfWithForLoops(SubsetArray, SupersetArray);

    private static bool IsSubsetOfWithForLoops<T>(T[] array, T[] superset, IEqualityComparer<T> comparer = null)
    {
        comparer = comparer ?? EqualityComparer<T>.Default;

        for (var i = 0; i < array.Length; i++)
        {
            var currentItem = array[i];
            var wasItemFound = false;
            for (var j = 0; j < superset.Length; j++)
            {
                if (!comparer.Equals(superset[i], currentItem))
                    continue;

                wasItemFound = true;
                break;
            }

            if (!wasItemFound)
                return false;
        }

        return true;
    }

    [Benchmark]
    public bool ArrayWithLinq() => SubsetArray.All(SupersetArray.Contains);

    [Benchmark]
    public bool ArrayWithForeachLoops() => IsSubsetOfWithForeachLoops(SubsetArray, SupersetArray);

    private static bool IsSubsetOfWithForeachLoops<T>(T[] set, T[] superset, IEqualityComparer<T> comparer = null)
    {
        comparer = comparer ?? EqualityComparer<T>.Default;
        foreach (var itemFromSet in set)
        {
            var wasItemFound = false;
            foreach (var itemFromSuperset in superset)
            {
                if (comparer.Equals(itemFromSet, itemFromSuperset))
                {
                    wasItemFound = true;
                    break;
                }
            }

            if (!wasItemFound)
                return false;
        }

        return true;
    }
}

In these benchmarks, I test different ways to determine if a collection is a subset of another collection (without checking for duplicate items). SubsetArray always contains the last 5 items of SupersetArray in reverse order (to mimic the worst case scenario from the number of loop runs). I expected that the versions with the for loops and foreach loops are faster than the call to the LINQ methods Enumerable.All and Enumerable.Contains, however they are not:


BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437500 Hz, Resolution=410.2564 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT


| Method | Job | Runtime | NumberOfItems | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
|---------------------- |----- |-------- |-------------- |-----------------:|---------------:|---------------:|-------:|---------:|-------:|----------:|
| ArrayWithForLoops | Clr | Clr | 10 | 38.97 ns | 0.1858 ns | 0.1647 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10 | 321.30 ns | 1.0796 ns | 1.0098 ns | 8.24 | 0.04 | 0.0606 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 10 | 131.90 ns | 0.4581 ns | 0.4285 ns | 3.38 | 0.02 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10 | 31.81 ns | 0.0935 ns | 0.0875 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10 | 276.83 ns | 1.9921 ns | 1.8634 ns | 8.70 | 0.06 | 0.0606 | 96 B |
| ArrayWithForeachLoops | Core | Core | 10 | 125.05 ns | 0.4460 ns | 0.4172 ns | 3.93 | 0.02 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100 | 305.43 ns | 1.8793 ns | 1.7579 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100 | 689.30 ns | 3.1709 ns | 2.9661 ns | 2.26 | 0.02 | 0.0601 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 100 | 1,476.23 ns | 12.7996 ns | 11.9728 ns | 4.83 | 0.05 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100 | 298.21 ns | 1.4164 ns | 1.2556 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100 | 680.31 ns | 4.7340 ns | 4.4282 ns | 2.28 | 0.02 | 0.0601 | 96 B |
| ArrayWithForeachLoops | Core | Core | 100 | 1,442.54 ns | 3.3149 ns | 3.1008 ns | 4.84 | 0.02 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000 | 2,900.18 ns | 12.7123 ns | 11.2691 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000 | 3,352.68 ns | 22.9503 ns | 19.1645 ns | 1.16 | 0.01 | 0.0572 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 1000 | 14,304.57 ns | 59.9615 ns | 56.0880 ns | 4.93 | 0.03 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000 | 2,862.33 ns | 12.5162 ns | 11.0953 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000 | 3,892.44 ns | 12.1110 ns | 11.3287 ns | 1.36 | 0.01 | 0.0534 | 96 B |
| ArrayWithForeachLoops | Core | Core | 1000 | 14,197.90 ns | 72.6340 ns | 64.3881 ns | 4.96 | 0.03 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 10000 | 32,064.52 ns | 104.7986 ns | 98.0287 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10000 | 30,438.20 ns | 387.8124 ns | 362.7600 ns | 0.95 | 0.01 | - | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 10000 | 142,249.81 ns | 1,119.4741 ns | 1,047.1567 ns | 4.44 | 0.03 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10000 | 28,367.15 ns | 46.8529 ns | 39.1243 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10000 | 35,950.57 ns | 65.9271 ns | 58.4427 ns | 1.27 | 0.00 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 10000 | 142,462.62 ns | 649.8010 ns | 607.8242 ns | 5.02 | 0.02 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100000 | 320,891.14 ns | 1,262.2017 ns | 1,180.6642 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100000 | 302,454.62 ns | 1,715.1904 ns | 1,604.3900 ns | 0.94 | 0.01 | - | 100 B |
| ArrayWithForeachLoops | Clr | Clr | 100000 | 1,428,956.23 ns | 11,414.0670 ns | 9,531.2620 ns | 4.45 | 0.03 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100000 | 284,467.21 ns | 1,791.0643 ns | 1,675.3625 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100000 | 356,758.14 ns | 834.7999 ns | 780.8722 ns | 1.25 | 0.01 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 100000 | 1,416,342.79 ns | 3,701.5172 ns | 3,462.4011 ns | 4.98 | 0.03 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000000 | 3,181,027.99 ns | 20,028.1485 ns | 18,734.3407 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000000 | 3,231,952.75 ns | 10,953.6190 ns | 10,246.0211 ns | 1.02 | 0.01 | - | 128 B |
| ArrayWithForeachLoops | Clr | Clr | 1000000 | 14,555,322.97 ns | 43,249.8432 ns | 40,455.9262 ns | 4.58 | 0.03 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000000 | 2,830,874.57 ns | 11,429.7118 ns | 10,691.3585 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000000 | 3,801,994.66 ns | 11,227.0771 ns | 9,952.5104 ns | 1.34 | 0.01 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 1000000 | 14,704,212.82 ns | 71,842.9255 ns | 67,201.9106 ns | 5.19 | 0.03 | - | 0 B |

How can that be? Am I setting up my benchmarks in a wrong way? I expected the for loop variant to be the fastest, and the foreach variant to be at least a little bit faster than the LINQ calls, especially as foreach on arrays does not allocate any objects. But the foreach loops are way slower - which confuses me as Enumerable.All and Enumerable.Contains are implemented in basically the same way. The LINQ version is sometimes even faster than the for-loop version in the CLR.

Thanks for your help in advance.

IsSubsetOfGist.log

question

All 13 comments

The only way to know for sure is looking at the assembly generated for all of those. You can use the disassembly support of BenchmarkDotNet to look at it. Without looking at the assembly there are a few things that can cause it. The likely cause is that the returned enumerator is not a struct enumerator, therefore, the calls to MoveNext() and the other support functions are not inlined.

@redknightlois after activating the [DisassemblyDiagnoser(printIL:true, printAsm:false)], the results of the important methods IsSubsetOfWithForeachLoops and IsSubsetOfWithForLoops are not printed out (I guess because they are not the marked with the BenchmarkAttribute but called indirectly?). However, when using ILDasm, I can see no calls to the IEnumerator<T> methods - the IL looks like the following (identical for .NET and .NET Core):

.method private hidebysig static bool  IsSubsetOfWithForeachLoops<T>(!!T[] set,
                                                                     !!T[] superset,
                                                                     [opt] class [mscorlib]System.Collections.Generic.IEqualityComparer`1<!!T> comparer) cil managed
{
  .param [3] = nullref
  // Code size       93 (0x5d)
  .maxstack  3
  .locals init (!!T[] V_0,
           int32 V_1,
           !!T V_2,
           bool V_3,
           !!T[] V_4,
           int32 V_5,
           !!T V_6)
  IL_0000:  ldarg.2
  IL_0001:  dup
  IL_0002:  brtrue.s   IL_000a
  IL_0004:  pop
  IL_0005:  call       class [mscorlib]System.Collections.Generic.EqualityComparer`1<!0> class [mscorlib]System.Collections.Generic.EqualityComparer`1<!!T>::get_Default()
  IL_000a:  starg.s    comparer
  IL_000c:  ldarg.0
  IL_000d:  stloc.0
  IL_000e:  ldc.i4.0
  IL_000f:  stloc.1
  IL_0010:  br.s       IL_0055
  IL_0012:  ldloc.0
  IL_0013:  ldloc.1
  IL_0014:  ldelem     !!T
  IL_0019:  stloc.2
  IL_001a:  ldc.i4.0
  IL_001b:  stloc.3
  IL_001c:  ldarg.1
  IL_001d:  stloc.s    V_4
  IL_001f:  ldc.i4.0
  IL_0020:  stloc.s    V_5
  IL_0022:  br.s       IL_0044
  IL_0024:  ldloc.s    V_4
  IL_0026:  ldloc.s    V_5
  IL_0028:  ldelem     !!T
  IL_002d:  stloc.s    V_6
  IL_002f:  ldarg.2
  IL_0030:  ldloc.2
  IL_0031:  ldloc.s    V_6
  IL_0033:  callvirt   instance bool class [mscorlib]System.Collections.Generic.IEqualityComparer`1<!!T>::Equals(!0,
                                                                                                                 !0)
  IL_0038:  brfalse.s  IL_003e
  IL_003a:  ldc.i4.1
  IL_003b:  stloc.3
  IL_003c:  br.s       IL_004c
  IL_003e:  ldloc.s    V_5
  IL_0040:  ldc.i4.1
  IL_0041:  add
  IL_0042:  stloc.s    V_5
  IL_0044:  ldloc.s    V_5
  IL_0046:  ldloc.s    V_4
  IL_0048:  ldlen
  IL_0049:  conv.i4
  IL_004a:  blt.s      IL_0024
  IL_004c:  ldloc.3
  IL_004d:  brtrue.s   IL_0051
  IL_004f:  ldc.i4.0
  IL_0050:  ret
  IL_0051:  ldloc.1
  IL_0052:  ldc.i4.1
  IL_0053:  add
  IL_0054:  stloc.1
  IL_0055:  ldloc.1
  IL_0056:  ldloc.0
  IL_0057:  ldlen
  IL_0058:  conv.i4
  IL_0059:  blt.s      IL_0012
  IL_005b:  ldc.i4.1
  IL_005c:  ret
} // end of method IsSubsetOfGist::IsSubsetOfWithForeachLoops

Thus I would assume that the foreach loops are optimized and should perform at least as good as the LINQ methods.

@feO2x When benchmarking you shouldn't wrap the calls, you may be skewing your results via an extra call; which for many benchmarking workloads is high enough to account for the lion share of your total time and/or standard deviation.

I just realized you are passing around arrays of integers over a generic type. The LINQ implementation relies on a few tricks that would outperform a typical foreach by a while, for example, devirtualization of the Default comparer when passing a null value (which you are passing to). That means that you may actually benchmarking oranges and apples and the results kinda suggest that (there is no apparent cause for the difference).

Again difficult to know from IL because that depends on what assembler instructions the JIT is emitting there. For example, if the devirtualization is not happening, that is enough to account for the massive difference you are looking at there.

EDIT: For reference https://github.com/dotnet/corefx/commit/4cc7cd68cd544c8703d740ced9a31cd9b16479e3#diff-acc6cdce907ecf103772cf4bdc0e95a0R26

@redknightlois Thanks for your insights about devirtualizing - maybe I should give a little bit of context about why I structured the benchmarks the way they are. I am the author of Light.GuardClauses, an assertion library for checking parameter values in production code. I'm currently considering whether it's useful to boost the performance of collection assertions - of course, these assertions are implemented in a generic way like the methods above are.

I'm not that good in reading assembler language - but I will check if I can get hold of the assembler instructions.

@feO2x I see. I have been looking at the code of GuardClauses and many of the patterns used internally are definitely not 'free' in the general sense. There are many tricks you can use to achieve almost zero overhead if you are willing to trick the JIT into emitting the proper code. I explained some of the techniques at DotNext Moscow 2017 (live streaming available at their YouTube channel https://youtu.be/FMktdWexo8A?t=6175) there are a few more that would be necessary but you can make it almost zero overhead if you are interested into going there.

@redknightlois Thanks - I will take a look. I have much to learn about performance optimizations. Will close this issue for now. Can I come back to you if I have further questions?

Sure, I do 8 hours of performance consulting for Open Source projects as community work for free every month. Some like to write blogs, I am more of a 'get my hands dirty' kind of guy... essentially I despise writing :D ... So ping me on my twitter if you want GuardClauses to be scheduled for January /February spot (which I didn't open yet ;) ).

The proper question here is not why foreach is slower than LINQ but rather why foreach is slower than for. In the case of arrays foreach and for should generate similar code and thus similar timings. If the timings are so different then there's a good chance that the 2 versions aren't comparable.

Indeed, this line appears to be incorrect:
C# if (!comparer.Equals(superset[i], currentItem))
It should probably be superset[j] instead of superset[i].

@mikedn You are completely correct, there is a bug in my for-loop benchmark. After fixing it, the for loop version is as slow as the foreach version:


BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437500 Hz, Resolution=410.2564 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT


| Method | Job | Runtime | NumberOfItems | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
|---------------------- |----- |-------- |-------------- |----------------:|---------------:|---------------:|-------:|---------:|-------:|----------:|
| ArrayWithForLoops | Clr | Clr | 10 | 132.1 ns | 0.5414 ns | 0.4799 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10 | 318.5 ns | 2.0388 ns | 1.8073 ns | 2.41 | 0.02 | 0.0606 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 10 | 132.8 ns | 0.5085 ns | 0.4756 ns | 1.01 | 0.00 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10 | 120.2 ns | 0.2715 ns | 0.2540 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10 | 266.0 ns | 2.0088 ns | 1.8790 ns | 2.21 | 0.02 | 0.0606 | 96 B |
| ArrayWithForeachLoops | Core | Core | 10 | 112.1 ns | 0.4089 ns | 0.3825 ns | 0.93 | 0.00 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100 | 1,471.5 ns | 26.4787 ns | 22.1109 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100 | 681.1 ns | 4.5160 ns | 3.7711 ns | 0.46 | 0.01 | 0.0601 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 100 | 1,458.3 ns | 7.0498 ns | 5.5040 ns | 0.99 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100 | 1,446.6 ns | 7.4547 ns | 6.6084 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100 | 809.8 ns | 10.0747 ns | 9.4238 ns | 0.56 | 0.01 | 0.0601 | 96 B |
| ArrayWithForeachLoops | Core | Core | 100 | 1,282.7 ns | 4.6168 ns | 4.3186 ns | 0.89 | 0.00 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000 | 14,300.6 ns | 63.4179 ns | 56.2183 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000 | 3,339.2 ns | 20.9335 ns | 17.4804 ns | 0.23 | 0.00 | 0.0572 | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 1000 | 14,257.6 ns | 46.0916 ns | 43.1141 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000 | 14,233.8 ns | 87.3339 ns | 81.6922 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000 | 3,951.1 ns | 13.7111 ns | 12.8254 ns | 0.28 | 0.00 | 0.0534 | 96 B |
| ArrayWithForeachLoops | Core | Core | 1000 | 12,524.4 ns | 28.8571 ns | 26.9929 ns | 0.88 | 0.01 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 10000 | 142,812.0 ns | 449.4916 ns | 420.4547 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10000 | 30,494.6 ns | 120.6997 ns | 100.7897 ns | 0.21 | 0.00 | - | 96 B |
| ArrayWithForeachLoops | Clr | Clr | 10000 | 142,572.6 ns | 657.7825 ns | 615.2901 ns | 1.00 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10000 | 142,693.7 ns | 430.9256 ns | 403.0880 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10000 | 36,163.4 ns | 96.1260 ns | 85.2132 ns | 0.25 | 0.00 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 10000 | 124,936.0 ns | 294.8500 ns | 275.8028 ns | 0.88 | 0.00 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100000 | 1,427,927.9 ns | 6,193.1063 ns | 5,793.0349 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100000 | 301,327.9 ns | 885.6928 ns | 785.1436 ns | 0.21 | 0.00 | - | 100 B |
| ArrayWithForeachLoops | Clr | Clr | 100000 | 1,427,722.5 ns | 7,160.2840 ns | 6,697.7335 ns | 1.00 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100000 | 1,433,723.0 ns | 5,300.5856 ns | 4,958.1706 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100000 | 358,312.9 ns | 1,399.4997 ns | 1,309.0928 ns | 0.25 | 0.00 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 100000 | 1,250,089.9 ns | 6,911.6685 ns | 6,465.1784 ns | 0.87 | 0.01 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000000 | 14,622,995.1 ns | 63,250.9134 ns | 59,164.9378 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000000 | 3,227,347.0 ns | 14,015.2056 ns | 12,424.1135 ns | 0.22 | 0.00 | - | 128 B |
| ArrayWithForeachLoops | Clr | Clr | 1000000 | 14,738,577.8 ns | 28,660.7134 ns | 22,376.3978 ns | 1.01 | 0.00 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000000 | 14,589,810.0 ns | 42,640.7048 ns | 39,886.1378 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000000 | 3,789,296.6 ns | 15,291.9963 ns | 12,769.5082 ns | 0.26 | 0.00 | - | 96 B |
| ArrayWithForeachLoops | Core | Core | 1000000 | 12,903,099.5 ns | 39,239.6372 ns | 36,704.7774 ns | 0.88 | 0.00 | - | 0 B |

So now both for and foreach are slow, funny.

The LINQ version is faster because it uses the Contains overload without an IEqualityComparer<T> parameter. If you change it to use the other overload then it will probably be slow as well.

@mikedn afaik the changes to the operation were done so it could devirtualize the comparer and because it is an int, it would only add a single check to the code. You may need to use a custom comparer to actually not being able to devirtualize and be as slow as those 2.

FWIW, it suffices to create a lambda and pass null as the comparer to decrease the performance of the LINQ version:

SubsetArray.All(i => SupersetArray.Contains(i, null));

BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i5-6300U CPU 2.40GHz (Skylake), ProcessorCount=4
Frequency=2437498 Hz, Resolution=410.2567 ns, Timer=TSC
.NET Core SDK=2.1.2
  [Host] : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT
  Clr    : .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2600.0
  Core   : .NET Core 2.0.3 (Framework 4.6.25815.02), 64bit RyuJIT


| Method | Job | Runtime | NumberOfItems | Mean | Error | StdDev | Scaled | ScaledSD | Gen 0 | Allocated |
|---------------------- |----- |-------- |-------------- |----------------:|----------------:|----------------:|-------:|---------:|-------:|----------:|
| ArrayWithForLoops | Clr | Clr | 10 | 127.5 ns | 0.6032 ns | 0.5037 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10 | 628.0 ns | 9.0563 ns | 8.0282 ns | 4.92 | 0.06 | 0.1621 | 256 B |
| ArrayWithForeachLoops | Clr | Clr | 10 | 139.9 ns | 0.6873 ns | 0.5740 ns | 1.10 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10 | 133.8 ns | 2.1991 ns | 1.9494 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10 | 581.3 ns | 3.6555 ns | 3.2405 ns | 4.35 | 0.06 | 0.1621 | 256 B |
| ArrayWithForeachLoops | Core | Core | 10 | 125.0 ns | 1.4068 ns | 1.2471 ns | 0.94 | 0.02 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100 | 1,464.1 ns | 17.2244 ns | 13.4477 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100 | 4,722.7 ns | 47.8080 ns | 44.7197 ns | 3.23 | 0.04 | 0.1602 | 256 B |
| ArrayWithForeachLoops | Clr | Clr | 100 | 1,637.1 ns | 16.8034 ns | 15.7179 ns | 1.12 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100 | 1,620.1 ns | 13.9337 ns | 11.6353 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100 | 4,663.8 ns | 20.7348 ns | 16.1884 ns | 2.88 | 0.02 | 0.1602 | 256 B |
| ArrayWithForeachLoops | Core | Core | 100 | 1,463.2 ns | 10.7066 ns | 9.4911 ns | 0.90 | 0.01 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000 | 14,466.2 ns | 138.0810 ns | 122.4052 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000 | 45,050.0 ns | 256.5118 ns | 227.3910 ns | 3.11 | 0.03 | 0.1221 | 257 B |
| ArrayWithForeachLoops | Clr | Clr | 1000 | 16,220.3 ns | 108.1038 ns | 90.2716 ns | 1.12 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000 | 15,691.8 ns | 173.4613 ns | 153.7689 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000 | 45,022.9 ns | 224.8520 ns | 187.7616 ns | 2.87 | 0.03 | 0.1221 | 256 B |
| ArrayWithForeachLoops | Core | Core | 1000 | 15,040.7 ns | 160.2104 ns | 149.8609 ns | 0.96 | 0.01 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 10000 | 143,433.2 ns | 667.3433 ns | 521.0177 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 10000 | 449,157.9 ns | 3,467.6669 ns | 3,073.9961 ns | 3.13 | 0.02 | - | 260 B |
| ArrayWithForeachLoops | Clr | Clr | 10000 | 162,755.0 ns | 883.6883 ns | 826.6025 ns | 1.13 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 10000 | 157,583.6 ns | 1,586.7188 ns | 1,484.2176 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 10000 | 446,518.7 ns | 2,945.0956 ns | 2,754.8439 ns | 2.83 | 0.03 | - | 256 B |
| ArrayWithForeachLoops | Core | Core | 10000 | 148,659.9 ns | 774.2595 ns | 686.3608 ns | 0.94 | 0.01 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 100000 | 1,443,379.0 ns | 3,643.9711 ns | 2,844.9727 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 100000 | 4,521,913.9 ns | 35,326.2630 ns | 31,315.8089 ns | 3.13 | 0.02 | - | 320 B |
| ArrayWithForeachLoops | Clr | Clr | 100000 | 1,619,285.5 ns | 9,384.6966 ns | 8,778.4502 ns | 1.12 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 100000 | 1,629,428.2 ns | 8,392.3427 ns | 7,850.2018 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 100000 | 4,489,078.2 ns | 26,777.5918 ns | 23,737.6354 ns | 2.76 | 0.02 | - | 256 B |
| ArrayWithForeachLoops | Core | Core | 100000 | 1,447,246.3 ns | 3,888.5543 ns | 3,035.9272 ns | 0.89 | 0.00 | - | 0 B |
| ArrayWithForLoops | Clr | Clr | 1000000 | 14,903,893.6 ns | 65,066.3644 ns | 57,679.6316 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Clr | Clr | 1000000 | 45,719,124.6 ns | 659,323.2631 ns | 550,564.7327 ns | 3.07 | 0.04 | - | 512 B |
| ArrayWithForeachLoops | Clr | Clr | 1000000 | 16,750,741.9 ns | 137,658.8803 ns | 128,766.1895 ns | 1.12 | 0.01 | - | 0 B |
| ArrayWithForLoops | Core | Core | 1000000 | 16,716,489.7 ns | 104,164.0436 ns | 92,338.7024 ns | 1.00 | 0.00 | - | 0 B |
| ArrayWithLinq | Core | Core | 1000000 | 45,882,191.5 ns | 485,470.1228 ns | 454,109.0099 ns | 2.74 | 0.03 | - | 256 B |
| ArrayWithForeachLoops | Core | Core | 1000000 | 14,956,303.7 ns | 107,229.4869 ns | 100,302.5188 ns | 0.89 | 0.01 | - | 0 B |

My guess is that it has something to do with the fact that LINQ uses less lines of code, so the compiler is able to process the request faster.

Was this page helpful?
0 / 5 - 0 ratings