Why Permutations2 are lower than CartesianProduct?
Permutations2 depends of CartesianProduct, should not be longer than CartesianProduct?

This is my code:
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Generic;
using System.Linq;
namespace ConsoleApp1
{
public class Program
{
public static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<PermBenchmark>();
Console.ReadKey();
}
}
public class PermBenchmark
{
//
[Benchmark]
public IEnumerable<IEnumerable<string>> Perm1()
{
var input = new[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m" };
return input.Permutations1(10);
}
//
[Benchmark]
public IEnumerable<IEnumerable<string>> Perm2()
{
var input = new[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m" };
return input.Permutations2(10);
}
//
[Benchmark]
public IEnumerable<IEnumerable<string>> Product()
{
var input = new[]
{
new[] { "A", "B", "C", "D" },
new[] { "x", "y" }
};
return input.CartesianProduct();
}
}
public static class PermUtil
{
public static IEnumerable<IEnumerable<T>> CartesianProduct<T>(
this IEnumerable<IEnumerable<T>> source, int repeat = 1)
{
if (source == null)
{
throw new ArgumentNullException(nameof(source));
}
if (repeat < 0)
{
throw new ArgumentOutOfRangeException(nameof(repeat));
}
IEnumerable<IEnumerable<T>> seed = new[] { Enumerable.Empty<T>() };
return source
.SelectMany(x => Enumerable.Repeat(x, repeat))
.Aggregate(seed, (accumulator, current) => (
from a in accumulator
from b in current
select a.Concat(new[] { b })));
}
public static IEnumerable<IEnumerable<T>> Permutations1<T>(
this IEnumerable<T> source, int? length = null)
{
if (source == null)
{
throw new ArgumentNullException(nameof(source));
}
var src = source.ToList();
var n = src.Count;
var r = length ?? n;
var range = new[] { Enumerable.Range(0, n) };
return range
.CartesianProduct(r)
.Where(x => x.Distinct().Count() == r)
.Select(x => x.Select(i => src[i]));
}
public static IEnumerable<IEnumerable<T>> Permutations2<T>(
this IEnumerable<T> source, int? length = null)
{
if (source == null)
{
throw new ArgumentNullException(nameof(source));
}
var src = source.ToList();
var n = src.Count;
var r = length ?? n;
var range = new[] { Enumerable.Range(0, n) };
foreach (var e in range.CartesianProduct(r))
{
var indices = e.ToList();
if (indices.Distinct().Count() == r)
{
var seed = Enumerable.Empty<T>();
yield return indices.Aggregate(seed, (accumulator, i) =>
accumulator.Concat(new[] { src[i] }));
}
}
}
}
}
Hi @raky291
First of all, you always create new arrays in your benchmark. Should your benchmark include that? Or do you want to compare only the methods that work on existing input?
If you don't want to include the creation cost, you should move the arrays to fields of the class and initialize them once.
If you want to include the array creation cost, every benchmark should create the same array. In your example they are different.
And the real problem of your benchmark: you are trying to measure the performance of method which returns deffered LINQ queries. To get full understanding you should read this blog post.
To tell the long story short: you don't execute the LINQ queries as long as you don't use them for something real.
Enumarable.Range(1, 100) does not create new range. It creates a query, that once executed is going to create the range. You can do it for example with .ToArray() (which will also allocate an array) or .Count() which is going to count the elements.
So your benchmarks could look like this:
public class PermBenchmark
{
string[] array1d = new[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m" };
string[][] array2d = new[]
{
new[] { "A", "B", "C", "D" },
new[] { "x", "y" }
};
[Benchmark]
public int Perm1() => array1d.Permutations1(10).Sum(collection => collection.Count());
[Benchmark]
public int Perm2() => array1d.Permutations2(10).Sum(collection => collection.Count());
[Benchmark]
public int Product() => array2d.CartesianProduct().Sum(collection => collection.Count());
}
Please keep in mind, that in that case you will create a LOT of permutations and it will take BenchmarkDotNet quite a long time to benchmark it correctly.
Also if you ever get into a situation when you can not explain given perf problem you should use profiler. Profiler tells you what took so long, benchmarking tells you only how long it takes.
Most helpful comment
Hi @raky291
First of all, you always create new arrays in your benchmark. Should your benchmark include that? Or do you want to compare only the methods that work on existing input?
If you don't want to include the creation cost, you should move the arrays to fields of the class and initialize them once.
If you want to include the array creation cost, every benchmark should create the same array. In your example they are different.
And the real problem of your benchmark: you are trying to measure the performance of method which returns deffered LINQ queries. To get full understanding you should read this blog post.
To tell the long story short: you don't execute the LINQ queries as long as you don't use them for something real.
Enumarable.Range(1, 100)does not create new range. It creates a query, that once executed is going to create the range. You can do it for example with.ToArray()(which will also allocate an array) or.Count()which is going to count the elements.So your benchmarks could look like this:
Please keep in mind, that in that case you will create a LOT of permutations and it will take BenchmarkDotNet quite a long time to benchmark it correctly.
Also if you ever get into a situation when you can not explain given perf problem you should use profiler. Profiler tells you what took so long, benchmarking tells you only how long it takes.