Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements for Random.Shuffle() #83305

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 149 additions & 7 deletions src/libraries/System.Private.CoreLib/src/System/Random.cs
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,160 @@ public void Shuffle<T>(T[] values)
/// </remarks>
public void Shuffle<T>(Span<T> values)
{
int n = values.Length;
var chooser = new IncreasingUniform(this);
for (int i = 1; i < values.Length; i++)
{
int index = chooser.NextIndex();

// This pattern is faster than tuple deconstruction for large structs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is that something the compiler could/should improve?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was mentioned here, my understand is that it will be fixed after C# 11

var swap = values[index];
values[index] = values[i];
values[i] = swap;
}
}

private struct IncreasingUniform
{
private readonly Random _random;
private int _n;
private int _chunk;
private int _chunkRemaining;

public IncreasingUniform(Random random)
{
_random = random;
_chunk = 0;
_n = 1;
_chunkRemaining = 0;
}

for (int i = 0; i < n - 1; i++)
/// <summary>
/// Increase n by one and then return a random positive integer less than the new n
/// </summary>
public int NextIndex()
{
int j = Next(i, n);
int nextN = _n + 1;
if (_chunkRemaining == 0)
{
(int bound, int remaining) = CalculateBound(nextN);
_chunkRemaining = remaining - 1;
_chunk = _random.Next(bound);
}
else
{
_chunkRemaining--;
}

if (j != i)
int result;
if (_chunkRemaining == 0)
{
T temp = values[i];
values[i] = values[j];
values[j] = temp;
result = _chunk;
}
else
{
result = _chunk % nextN;
_chunk /= nextN;
}

_n = nextN;
return result;
}

/// <summary>
/// Calculate the highest (x,k) such that x = n*n+1*..*n+k-1 where x is a 32 bit integer,
/// assuming n is 2, 13, or >= 20
/// </summary>
private static (int, int) CalculateBound(int n)
{
// This method should always first be called with 2, then 13, then higher numbers starting at 20
Debug.Assert(n == 2 || n == 13 || n > 19);
// Threshold_6 is the highest value of n for which k will be 6
const int Threshold_6 = 33;
const int Threshold_5 = 71;
const int Threshold_4 = 213;
const int Threshold_3 = 1289;
const int Threshold_2 = 46340;
AssertIsThreshold(6, Threshold_6);
AssertIsThreshold(5, Threshold_5);
AssertIsThreshold(4, Threshold_4);
AssertIsThreshold(3, Threshold_3);
AssertIsThreshold(2, Threshold_2);

int count;
switch (n)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this switch use fancy new pattern matching syntax to be more compact?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this but I don't think relational patterns allow return statements.

{
case 2:
const int Product_2_11 = 2 * 3 * 4 * 5 * 6 * 7 * 8 * 9 * 10 * 11 * 12;
Debug.Assert(Product_2_11 == 479001600);
return (Product_2_11, 11);
case 13:
const int Product_13_7 = 13 * 14 * 15 * 16 * 17 * 18 * 19;
Debug.Assert(Product_13_7 == 253955520);
return (Product_13_7, 7);
case <= Threshold_6:
count = 6;
break;
case <= Threshold_5:
count = 5;
break;
case <= Threshold_4:
count = 4;
break;
case <= Threshold_3:
count = 3;
break;
case <= Threshold_2:
count = 2;
break;
default:
return (n, 1);
}

int product = GetProduct(n, count - 1);
return (product, count);
}

private static int GetProduct(int n, int multiplications)
{
int product = n;
while (multiplications > 0)
{
product *= n + multiplications;
multiplications--;
}

return product;
}

/// <summary>
/// Checks that t*t+1*..*t+k-1 is a 32 bit integer, and that t is the highest integer such that this is the case
/// </summary>
[System.Diagnostics.Conditional("DEBUG")]
private static void AssertIsThreshold(int k, int t)
{
static bool IsProductI32(int n, int multiplications)
{
try
{
checked
{
int product = n;
while (multiplications > 0)
{
product *= n + multiplications;
multiplications--;
}
}
}
catch (OverflowException)
{
return false;
}
return true;
}

Debug.Assert(IsProductI32(t, k - 1), $"k = {k}; t = {t}");
Debug.Assert(!IsProductI32(t, k), $"k + 1 = {k + 1}; t = {t}");
}
}

Expand Down
55 changes: 49 additions & 6 deletions src/libraries/System.Runtime.Extensions/tests/System/Random.cs
Original file line number Diff line number Diff line change
Expand Up @@ -713,9 +713,9 @@ public static void Shuffle_Array_Seeded(bool emptyShuffle)
Random random = new Random(0x70636A61);
int[] items = new int[] { 1, 2, 3, 4 };
random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 4, 2, 1, 3 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 1, 3, 4, 2 }, items);
random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 2, 3, 4, 1 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 3, 1, 2, 4 }, items);

if (emptyShuffle)
{
Expand All @@ -724,7 +724,7 @@ public static void Shuffle_Array_Seeded(bool emptyShuffle)
}

random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 1, 4, 3, 2 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 2, 1, 4, 3 }, items);
}

[Fact]
Expand All @@ -742,9 +742,9 @@ public static void Shuffle_Span_Seeded(bool emptyShuffle)
Random random = new Random(0x70636A61);
Span<int> items = new int[] { 1, 2, 3, 4 };
random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 4, 2, 1, 3 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 1, 3, 4, 2 }, items);
random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 2, 3, 4, 1 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 3, 1, 2, 4 }, items);

if (emptyShuffle)
{
Expand All @@ -753,9 +753,52 @@ public static void Shuffle_Span_Seeded(bool emptyShuffle)
}

random.Shuffle(items);
AssertExtensions.SequenceEqual(stackalloc int[] { 1, 4, 3, 2 }, items);
AssertExtensions.SequenceEqual(stackalloc int[] { 2, 1, 4, 3 }, items);
}

[Theory]
[InlineData(1000, 50, 2565)]
[InlineData(1000, 49, 2465)]
public static void Shuffle_Array_Fairness(int runs, int len, double critical)
wainwrightmark marked this conversation as resolved.
Show resolved Hide resolved
{
// Test that repeatedly shuffling an array puts each value in each position a roughly equal number of times.
// This does not prove that the shuffle is fair but will at least fail if the shuffle is obviously unfair.
// This is a "Chi Squared Goodness of fit" problem.
// The number of degrees of freedom is (len - 1)*(len - 1).
// The significance is 0.01 so this test should naturally fail for 1 in 100 seeds.
// To calculate the critical values I used an Excel formula e.g. `=Round(CHISQ.INV.RT(0.01,49 * 49))`

int[,] buckets = new int[len, len];
var rng = new Random(123);
for (int i = 0; i < runs; i++)
{
int[]? array = new int[len];
for (int index = 0; index < len; index++)
{
// Fill the array with the numbers 0..len
array[index] = index;
}

rng.Shuffle(array);

for (int j = 0; j < len; j++)
{
int index = array[j];
buckets[j, index]++;
}
}

double expectedPerBucket = (double)runs / len;
double chiSquare = 0;
foreach (var bucket in buckets)
{
chiSquare += Math.Pow(bucket - expectedPerBucket, 2) / expectedPerBucket;
}
Assert.InRange(chiSquare, 0, critical);
}



[Fact]
public static void GetItems_Span_ArgValidation()
{
Expand Down