Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Interleave to prevent eager MoveNext calls #696

Merged
merged 23 commits into from
Dec 12, 2019
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c9aa710
Add TestInterleaveDoNoCallMoveNextEagerly.
Orace Nov 14, 2019
66d1228
Fix interleave implementation so it doen't call MoveNext eagerly.
Orace Nov 14, 2019
336423a
Update MoreLinq/Interleave.cs
Orace Nov 14, 2019
61e4921
Fix the test
atifaziz Nov 22, 2019
b790e53
Undo implementation to prove test isn't bokren
atifaziz Nov 22, 2019
c569479
Merge branch 'master' into issue694
atifaziz Nov 22, 2019
32f02e4
Add TestInterleaveDoNoCallMoveNextEagerly.
Orace Nov 14, 2019
6ff6d03
Fix interleave implementation so it doen't call MoveNext eagerly.
Orace Nov 14, 2019
8a972a9
Merge back originally proposed fix and test
atifaziz Nov 22, 2019
1a4270f
Interleave: add early test of null elements in otherSequences
Orace Nov 22, 2019
76e3516
Added TestInterleaveEarlyThrowOnNullElementInOtherSequences
Orace Nov 22, 2019
6f61760
Remove useless private extension method
Orace Nov 22, 2019
81479fb
Added TestInterleaveDisposesOnPartialEnumeration
Orace Nov 25, 2019
00ef366
Made TestInterleaveDisposesOnPartialEnumeration pass
Orace Nov 25, 2019
d70974b
Interleave: simple implementation using Acquire.
Orace Nov 25, 2019
9be11ad
remove trailing whitespaces
Orace Nov 25, 2019
7d0aa0d
Removed TestInterleaveEarlyThrowOnNullElementInOtherSequences
Orace Dec 12, 2019
205c371
Niptick in Interleave
Orace Dec 12, 2019
08e90e3
remove unused using in InterleaveTests
Orace Dec 12, 2019
4fdb16c
Interleave relace "allNull" by "hasNext"
Orace Dec 12, 2019
ba740cc
update own state first then call external parties
Orace Dec 12, 2019
a7f2c6f
Removed TestInterleaveDisposesOnPartialEnumeration
Orace Dec 12, 2019
efd0f4c
Rewrite TestInterleaveDoNoCallMoveNextEagerly
Orace Dec 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions MoreLinq.Test/InterleaveTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,23 @@ public void TestInterleaveIsLazy()
new BreakingSequence<int>().Interleave(new BreakingSequence<int>());
}

/// <summary>
/// Verify that interleaving do not call enumerators MoveNext method eagerly
/// </summary>
[Test]
public void TestInterleaveDoNoCallMoveNextEagerly()
{
void Code()
{
var sequenceA = Enumerable.Range(1, 1);
var sequenceB = MoreEnumerable.From<int>(() => throw new TestException());

sequenceA.Interleave(sequenceB).Take(1).Consume();
}

Assert.DoesNotThrow(Code);
Orace marked this conversation as resolved.
Show resolved Hide resolved
}

/// <summary>
/// Verify that interleaving disposes those enumerators that it managed
/// to open successfully
Expand Down
148 changes: 42 additions & 106 deletions MoreLinq/Interleave.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ namespace MoreLinq
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

public static partial class MoreEnumerable
{
Expand All @@ -45,125 +43,63 @@ public static partial class MoreEnumerable
/// <returns>A sequence of interleaved elements from all of the source sequences</returns>

public static IEnumerable<T> Interleave<T>(this IEnumerable<T> sequence, params IEnumerable<T>[] otherSequences)
{
return Interleave(sequence, ImbalancedInterleaveStrategy.Skip, otherSequences);
}

/// <summary>
/// Interleaves the elements of two or more sequences into a single sequence, applying the specified strategy when sequences are of unequal length
/// </summary>
/// <remarks>
/// Interleave combines sequences by visiting each in turn, and returning the first element of each, followed
/// by the second, then the third, and so on. So, for example:<br/>
/// <code><![CDATA[
/// {1,1,1}.Interleave( {2,2,2}, {3,3,3} ) => { 1,2,3,1,2,3,1,2,3 }
/// ]]></code>
/// This operator behaves in a deferred and streaming manner.<br/>
/// When sequences are of unequal length, this method will use the imbalance strategy specified to
/// decide how to continue interleaving the remaining sequences. See <see cref="ImbalancedInterleaveStrategy"/>
/// for more information.<br/>
/// The sequences are interleaved in the order that they appear in the <paramref name="otherSequences"/>
/// collection, with <paramref name="sequence"/> as the first sequence.
/// </remarks>
/// <typeparam name="T">The type of the elements of the source sequences</typeparam>
/// <param name="sequence">The first sequence in the interleave group</param>
/// <param name="imbalanceStrategy">Defines the behavior of the operator when sequences are of unequal length</param>
/// <param name="otherSequences">The other sequences in the interleave group</param>
/// <returns>A sequence of interleaved elements from all of the source sequences</returns>

static IEnumerable<T> Interleave<T>(this IEnumerable<T> sequence, ImbalancedInterleaveStrategy imbalanceStrategy, params IEnumerable<T>[] otherSequences)
{
if (sequence == null) throw new ArgumentNullException(nameof(sequence));
if (otherSequences == null) throw new ArgumentNullException(nameof(otherSequences));
if (otherSequences.Any(s => s == null))
throw new ArgumentNullException(nameof(otherSequences), "One or more sequences passed to Interleave was null.");

return _(); IEnumerable<T> _()
{
var sequences = new[] { sequence }.Concat(otherSequences);
return InterleaveSkip(otherSequences.Prepend(sequence));
}

// produce an iterator collection for all IEnumerable<T> instancess passed to us
var iterators = sequences.Select(e => e.GetEnumerator()).Acquire();
List<IEnumerator<T>> iteratorList = null;
private static IEnumerable<T> InterleaveSkip<T>(this IEnumerable<IEnumerable<T>> sequences)
Orace marked this conversation as resolved.
Show resolved Hide resolved
{
var enumerators = new LinkedList<IEnumerator<T>>();

try
try
{
// First pass. create enumerators.
foreach (var sequence in sequences)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have Acquire for this so I would take advantage of it here rather than roll out a duplicate implementation that needs separate testing.

Copy link
Contributor Author

@Orace Orace Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIU, Acquire do not return a IDisposable so Interleave keep the responsibility of disposing the enumerators when the result sequence enumerator is disposed.
Achieve this with a using for each sequence is not feasible since the number of using is dynamic.
The only way I know to achieve this in a yield context is to use some try {} finally {} blocks. Because the finally block is called when the enumerator is disposed.

We indeed can put this behavior in a specialized IDisposable class. When disposed, this object dispose a bunch of elements.
DisposableGroup<T> (declared in SortedMerge and only used here) looks to achieve this, but DisposableGroup<T> internally use a list where I use a LinkedList.
(Rem: DisposableGroup<T> constructor doesn't dispose the already acquired object in case of error in the input sequence, like Acquire do. But anyway, the only usage of DisposableGroup<T> use Acquire).

I have chosen to use a LinkedList since Remove operation is O(1) there.
If we use a list or an array, where the the Remove operation is O(N), enumerating N sequences of 1 element will cost O(N²).
I know that it is an uncommon use case (it may be less uncommon with #697), anyway this implementation manage it.
Rem: Because of DisposableGroup<T> implementation SortedMerge is O(N²) in the case described above.

Copy link
Member

@atifaziz atifaziz Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIU, Acquire do not return a IDisposable

It would return an array of enumerators that are disposable. Question is whether there is something to be gained out of not calling GetEnumerator() on other sequences until you get to them, but that will only optimize the case of someone eventually taking fewer elements than sequences!

Copy link
Contributor Author

@Orace Orace Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acquire will not remove the necessity of a try {} finally {} (with disposal of the enumerators in the finally block), and then is just an alias of ToArray().

  • An array is not optimal here.
  • If Acquire is used and if an error occur while acquiring (the only purpose of Acquire is to manage this case), elements will be disposed twice, which is useless.

So Acquire here is not optimal and useless.

A ToDisposableLinkedList() method that have the functionality of Acquire (on error, dispose already acquired elements) and provide a DisposableLinkedList that have the functionality of DisposableGroup<T> (being disposable and dispose its content when disposed) is the way to go for Interleave, SortedMerge and Transpose.

It will be used like that:

using var enumerators = source.Select(e => e.GetEnumerator()).ToDisposableLinkedList();

The using here allow to remove the try {} finally {} from here:

finally
{
foreach (var e in enumerators)
e?.Dispose();
}

and here:
finally
{
Debug.Assert(iteratorList != null || iterators != null);
foreach (var iter in (iteratorList ?? (IList<IEnumerator<T>>) iterators))
iter.Dispose();
}

and improve the DisposableGroup/Acquire combo from here:

using var disposables = new DisposableGroup<TSource>(sequences.Select(e => e.GetEnumerator()).Acquire());

I opened #724 to refactor Interleave, SortedMerge and Transpose.

Copy link
Member

@atifaziz atifaziz Nov 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acquire will not remove the necessity of a try {} finally {} (with disposal of the enumerators in the finally block), and then is just an alias of ToArray().

Acquire itself does not need to be inside a try-finally block and so it's not the same as ToArray(). It doesn't mean it prevents you from having one. It is, for example, perfectly safe to return the array from Acquire as the return value of a function:

static Stream[] OpenFiles(params string[] paths) =>
    paths.Select(p => File.OpenRead(p)).Acquire();

Of course, the disposal becomes the responsibility of the caller and will need a try-catch. It does prevent that.

  • If Acquire is used and if an error occur while acquiring (the only purpose of Acquire is to manage this case), elements will be disposed twice, which is useless.

It is useless but it's not a problem since it is a requirement for IDisposable.Dispose to be idempotent (per docs):

If an object's Dispose method is called more than once, the object must ignore all calls after the first one.

I have chosen to use a LinkedList since Remove operation is O(1) there.

That's a fine choice but I am not debating the most optimal solution first. My point with Acquire was that it's carefully designed and tested and so might be best to keep using it instead of putting everything into question. This PR was about fixing one problem but by trying to be cleverly optimal at the same time, it may introduce others bugs (and I believe it does)! This means it takes longer to review as the scope of change expands. Instead, I find that it helps to make it work, make it right and then make it fast.

Copy link
Contributor Author

@Orace Orace Nov 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just push a simple (I hope) implementation of Interleave based on the array returned by Acquire.
The LinkedList implementation is in #726

Anyway it's a rebuild of current implementation since it's over-complicated (manage not used cases) and flawed (original issue #694).

Copy link
Contributor Author

@Orace Orace Dec 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@atifaziz with the implementation using Acquire in this PR (currently at a7f2c6f), calls to GetEnumerator are made eagerly and the code below throw:

var sequenceA = Enumerable.Range(1, 1);
var sequenceB = new BreakingSequence<int>();
sequenceA.Interleave(sequenceB).Take(1).Consume();

I don't know it it's acceptable
It doesn't throw in #726.

{
iteratorList = new List<IEnumerator<T>>(iterators);
iterators = null;
var shouldContinue = true;
var consumedIterators = 0;
var iterCount = iteratorList.Count;

while (shouldContinue)
{
// advance every iterator and verify a value exists to be yielded
for (var index = 0; index < iterCount; index++)
{
if (!iteratorList[index].MoveNext())
{
// check if all iterators have been consumed and we can terminate
// or if the imbalance strategy informs us that we MUST terminate
if (++consumedIterators == iterCount || imbalanceStrategy == ImbalancedInterleaveStrategy.Stop)
{
shouldContinue = false;
break;
}
if (sequence == null)
Orace marked this conversation as resolved.
Show resolved Hide resolved
throw new ArgumentException("An item is null.", nameof(sequences));

iteratorList[index].Dispose(); // dispose the iterator sice we no longer need it
var enumerator = sequence.GetEnumerator();

// otherwise, apply the imbalance strategy
switch (imbalanceStrategy)
{
case ImbalancedInterleaveStrategy.Pad:
var newIter = iteratorList[index] = Generate(default(T), x => default).GetEnumerator();
newIter.MoveNext();
break;
// Immediately dispose enumerators of empty sequences.
if (!enumerator.MoveNext())
{
enumerator.Dispose();
continue;
}

case ImbalancedInterleaveStrategy.Skip:
iteratorList.RemoveAt(index); // no longer visit this particular iterator
--iterCount; // reduce the expected number of iterators to visit
--index; // decrement iterator index to compensate for index shifting
--consumedIterators; // decrement consumer iterator count to stay in balance
break;
}
yield return enumerator.Current;
enumerators.AddLast(enumerator);
}

}
}
var node = enumerators.First;
while (node != null)
{
var nextNode = node.Next;

if (shouldContinue) // only if all iterators could be advanced
{
// yield the values of each iterator's current position
foreach (var iterator in iteratorList)
yield return iterator.Current;
}
var enumerator = node.Value;
if (enumerator.MoveNext())
{
yield return enumerator.Current;
}
}
finally
{
Debug.Assert(iteratorList != null || iterators != null);
foreach (var iter in (iteratorList ?? (IList<IEnumerator<T>>) iterators))
iter.Dispose();
else
{
enumerator.Dispose();
enumerators.Remove(node);
}

// Work on next node or restart from first one.
node = nextNode ?? enumerators.First;
}
}
}

/// <summary>
/// Defines the strategies available when Interleave is passed sequences of unequal length
/// </summary>
enum ImbalancedInterleaveStrategy
{
/// <summary>
/// Extends a sequence by padding its tail with default(T)
/// </summary>
Pad,
/// <summary>
/// Removes the sequence from the interleave set, and continues interleaving remaining sequences.
/// </summary>
Skip,
/// <summary>
/// Stops the interleave operation.
/// </summary>
Stop,
finally
{
foreach (var enumerator in enumerators)
enumerator.Dispose();
}
}
}
}