Skip to content

Commit 617fce4

Browse files
authored
Prevent ActivityId leak across threads (#55625)
Prevent ActivityId leak across threads Fixes #51608 Previously running an async task with two or more levels of nesting on the EventSource activity stack would leave the ActivityID set on the old thread after being swapped out at a yield point. This meant that threadpool threads would often have stale ActivityIDs present when they began a new work item. This in turn produced non-sensical logs where it appeared that unrelated work items were nested within each other. Most of this change is adding tests. The ActivityIdIsZeroedOnThreadSwitchOut test is the one being fixed. The remainder of the new tests pass both before and after the change. I added those to better describe some of the existing behavior and add a little confidence that the change didn't have unintended effects elsewhere. As best I can tell the Activity tracking feature didn't have any testing previously and there is certainly still room for improvement on test coverage.
1 parent 90b8ddd commit 617fce4

File tree

3 files changed

+184
-4
lines changed

3 files changed

+184
-4
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.Tracing;
7+
using System.Linq;
8+
using System.Text;
9+
using System.Threading.Tasks;
10+
using Xunit;
11+
12+
namespace BasicEventSourceTests
13+
{
14+
public class ActivityTracking
15+
{
16+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
17+
public void StartStopCreatesActivity()
18+
{
19+
using ActivityEventListener l = new ActivityEventListener();
20+
using ActivityEventSource es = new ActivityEventSource();
21+
22+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
23+
es.ExampleStart();
24+
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
25+
es.ExampleStop();
26+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
27+
}
28+
29+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
30+
public async Task ActivityFlowsAsync()
31+
{
32+
using ActivityEventListener l = new ActivityEventListener();
33+
using ActivityEventSource es = new ActivityEventSource();
34+
35+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
36+
es.ExampleStart();
37+
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
38+
await Task.Yield();
39+
Assert.NotEqual(Guid.Empty, EventSource.CurrentThreadActivityId);
40+
es.ExampleStop();
41+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
42+
}
43+
44+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
45+
public async Task ActivityIdIsZeroedOnThreadSwitchOut()
46+
{
47+
using ActivityEventListener l = new ActivityEventListener();
48+
using ActivityEventSource es = new ActivityEventSource();
49+
50+
// Run tasks on many threads. If an activity id leaks it is likely
51+
// that the thread will be sheduled to run one of our other tasks
52+
// and we can detect the non-zero id at the start of the task
53+
List<Task> tasks = new List<Task>();
54+
for (int i = 0; i < 100; i++)
55+
{
56+
tasks.Add(Task.Run(() => YieldTwoActivitiesDeep(es)));
57+
}
58+
59+
await Task.WhenAll(tasks.ToArray());
60+
}
61+
62+
private async Task YieldTwoActivitiesDeep(ActivityEventSource es)
63+
{
64+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
65+
es.ExampleStart();
66+
es.Example2Start();
67+
await Task.Yield();
68+
es.Example2Stop();
69+
es.ExampleStop();
70+
Assert.Equal(Guid.Empty, EventSource.CurrentThreadActivityId);
71+
}
72+
73+
// I don't know if this behavior is by-design or accidental. For now
74+
// I am attempting to preserve it to lower back compat risk, but in
75+
// the future we might decide it wasn't even desirable to begin with.
76+
// Compare with SetCurrentActivityIdAfterEventDoesNotFlowAsync below.
77+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
78+
public async Task SetCurrentActivityIdBeforeEventFlowsAsync()
79+
{
80+
using ActivityEventListener l = new ActivityEventListener();
81+
using ActivityEventSource es = new ActivityEventSource();
82+
try
83+
{
84+
Guid g = Guid.NewGuid();
85+
EventSource.SetCurrentThreadActivityId(g);
86+
Assert.Equal(g, EventSource.CurrentThreadActivityId);
87+
es.ExampleStart();
88+
await Task.Yield();
89+
es.ExampleStop();
90+
Assert.Equal(g, EventSource.CurrentThreadActivityId);
91+
}
92+
finally
93+
{
94+
EventSource.SetCurrentThreadActivityId(Guid.Empty);
95+
}
96+
}
97+
98+
// I don't know if this behavior is by-design or accidental. For now
99+
// I am attempting to preserve it to lower back compat risk, but in
100+
// the future we might decide it wasn't even desirable to begin with.
101+
// Compare with SetCurrentActivityIdBeforeEventFlowsAsync above.
102+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))]
103+
public async Task SetCurrentActivityIdAfterEventDoesNotFlowAsync()
104+
{
105+
using ActivityEventListener l = new ActivityEventListener();
106+
using ActivityEventSource es = new ActivityEventSource();
107+
try
108+
{
109+
es.ExampleStart();
110+
Guid g = Guid.NewGuid();
111+
EventSource.SetCurrentThreadActivityId(g);
112+
Assert.Equal(g, EventSource.CurrentThreadActivityId);
113+
await Task.Yield();
114+
Assert.NotEqual(g, EventSource.CurrentThreadActivityId);
115+
es.ExampleStop();
116+
}
117+
finally
118+
{
119+
EventSource.SetCurrentThreadActivityId(Guid.Empty);
120+
}
121+
}
122+
}
123+
124+
[EventSource(Name = "ActivityEventSource")]
125+
class ActivityEventSource : EventSource
126+
{
127+
[Event(1)]
128+
public void ExampleStart() => WriteEvent(1);
129+
130+
[Event(2)]
131+
public void ExampleStop() => WriteEvent(2);
132+
133+
[Event(3)]
134+
public void Example2Start() => WriteEvent(3);
135+
136+
[Event(4)]
137+
public void Example2Stop() => WriteEvent(4);
138+
}
139+
140+
class ActivityEventListener : EventListener
141+
{
142+
protected override void OnEventSourceCreated(EventSource eventSource)
143+
{
144+
if (eventSource.Name == "System.Threading.Tasks.TplEventSource")
145+
{
146+
EnableEvents(eventSource, EventLevel.LogAlways, (EventKeywords)0x80);
147+
}
148+
else if (eventSource.Name == "ActivityEventSource")
149+
{
150+
EnableEvents(eventSource, EventLevel.Informational);
151+
}
152+
}
153+
}
154+
}

src/libraries/System.Diagnostics.Tracing/tests/System.Diagnostics.Tracing.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
<Compile Include="BasicEventSourceTest\TestsManifestGeneration.Etw.cs" />
1818
</ItemGroup>
1919
<ItemGroup>
20+
<Compile Include="BasicEventSourceTest\ActivityTracking.cs" />
2021
<Compile Include="BasicEventSourceTest\Harness\EventTestHarness.cs" />
2122
<Compile Include="BasicEventSourceTest\FuzzyTests.cs" />
2223
<Compile Include="BasicEventSourceTest\Harness\Listeners.cs" />

src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/ActivityTracker.cs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -550,12 +550,36 @@ private static unsafe void WriteNibble(ref byte* ptr, byte* endPtr, uint value)
550550
// This callback is used to initialize the m_current AsyncLocal Variable.
551551
// Its job is to keep the ETW Activity ID (part of thread local storage) in sync
552552
// with m_current.ActivityID
553+
//
554+
// WARNING: When mixing manual usage of EventSource.SetCurrentThreadActivityID
555+
// and Start/Stop EventSource events I can't identify a clear design how this
556+
// synchronization is intended to work. For example code that changes
557+
// SetCurrentThreadActivityID after a FooStart() event will not flow the
558+
// explicit ID with the async work, but if FooStart() event is called after
559+
// SetCurrentThreadActivityID then the explicit ID change does flow.
560+
// For now I've adopted the approach:
561+
// Priority 1: Make the API predictable/sensible when only Start/Stop events
562+
// are in use.
563+
// Priority 2: If users aren't complaining and it doesn't interfere with
564+
// goal #1, try to preserve the arbitrary/buggy? existing behavior
565+
// for mixed usage of SetActivityID + events.
566+
//
567+
// For scenarios that only use start/stop events this is what we expect:
568+
// calling start -> push new ID on stack and update thread-local to match new ID
569+
// calling stop -> pop ID from stack and update thread-local to match new topmost
570+
// still active ID. If there is none, set ID to zero
571+
// thread swap -> update thread-local to match topmost active ID.
572+
// If there is none, set ID to zero.
553573
private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
554574
{
555575
ActivityInfo? cur = args.CurrentValue;
556576
ActivityInfo? prev = args.PreviousValue;
557577

558-
// Are we popping off a value? (we have a prev, and it creator is cur)
578+
// Special case only relevant for mixed SetActivityID usage:
579+
//
580+
// Are we MAYBE popping off a value? (we have a prev, and it creator is cur)
581+
// We can't be certain this is a pop because a thread swapping between two
582+
// ExecutionContexts can also appear the same way.
559583
// Then check if we should use the GUID at the time of the start event
560584
if (prev != null && prev.m_creator == cur)
561585
{
@@ -569,8 +593,7 @@ private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
569593
}
570594
}
571595

572-
// OK we did not have an explicit SetActivityID set. Then we should be
573-
// setting the activity to current ActivityInfo. However that activity
596+
// Set the activity to current ActivityInfo. However that activity
574597
// might be dead, in which case we should skip it, so we never set
575598
// the ID to dead things.
576599
while (cur != null)
@@ -583,8 +606,10 @@ private void ActivityChanging(AsyncLocalValueChangedArgs<ActivityInfo?> args)
583606
}
584607
cur = cur.m_creator;
585608
}
609+
586610
// we can get here if there is no information on our activity stack (everything is dead)
587-
// currently we do nothing, as that seems better than setting to Guid.Emtpy.
611+
// Set ActivityID to zero
612+
EventSource.SetCurrentThreadActivityId(Guid.Empty);
588613
}
589614

590615
/// <summary>

0 commit comments

Comments
 (0)