ContainerTracker.cs
using Axinom.Toolkit;
using Docker.DotNet;
using Docker.DotNet.Models;
using Prometheus;
using System;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace DockerExporter
{
/// <summary>
/// Tracks the status of one container and exports metrics, updating the data when new scrapes are requested.
/// </summary>
/// <remarks>
/// NOT thread-safe! No concurrent usage is expected.
/// DockerTracker performs the necessary synchronization logic.
/// </remarks>
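/// <example>
/// An illustrative lifecycle sketch (the real orchestration lives in DockerTracker, which is not shown here;
/// the variable names below are hypothetical):
/// <code>
/// var tracker = new ContainerTracker(containerId, displayName);
/// await tracker.TryUpdateAsync(dockerClient, cancel); // called once per scrape
/// tracker.Dispose(); // called once the container goes away
/// </code>
/// </example>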
sealed class ContainerTracker : IDisposable
{
public string Id { get; }
public string DisplayName { get; }
public ContainerTracker(string id, string displayName)
{
Id = id;
DisplayName = displayName;
_metrics = new ContainerTrackerMetrics(displayName);
}
public void Dispose()
{
_resourceMetrics?.Dispose();
_stateMetrics?.Dispose();
}
public void Unpublish()
{
_resourceMetrics?.Unpublish();
_stateMetrics?.Unpublish();
}
/// <summary>
/// Requests the tracker to update its data set.
/// </summary>
/// <remarks>
/// The method does not throw exceptions on transient failures; it merely logs and ignores them.
/// </remarks>
public async Task TryUpdateAsync(DockerClient client, CancellationToken cancel)
{
ContainerInspectResponse container;
var resourceStatsRecorder = new StatsRecorder();
try
{
// First, inspect to get some basic information.
using (_metrics.InspectContainerDuration.NewTimer())
container = await client.Containers.InspectContainerAsync(Id, cancel);
// Then query for the latest resource usage stats (if the container is running).
if (container.State.Running)
{
using var statsTimer = _metrics.GetResourceStatsDuration.NewTimer();
await client.Containers.GetContainerStatsAsync(Id, new ContainerStatsParameters
{
Stream = false // Only get latest, then stop.
}, resourceStatsRecorder, cancel);
}
}
catch (Exception ex)
{
_metrics.FailedProbeCount.Inc();
_log.Error(Helpers.Debug.GetAllExceptionMessages(ex));
_log.Debug(ex.ToString()); // Only to verbose output.
// Errors are ignored - if we fail to get data, we just skip an update and log the failure.
// The next update will hopefully get past the error. For now, we just unpublish.
Unpublish();
return;
}
// Anything that goes wrong below is a fatal error that must not be ignored, which is why it is outside the try block.
// Now that we have the data assembled, update the metrics.
if (_stateMetrics == null)
{
_log.Debug($"First update of state metrics for {DisplayName} ({Id}).");
_stateMetrics = new ContainerTrackerStateMetrics(DisplayName);
}
UpdateStateMetrics(_stateMetrics, container);
if (resourceStatsRecorder.Response != null)
{
if (_resourceMetrics == null)
{
_log.Debug($"Initializing resource metrics for {DisplayName} ({Id}).");
_resourceMetrics = new ContainerTrackerResourceMetrics(DisplayName);
}
UpdateResourceMetrics(_resourceMetrics, container, resourceStatsRecorder.Response);
}
else
{
// We may have had resource metrics before and now they should go away (e.g. the container stopped).
// They will be recreated once we receive resource stats again (e.g. after the container restarts).
_resourceMetrics?.Dispose();
_resourceMetrics = null;
}
}
private void UpdateStateMetrics(ContainerTrackerStateMetrics metrics, ContainerInspectResponse container)
{
metrics.RestartCount.Set(container.RestartCount);
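// Encode the container state as a single gauge: 1 = running, 0.5 = restarting, 0 = stopped or any other state.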
if (container.State.Running)
metrics.RunningState.Set(1);
else if (container.State.Restarting)
metrics.RunningState.Set(0.5);
else
metrics.RunningState.Set(0);
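// StartedAt is an RFC 3339 timestamp, so the gauge ends up holding the container start time as a Unix timestamp.
// A typical query-time use (the metric name here is illustrative; the real name is defined in
// ContainerTrackerStateMetrics, which is not part of this file) would be
// time() - container_start_time_seconds to get the current uptime in seconds.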
if (container.State.Running && !string.IsNullOrWhiteSpace(container.State.StartedAt))
metrics.StartTime.SetToTimeUtc(DateTimeOffset.Parse(container.State.StartedAt, CultureInfo.InvariantCulture));
}
private void UpdateResourceMetrics(ContainerTrackerResourceMetrics metrics, ContainerInspectResponse container, ContainerStatsResponse resources)
{
// Resource reporting differs significantly between operating systems.
// MemoryStats.Commit is only populated for Windows containers, and we assume a running container
// never has 0 bytes of committed memory, so a nonzero value identifies a Windows container.
bool isWindowsContainer = resources.MemoryStats.Commit != 0;
// CPU usage
// The mechanism of calculation is the rate of increase in container CPU time versus available ("system") CPU time.
// The idea is that we publish two series - one counting used CPU time and one counting potentially
// available CPU time, both in the same (arbitrary) units. Dividing the rates of the two series then
// yields the usage percentage regardless of what the units actually are.
// Docker CPU usage on Windows counts 100ns ticks.
// Docker CPU usage on Linux counts unspecified ticks in relation to some other stats.
// See https://github.com/moby/moby/blob/eb131c5383db8cac633919f82abad86c99bffbe5/cli/command/container/stats_helpers.go#L175
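//
// As a query-time sketch of how the two series are meant to be combined (the metric names below are
// illustrative; the real names are defined in ContainerTrackerResourceMetrics, which is not part of this file):
//
//   rate(container_cpu_used_total[1m]) / rate(container_cpu_capacity_total[1m])
//
// gives the fraction of available CPU time the container consumed over the last minute.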
if (isWindowsContainer)
{
// To compensate for core count on Windows, we normalize the container usage to a single core.
// We also normalize the available CPU time to a single core.
// This way the Windows calculation is always per-core averaged.
// A .NET TimeSpan tick (CpuBaselineTimer.Elapsed.Ticks below) is exactly 100 ns, so it matches the unit Docker uses on Windows.
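// For example (hypothetical numbers): if the container consumed 20,000,000 ticks (2 s) of CPU time spread
// across 4 cores while the baseline timer advanced 100,000,000 ticks (10 s), we publish a usage of
// 5,000,000 ticks (0.5 s) against a capacity of 100,000,000 ticks, i.e. a 5% per-core average.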
metrics.CpuCapacity.Set(CpuBaselineTimer.Elapsed.Ticks);
metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage / resources.NumProcs);
}
else
{
// On Linux, both the container usage and the system usage appear to count CPU time across all cores,
// so the ratio represents the container's share of the whole machine's CPU capacity.
metrics.CpuCapacity.Set(resources.CPUStats.SystemUsage);
metrics.CpuUsage.Set(resources.CPUStats.CPUUsage.TotalUsage);
}
// Memory usage
if (isWindowsContainer)
{
// Windows reports the Private Working Set in Docker stats, but appears to use Commit Bytes to enforce the memory limit.
// We want to report the same value that the limit applies to, so we use Commit Bytes here.
metrics.MemoryUsage.Set(resources.MemoryStats.Commit);
}
else
{
metrics.MemoryUsage.Set(resources.MemoryStats.Usage);
}
// Network I/O
if (resources.Networks == null)
{
metrics.TotalNetworkBytesIn.Set(0);
metrics.TotalNetworkBytesOut.Set(0);
}
else
{
metrics.TotalNetworkBytesIn.Set(resources.Networks.Values.Sum(n => (double)n.RxBytes));
metrics.TotalNetworkBytesOut.Set(resources.Networks.Values.Sum(n => (double)n.TxBytes));
}
// Disk I/O
if (isWindowsContainer)
{
metrics.TotalDiskBytesRead.Set(resources.StorageStats.ReadSizeBytes);
metrics.TotalDiskBytesWrite.Set(resources.StorageStats.WriteSizeBytes);
}
else
{
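// On Linux, blkio statistics arrive as per-device entries tagged with an operation type
// (e.g. Read, Write, Sync, Async, Total); we sum the Read and Write entries across all devices.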
var readEntries = resources.BlkioStats.IoServiceBytesRecursive?
.Where(entry => entry.Op.Equals("read", StringComparison.InvariantCultureIgnoreCase))
.ToArray();
var writeEntries = resources.BlkioStats.IoServiceBytesRecursive?
.Where(entry => entry.Op.Equals("write", StringComparison.InvariantCultureIgnoreCase))
.ToArray();
var totalRead = readEntries?.Sum(entry => (long)entry.Value) ?? 0;
var totalWrite = writeEntries?.Sum(entry => (long)entry.Value) ?? 0;
metrics.TotalDiskBytesRead.Set(totalRead);
metrics.TotalDiskBytesWrite.Set(totalWrite);
}
}
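/// <summary>
/// Captures the single stats sample that GetContainerStatsAsync() reports via IProgress
/// when streaming is disabled (Stream = false above).
/// </summary>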
private sealed class StatsRecorder : IProgress<ContainerStatsResponse>
{
public ContainerStatsResponse? Response { get; private set; }
public void Report(ContainerStatsResponse value) => Response = value;
}
// We just need a monotonically increasing timer that does not use excessively large numbers (no 1970 base).
private static readonly Stopwatch CpuBaselineTimer = Stopwatch.StartNew();
private readonly ContainerTrackerMetrics _metrics;
private ContainerTrackerStateMetrics? _stateMetrics;
private ContainerTrackerResourceMetrics? _resourceMetrics;
private readonly LogSource _log = Log.Default;
}
}