Skip to content

Commit 7cb66b3

Browse files
committed
Refactor to implement a custom Collector to track TunnelTime.
1 parent 4112238 commit 7cb66b3

File tree

2 files changed

+101
-142
lines changed

2 files changed

+101
-142
lines changed

cmd/outline-ss-server/metrics.go

+96-104
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,14 @@ import (
2828
"github.com/prometheus/client_golang/prometheus"
2929
)
3030

31-
// How often to report the active IP key TunnelTime.
32-
const tunnelTimeTrackerReportingInterval = 5 * time.Second
31+
// namespace is the Prometheus namespace used for the metrics defined in this
// file.
const namespace = "shadowsocks"
3332

3433
// `now` is stubbable for testing.
3534
var now = time.Now
3635

3736
type outlineMetrics struct {
3837
ipinfo.IPInfoMap
39-
tunnelTimeTracker
38+
tunnelTimeCollector
4039

4140
buildInfo *prometheus.GaugeVec
4241
accessKeys prometheus.Gauge
@@ -46,9 +45,6 @@ type outlineMetrics struct {
4645
timeToCipherMs *prometheus.HistogramVec
4746
// TODO: Add time to first byte.
4847

49-
TunnelTimePerKey *prometheus.CounterVec
50-
TunnelTimePerLocation *prometheus.CounterVec
51-
5248
tcpProbes *prometheus.HistogramVec
5349
tcpOpenConnections *prometheus.CounterVec
5450
tcpClosedConnections *prometheus.CounterVec
@@ -72,97 +68,110 @@ func toIPKey(addr net.Addr, accessKey string) (*IPKey, error) {
7268
return &IPKey{ip, accessKey}, nil
7369
}
7470

75-
type ReportTunnelTimeFunc func(IPKey, ipinfo.IPInfo, time.Duration)
76-
71+
// Represents the clients that are or have been active recently. They stick
72+
// around until they are inactive, or get reported to Prometheus, whichever
73+
// comes last.
7774
type activeClient struct {
78-
clientInfo ipinfo.IPInfo
79-
connectionCount int
80-
startTime time.Time
75+
info ipinfo.IPInfo
76+
connCount int // The active connection count.
77+
startTime time.Time
78+
connDuration time.Duration // If the client has become inactive, this holds the connection duration.
8179
}
8280

8381
// IPKey identifies a client by the pair of its IP address and the access key
// it connected with. It is comparable and serves as the key of the
// activeClients map.
//
// The field order must not change: toIPKey builds values with a positional
// composite literal.
type IPKey struct {
	// ip is the client's IP address.
	ip netip.Addr
	// accessKey is the access key the client used.
	accessKey string
}
8785

88-
type tunnelTimeTracker struct {
86+
type tunnelTimeCollector struct {
8987
ipinfo.IPInfoMap
90-
mu sync.Mutex // Protects the activeClients map.
91-
activeClients map[IPKey]*activeClient
92-
reportTunnelTime ReportTunnelTimeFunc
88+
mu sync.Mutex // Protects the activeClients map.
89+
activeClients map[IPKey]*activeClient
90+
91+
tunnelTimePerKey *prometheus.Desc
92+
tunnelTimePerLocation *prometheus.Desc
9393
}
9494

95-
// Reports time connected for all active clients, called at a regular interval.
96-
func (t *tunnelTimeTracker) reportAll(now time.Time) {
97-
if len(t.activeClients) == 0 {
98-
logger.Debugf("No active clients. No TunnelTime activity to report.")
99-
return
100-
}
101-
t.mu.Lock()
102-
defer t.mu.Unlock()
103-
for ipKey, c := range t.activeClients {
104-
t.reportDuration(ipKey, c, now)
105-
}
95+
// Describe sends the descriptors of the per-key and per-location TunnelTime
// metrics on ch, in that order.
func (c *tunnelTimeCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{c.tunnelTimePerKey, c.tunnelTimePerLocation}
	for _, desc := range descs {
		ch <- desc
	}
}
10799

108-
// Reports time connected for a given active client.
109-
func (t *tunnelTimeTracker) reportDuration(ipKey IPKey, c *activeClient, tNow time.Time) {
110-
connDuration := tNow.Sub(c.startTime)
111-
logger.Debugf("Reporting activity for key `%v`, duration: %v", ipKey.accessKey, connDuration)
112-
t.reportTunnelTime(ipKey, c.clientInfo, connDuration)
113-
// Reset the start time now that it's been reported.
114-
c.startTime = tNow
100+
// Collects time connected for all active clients.
101+
func (c *tunnelTimeCollector) Collect(ch chan<- prometheus.Metric) {
102+
c.mu.Lock()
103+
defer c.mu.Unlock()
104+
tNow := now()
105+
for ipKey, client := range c.activeClients {
106+
var connDuration = client.connDuration
107+
if client.connCount > 0 {
108+
connDuration += tNow.Sub(client.startTime)
109+
}
110+
logger.Debugf("Reporting activity for key `%v`, duration: %v", ipKey.accessKey, connDuration)
111+
ch <- prometheus.MustNewConstMetric(c.tunnelTimePerKey, prometheus.CounterValue, connDuration.Seconds(), ipKey.accessKey)
112+
ch <- prometheus.MustNewConstMetric(c.tunnelTimePerLocation, prometheus.CounterValue, connDuration.Seconds(), client.info.CountryCode.String(), asnLabel(client.info.ASN))
113+
if client.connCount == 0 {
114+
delete(c.activeClients, ipKey)
115+
continue
116+
}
117+
// Reset the start time now that it's been reported.
118+
client.startTime = tNow
119+
client.connDuration = 0
120+
}
115121
}
116122

117123
// Registers a new active connection for a client [net.Addr] and access key.
118-
func (t *tunnelTimeTracker) startConnection(ipKey IPKey) {
119-
t.mu.Lock()
120-
defer t.mu.Unlock()
121-
c, exists := t.activeClients[ipKey]
124+
func (c *tunnelTimeCollector) startConnection(ipKey IPKey) {
125+
c.mu.Lock()
126+
defer c.mu.Unlock()
127+
client, exists := c.activeClients[ipKey]
122128
if !exists {
123-
clientInfo, _ := ipinfo.GetIPInfoFromIP(t.IPInfoMap, net.IP(ipKey.ip.AsSlice()))
124-
// Initialize the TunnelTime for this IPKey with default value of 0:
125-
// https://prometheus.io/docs/practices/instrumentation/#avoid-missing-metrics
126-
t.reportTunnelTime(ipKey, clientInfo, 0)
127-
c = &activeClient{
128-
clientInfo: clientInfo,
129-
startTime: now(),
130-
}
129+
clientInfo, _ := ipinfo.GetIPInfoFromIP(c.IPInfoMap, net.IP(ipKey.ip.AsSlice()))
130+
client = &activeClient{info: clientInfo}
131131
}
132-
c.connectionCount++
133-
t.activeClients[ipKey] = c
132+
if client.connCount == 0 {
133+
// This client is new or was recently stopped (before it was measured and
134+
// deleted by the metrics reporter). Reset the start time so we are
135+
// accurately capturing this new session.
136+
client.startTime = now()
137+
}
138+
client.connCount++
139+
c.activeClients[ipKey] = client
134140
}
135141

136142
// Removes an active connection for a client [net.Addr] and access key.
137-
func (t *tunnelTimeTracker) stopConnection(ipKey IPKey) {
138-
t.mu.Lock()
139-
defer t.mu.Unlock()
140-
c, exists := t.activeClients[ipKey]
143+
func (c *tunnelTimeCollector) stopConnection(ipKey IPKey) {
144+
c.mu.Lock()
145+
defer c.mu.Unlock()
146+
client, exists := c.activeClients[ipKey]
141147
if !exists {
142148
logger.Warningf("Failed to find active client")
143149
return
144150
}
145-
c.connectionCount--
146-
if c.connectionCount <= 0 {
147-
t.reportDuration(ipKey, c, now())
148-
delete(t.activeClients, ipKey)
149-
return
151+
client.connCount--
152+
if client.connCount == 0 {
153+
client.connDuration = now().Sub(client.startTime)
150154
}
151155
}
152156

153-
func newTunnelTimeTracker(ip2info ipinfo.IPInfoMap, report ReportTunnelTimeFunc) *tunnelTimeTracker {
154-
tracker := &tunnelTimeTracker{
155-
IPInfoMap: ip2info,
156-
activeClients: make(map[IPKey]*activeClient),
157-
reportTunnelTime: report,
157+
func newTunnelTimeTracker(ip2info ipinfo.IPInfoMap, registerer prometheus.Registerer) *tunnelTimeCollector {
158+
c := &tunnelTimeCollector{
159+
IPInfoMap: ip2info,
160+
activeClients: make(map[IPKey]*activeClient),
161+
162+
tunnelTimePerKey: prometheus.NewDesc(
163+
prometheus.BuildFQName(namespace, "", "tunnel_time_seconds"),
164+
"Time at least 1 connection was open for a (IP, access key) pair, per key.",
165+
[]string{"access_key"}, nil,
166+
),
167+
tunnelTimePerLocation: prometheus.NewDesc(
168+
prometheus.BuildFQName(namespace, "", "tunnel_time_seconds_per_location"),
169+
"Time at least 1 connection was open for a (IP, access key) pair, per location.",
170+
[]string{"location", "asn"}, nil,
171+
),
158172
}
159-
ticker := time.NewTicker(tunnelTimeTrackerReportingInterval)
160-
go func() {
161-
for t := range ticker.C {
162-
tracker.reportAll(t)
163-
}
164-
}()
165-
return tracker
173+
registerer.MustRegister(c)
174+
return c
166175
}
167176

168177
// newPrometheusOutlineMetrics constructs a metrics object that uses
@@ -173,41 +182,41 @@ func newPrometheusOutlineMetrics(ip2info ipinfo.IPInfoMap, registerer prometheus
173182
m := &outlineMetrics{
174183
IPInfoMap: ip2info,
175184
buildInfo: prometheus.NewGaugeVec(prometheus.GaugeOpts{
176-
Namespace: "shadowsocks",
185+
Namespace: namespace,
177186
Name: "build_info",
178187
Help: "Information on the outline-ss-server build",
179188
}, []string{"version"}),
180189
accessKeys: prometheus.NewGauge(prometheus.GaugeOpts{
181-
Namespace: "shadowsocks",
190+
Namespace: namespace,
182191
Name: "keys",
183192
Help: "Count of access keys",
184193
}),
185194
ports: prometheus.NewGauge(prometheus.GaugeOpts{
186-
Namespace: "shadowsocks",
195+
Namespace: namespace,
187196
Name: "ports",
188197
Help: "Count of open Shadowsocks ports",
189198
}),
190199
tcpProbes: prometheus.NewHistogramVec(prometheus.HistogramOpts{
191-
Namespace: "shadowsocks",
200+
Namespace: namespace,
192201
Name: "tcp_probes",
193202
Buckets: []float64{0, 49, 50, 51, 73, 91},
194203
Help: "Histogram of number of bytes from client to proxy, for detecting possible probes",
195204
}, []string{"port", "status", "error"}),
196205
tcpOpenConnections: prometheus.NewCounterVec(prometheus.CounterOpts{
197-
Namespace: "shadowsocks",
206+
Namespace: namespace,
198207
Subsystem: "tcp",
199208
Name: "connections_opened",
200209
Help: "Count of open TCP connections",
201210
}, []string{"location", "asn"}),
202211
tcpClosedConnections: prometheus.NewCounterVec(prometheus.CounterOpts{
203-
Namespace: "shadowsocks",
212+
Namespace: namespace,
204213
Subsystem: "tcp",
205214
Name: "connections_closed",
206215
Help: "Count of closed TCP connections",
207216
}, []string{"location", "asn", "status", "access_key"}),
208217
tcpConnectionDurationMs: prometheus.NewHistogramVec(
209218
prometheus.HistogramOpts{
210-
Namespace: "shadowsocks",
219+
Namespace: namespace,
211220
Subsystem: "tcp",
212221
Name: "connection_duration_ms",
213222
Help: "TCP connection duration distributions.",
@@ -220,63 +229,52 @@ func newPrometheusOutlineMetrics(ip2info ipinfo.IPInfoMap, registerer prometheus
220229
float64(7 * 24 * time.Hour.Milliseconds()), // Week
221230
},
222231
}, []string{"status"}),
223-
TunnelTimePerKey: prometheus.NewCounterVec(prometheus.CounterOpts{
224-
Namespace: "shadowsocks",
225-
Name: "tunnel_time_seconds",
226-
Help: "Time at least 1 connection was open for a (IP, access key) pair, per key",
227-
}, []string{"access_key"}),
228-
TunnelTimePerLocation: prometheus.NewCounterVec(prometheus.CounterOpts{
229-
Namespace: "shadowsocks",
230-
Name: "tunnel_time_seconds_per_location",
231-
Help: "Time at least 1 connection was open for a (IP, access key) pair, per location",
232-
}, []string{"location", "asn"}),
233232
dataBytes: prometheus.NewCounterVec(
234233
prometheus.CounterOpts{
235-
Namespace: "shadowsocks",
234+
Namespace: namespace,
236235
Name: "data_bytes",
237236
Help: "Bytes transferred by the proxy, per access key",
238237
}, []string{"dir", "proto", "access_key"}),
239238
dataBytesPerLocation: prometheus.NewCounterVec(
240239
prometheus.CounterOpts{
241-
Namespace: "shadowsocks",
240+
Namespace: namespace,
242241
Name: "data_bytes_per_location",
243242
Help: "Bytes transferred by the proxy, per location",
244243
}, []string{"dir", "proto", "location", "asn"}),
245244
timeToCipherMs: prometheus.NewHistogramVec(
246245
prometheus.HistogramOpts{
247-
Namespace: "shadowsocks",
246+
Namespace: namespace,
248247
Name: "time_to_cipher_ms",
249248
Help: "Time needed to find the cipher",
250249
Buckets: []float64{0.1, 1, 10, 100, 1000},
251250
}, []string{"proto", "found_key"}),
252251
udpPacketsFromClientPerLocation: prometheus.NewCounterVec(
253252
prometheus.CounterOpts{
254-
Namespace: "shadowsocks",
253+
Namespace: namespace,
255254
Subsystem: "udp",
256255
Name: "packets_from_client_per_location",
257256
Help: "Packets received from the client, per location and status",
258257
}, []string{"location", "asn", "status"}),
259258
udpAddedNatEntries: prometheus.NewCounter(
260259
prometheus.CounterOpts{
261-
Namespace: "shadowsocks",
260+
Namespace: namespace,
262261
Subsystem: "udp",
263262
Name: "nat_entries_added",
264263
Help: "Entries added to the UDP NAT table",
265264
}),
266265
udpRemovedNatEntries: prometheus.NewCounter(
267266
prometheus.CounterOpts{
268-
Namespace: "shadowsocks",
267+
Namespace: namespace,
269268
Subsystem: "udp",
270269
Name: "nat_entries_removed",
271270
Help: "Entries removed from the UDP NAT table",
272271
}),
273272
}
274-
m.tunnelTimeTracker = *newTunnelTimeTracker(ip2info, m.addTunnelTime)
273+
m.tunnelTimeCollector = *newTunnelTimeTracker(ip2info, registerer)
275274

276275
// TODO: Is it possible to pass where to register the collectors?
277276
registerer.MustRegister(m.buildInfo, m.accessKeys, m.ports, m.tcpProbes, m.tcpOpenConnections, m.tcpClosedConnections, m.tcpConnectionDurationMs,
278-
m.dataBytes, m.dataBytesPerLocation, m.timeToCipherMs, m.udpPacketsFromClientPerLocation, m.udpAddedNatEntries, m.udpRemovedNatEntries,
279-
m.TunnelTimePerKey, m.TunnelTimePerLocation)
277+
m.dataBytes, m.dataBytesPerLocation, m.timeToCipherMs, m.udpPacketsFromClientPerLocation, m.udpAddedNatEntries, m.udpRemovedNatEntries)
280278
return m
281279
}
282280

@@ -293,16 +291,10 @@ func (m *outlineMetrics) AddOpenTCPConnection(clientInfo ipinfo.IPInfo) {
293291
m.tcpOpenConnections.WithLabelValues(clientInfo.CountryCode.String(), asnLabel(clientInfo.ASN)).Inc()
294292
}
295293

296-
// Reports total time connected (i.e. TunnelTime), by access key and by country.
297-
func (m *outlineMetrics) addTunnelTime(ipKey IPKey, clientInfo ipinfo.IPInfo, duration time.Duration) {
298-
m.TunnelTimePerKey.WithLabelValues(ipKey.accessKey).Add(duration.Seconds())
299-
m.TunnelTimePerLocation.WithLabelValues(clientInfo.CountryCode.String(), asnLabel(clientInfo.ASN)).Add(duration.Seconds())
300-
}
301-
302294
// AddAuthenticatedTCPConnection starts TunnelTime tracking for a connection
// from clientAddr that authenticated with accessKey. If no IPKey can be built
// from the arguments, the connection is not tracked.
func (m *outlineMetrics) AddAuthenticatedTCPConnection(clientAddr net.Addr, accessKey string) {
	ipKey, err := toIPKey(clientAddr, accessKey)
	if err != nil {
		return
	}
	m.tunnelTimeCollector.startConnection(*ipKey)
}
308300

@@ -334,7 +326,7 @@ func (m *outlineMetrics) AddClosedTCPConnection(clientInfo ipinfo.IPInfo, client
334326

335327
ipKey, err := toIPKey(clientAddr, accessKey)
336328
if err == nil {
337-
m.tunnelTimeTracker.stopConnection(*ipKey)
329+
m.tunnelTimeCollector.stopConnection(*ipKey)
338330
}
339331
}
340332

@@ -358,7 +350,7 @@ func (m *outlineMetrics) AddUDPNatEntry(clientAddr net.Addr, accessKey string) {
358350

359351
ipKey, err := toIPKey(clientAddr, accessKey)
360352
if err == nil {
361-
m.tunnelTimeTracker.startConnection(*ipKey)
353+
m.tunnelTimeCollector.startConnection(*ipKey)
362354
}
363355
}
364356

@@ -367,7 +359,7 @@ func (m *outlineMetrics) RemoveUDPNatEntry(clientAddr net.Addr, accessKey string
367359

368360
ipKey, err := toIPKey(clientAddr, accessKey)
369361
if err == nil {
370-
m.tunnelTimeTracker.stopConnection(*ipKey)
362+
m.tunnelTimeCollector.stopConnection(*ipKey)
371363
}
372364
}
373365

0 commit comments

Comments
 (0)