# example-config.yml (forked from blockpane/tenderduty)
---
# Controls whether the dashboard is enabled.
enable_dashboard: yes
# TCP port the dashboard will listen on. Only the port is configurable for now.
listen_port: 8888
# hide_logs is useful if the dashboard will be posted publicly. It disables the log feed
# and obscures most node-related details. Be aware that this has not been fully vetted
# for preventing leaks of information such as node names.
hide_logs: no
# How long to wait, in minutes, before alerting that a node is down.
node_down_alert_minutes: 3
# PagerDuty severity used for node-down alerts.
node_down_alert_severity: critical
# Should the Prometheus exporter be enabled?
prometheus_enabled: yes
# TCP port the Prometheus exporter will listen on. Only the port is configurable for now.
prometheus_listen_port: 28686
# Global settings for PagerDuty.
pagerduty:
  # Should PagerDuty be used? Be aware that setting this to no overrides the
  # individual chain alerting settings.
  enabled: no
  # This is an API key, not an OAuth token. More details to follow; check the
  # PagerDuty v1 docs for more info.
  api_key: aaaaaaaaaaaabbbbbbbbbbbbbcccccccccccc
  # Not currently used, but will be soon. This allows setting escalation priorities, etc.
  default_severity: alert
# Global settings for Discord.
discord:
  # Send alerts to Discord?
  enabled: no
  # The webhook is created by right-clicking a channel, editing its settings, and
  # configuring a webhook in the integrations section.
  webhook: https://discord.com/api/webhooks/999999999999999999/zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
# Global settings for Telegram.
telegram:
  # Send alerts via Telegram? Note: this also supersedes chain-specific settings.
  enabled: no
  # API key; talk to @BotFather to get one.
  api_key: "5555555555:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
  # The group ID of the chat where messages will be sent. Search online for how to
  # find this; better instructions will be included later.
  channel: "-666666666"
# Global settings for Slack.
slack:
  # Send alerts to Slack?
  enabled: no
  # The webhook can be added in the Slack app directory.
  webhook: https://hooks.slack.com/services/AAAAAAAAAAAAAAAAAAAAAAA/bbbbbbbbbbbbbbbbbbbbbbbb
# Healthcheck settings (dead man's switch).
healthcheck:
  # Send pings to determine if the monitor is running?
  enabled: no
  # URL to send pings to.
  ping_url: https://hc-ping.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
  # How often pings are sent, in seconds.
  ping_rate: 60
# The various chains to be monitored. Create a new entry for each chain. The name
# itself can be arbitrary, but a user-friendly name is recommended.
chains:
  # The user-friendly name that will be used for labels. Wrapping it in quotes is
  # highly recommended.
  "Osmosis":
    # chain_id is validated for a match when connecting to an RPC endpoint, and is
    # also used as a label in several places.
    chain_id: osmosis-1
    # In v2 the valcons address is derived from ABCI queries, so there is no need to
    # work out how to convert ed25519 keys to the appropriate bech32 address.
    # Use the valcons address instead if using ICS.
    valoper_address: osmovaloper1xxxxxxx...
    # Should the monitor revert to using public API endpoints if all supplied RPC nodes fail?
    # This isn't always reliable; not all public nodes have websocket proxying set up correctly.
    public_fallback: no
    # Controls various alert settings for each chain.
    alerts:
      # If the chain stops seeing new blocks, should an alert be sent?
      stalled_enabled: yes
      # How long, in minutes, a chain must be halted before an alarm is generated.
      stalled_minutes: 10
      # Most basic alarm: you just missed x blocks ... would you like to know?
      consecutive_enabled: yes
      # How many missed blocks should trigger a notification?
      consecutive_missed: 5
      # PagerDuty severity for the consecutive-missed alert.
      consecutive_priority: critical
      # For each chain there is a specific window of blocks and a percentage of missed
      # blocks that will result in a downtime jail infraction. Should an alert be sent
      # if a certain percentage of this window is exceeded?
      percentage_enabled: no
      # What percentage should trigger the alert?
      percentage_missed: 10
      # PagerDuty severity for the percentage-missed alert.
      percentage_priority: warning
      # Should an alert be sent if the validator is not in the active set, i.e. jailed,
      # tombstoned, or unbonding?
      alert_if_inactive: yes
      # Should an alert be sent if no RPC servers are responding? (Note: this alarm is
      # instantaneous, with no delay.)
      alert_if_no_servers: yes
      # For this *specific* chain it's possible to override alert settings. If the
      # api_key or webhook addresses are empty, the global settings will be used.
      # Note: enabled must be set both globally and for each chain.
      # Chain-specific settings for PagerDuty.
      pagerduty:
        enabled: yes
        api_key: ""  # uses default if blank
      # Chain-specific settings for Discord.
      discord:
        enabled: yes
        webhook: ""  # uses default if blank
      # Chain-specific settings for Telegram.
      telegram:
        enabled: yes
        api_key: ""  # uses default if blank
        channel: ""  # uses default if blank
      # Chain-specific settings for Slack.
      slack:
        enabled: yes
        webhook: ""  # uses default if blank
    # This section covers our RPC providers. No LCD (aka REST) endpoints are used, only
    # TM's RPC endpoints. Multiple hosts are encouraged, and will be tried sequentially
    # until a working endpoint is discovered.
    nodes:
      # URL for the endpoint. Must include protocol://hostname:port
      - url: tcp://localhost:26657
        # Should we send an alert if this host isn't responding?
        alert_if_down: yes
      # Repeat hosts for monitoring redundancy.
      - url: https://some-other-node:443
        alert_if_down: no