Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(respondd): ping node to be sure if node is offline #190

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ As root:
go get -v -u github.com/FreifunkBremen/yanic
```

### Allow yanic to send pings
Only needed if the config sets `nodes.ping_count` to a value greater than 0:
```sh
sudo setcap cap_net_raw=+ep /opt/go/bin/yanic
```

#### Work with other databases
If you like to use another database solution than influxdb, Pull Requests are
welcome. Just fork this project and create another subpackage within the folder
Expand Down
6 changes: 6 additions & 0 deletions config_example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ save_interval = "5s"
# Set node to offline if not seen within this period
offline_after = "10m"

## Verify that a node is really down by pinging its last seen address
# number of pings to send to verify that a node is offline (set to a value < 1 to disable)
ping_count = 3
# timeout for each ping sent to a node
ping_timeout = "1s"


## [[nodes.output.example]]
# Each output format has its own config block and needs to be enabled by adding:
Expand Down
1 change: 1 addition & 0 deletions database/graphite/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s
func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric {
return []graphigo.Metric{
{Name: name + ".nodes", Value: stats.Nodes},
{Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd},
{Name: name + ".gateways", Value: stats.Gateways},
{Name: name + ".clients.total", Value: stats.Clients},
{Name: name + ".clients.wifi", Value: stats.ClientsWifi},
Expand Down
13 changes: 7 additions & 6 deletions database/influxdb/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time
// GlobalStatsFields returns fields for InfluxDB.
//
// Each counter of stats is exposed under exactly one field name. A Go map
// literal must not repeat a constant key, so every key appears only once.
func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} {
	return map[string]interface{}{
		"nodes":             stats.Nodes,
		"nodes.no_respondd": stats.NodesNoRespondd,
		"gateways":          stats.Gateways,
		"clients.total":     stats.Clients,
		"clients.wifi":      stats.ClientsWifi,
		"clients.wifi24":    stats.ClientsWifi24,
		"clients.wifi5":     stats.ClientsWifi5,
	}
}

Expand Down
2 changes: 1 addition & 1 deletion database/logging/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) {
}

// InsertGlobals writes one log line summarising the global statistics for
// the given site and domain: the timestamp, the node count (including how
// many of those nodes are flagged as not answering respondd), the client
// count and the number of distinct models.
func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) {
	// Log exactly once per call; the line carries the no-respondd counter.
	conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models))
}

func (conn *Connection) PruneNodes(deleteAfter time.Duration) {
Expand Down
22 changes: 22 additions & 0 deletions docs/docs_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json"
prune_after = "7d"
save_interval = "5s"
offline_after = "10m"
ping_count = 3
ping_timeout = "1s"
```
{% endmethod %}

Expand Down Expand Up @@ -246,6 +248,26 @@ offline_after = "10m"
```
{% endmethod %}

### ping_count
{% method %}
Verify that a node is really down by pinging its last seen address.
Sends this many pings to check whether the node is offline (set to a value < 1 to disable).
{% sample lang="toml" %}
```toml
ping_count = 3
```
{% endmethod %}


### ping_timeout
{% method %}
Timeout for each ping sent to a node.
{% sample lang="toml" %}
```toml
ping_timeout = "1s"
```
{% endmethod %}


## [[nodes.output.example]]
{% method %}
Expand Down
6 changes: 6 additions & 0 deletions docs/docs_install.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ As root:
go get -v -u github.com/FreifunkBremen/yanic
```

### Allow yanic to send pings
Only needed if the config sets `nodes.ping_count` to a value greater than 0:
```sh
sudo setcap cap_net_raw=+ep /opt/go/bin/yanic
```

### Install

```sh
Expand Down
1 change: 1 addition & 0 deletions runtime/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type Node struct {
Firstseen jsontime.Time `json:"firstseen"`
Lastseen jsontime.Time `json:"lastseen"`
Online bool `json:"online"`
NoRespondd bool `json:"no_respondd"`
Statistics *data.Statistics `json:"statistics"`
Nodeinfo *data.Nodeinfo `json:"nodeinfo"`
Neighbours *data.Neighbours `json:"-"`
Expand Down
27 changes: 25 additions & 2 deletions runtime/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/bdlm/log"
ping "github.com/digineo/go-ping"

"github.com/FreifunkBremen/yanic/data"
"github.com/FreifunkBremen/yanic/lib/jsontime"
Expand All @@ -17,6 +18,7 @@ type Nodes struct {
List map[string]*Node `json:"nodes"` // the current nodemap, indexed by node ID
ifaceToNodeID map[string]string // mapping from MAC address to NodeID
config *NodesConfig
pinger *ping.Pinger
sync.RWMutex
}

Expand All @@ -27,6 +29,11 @@ func NewNodes(config *NodesConfig) *Nodes {
ifaceToNodeID: make(map[string]string),
config: config,
}
p, err := ping.New("", "::")
if err != nil {
log.Warnf("ping bind failed: %s", err)
}
nodes.pinger = p

if config.StatePath != "" {
nodes.load()
Expand Down Expand Up @@ -80,9 +87,10 @@ func (nodes *Nodes) Update(nodeID string, res *data.ResponseData) *Node {
// Update fields
node.Lastseen = now
node.Online = true
node.Neighbours = res.Neighbours
node.Nodeinfo = res.Nodeinfo
node.Statistics = res.Statistics
node.Neighbours = res.Neighbours
node.NoRespondd = false

return node
}
Expand Down Expand Up @@ -170,15 +178,29 @@ func (nodes *Nodes) expire() {
nodes.Lock()
defer nodes.Unlock()

wg := sync.WaitGroup{}

for id, node := range nodes.List {
if node.Lastseen.Before(pruneAfter) {
// expire
delete(nodes.List, id)
} else if node.Lastseen.Before(offlineAfter) {
// set to offline
node.Online = false
wg.Add(1)
go func(node *Node) {
defer wg.Done()
if nodes.config.PingCount > 0 && nodes.ping(node) {
node.Online = true
node.NoRespondd = true
} else {
node.Online = false
node.NoRespondd = false
}
}(node)
}
}
wg.Wait()
log.WithField("nodes", "expire").Info("end")
}

// adds the nodes interface addresses to the internal map
Expand Down Expand Up @@ -240,6 +262,7 @@ func (nodes *Nodes) save() {

// serialize nodes
SaveJSON(nodes, nodes.config.StatePath)
log.WithField("nodes", "save").Info("end")
}

// SaveJSON to path
Expand Down
2 changes: 2 additions & 0 deletions runtime/nodes_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ type NodesConfig struct {
SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically
OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period
PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of inactivity
PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1)
PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node
Output map[string]interface{}
}
39 changes: 39 additions & 0 deletions runtime/nodes_ping.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package runtime

import (
"net"

"github.com/bdlm/log"
)

// ping reports whether the given node answers a ping.
//
// The destination is node.Address when it is set. Otherwise the addresses
// listed in node.Nodeinfo.Network.Addresses are tried as a fallback;
// link-local addresses are skipped there and the last usable entry wins.
// The ping is sent via nodes.pinger using nodes.config.PingTimeout and
// nodes.config.PingCount.
//
// It returns false when no pinger is available, when no usable address could
// be determined, or when the ping itself fails.
func (nodes *Nodes) ping(node *Node) bool {
	logNode := log.WithField("node_id", "unknown")
	if node.Nodeinfo != nil {
		logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID)
	}

	// The pinger may be missing (e.g. binding the socket failed and was only
	// logged, or Nodes was built without one); never dereference it then.
	if nodes.pinger == nil {
		logNode.Debug("error no pinger available")
		return false
	}

	var addr *net.IPAddr
	if node.Address != nil {
		addr = &net.IPAddr{IP: node.Address.IP, Zone: node.Address.Zone}
	} else {
		logNode.Debug("error no address found")
		if node.Nodeinfo != nil {
			for _, addrMaybeString := range node.Nodeinfo.Network.Addresses {
				addrMaybe, err := net.ResolveIPAddr("ip6", addrMaybeString)
				if err != nil || addrMaybe == nil {
					continue
				}
				// Link-local addresses are not used as fallback targets.
				if addrMaybe.IP.IsLinkLocalUnicast() {
					continue
				}
				addr = addrMaybe
			}
		}
	}

	// Without a destination IP there is nothing to ping.
	if addr == nil || len(addr.IP) == 0 {
		logNode.Debug("error no usable address to ping")
		return false
	}

	logAddr := logNode.WithField("addr", addr.String())

	_, err := nodes.pinger.PingAttempts(addr, nodes.config.PingTimeout.Duration, nodes.config.PingCount)

	logAddr.WithFields(map[string]interface{}{
		"success": err == nil,
	}).Debug("pong")
	return err == nil
}
44 changes: 44 additions & 0 deletions runtime/nodes_ping_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package runtime

import (
"net"
"testing"
"time"

"github.com/bdlm/log"
"github.com/stretchr/testify/assert"

"github.com/FreifunkBremen/yanic/data"
)

// TestPing asserts that ping reports false in three situations: a node
// without a stored address (expected to take the nodeinfo fallback), a node
// whose address has a zone but no IP, and a node whose address is fe80::1.
func TestPing(t *testing.T) {
	log.SetLevel(log.DebugLevel)

	check := assert.New(t)

	cfg := &NodesConfig{PingCount: 1}
	cfg.OfflineAfter.Duration = 10 * time.Minute
	// to get default (100%) path of testing
	// cfg.PruneAfter.Duration = time.Hour * 24 * 6
	nodes := &Nodes{
		List:          make(map[string]*Node),
		ifaceToNodeID: make(map[string]string),
		config:        cfg,
	}

	nodeinfo := &data.Nodeinfo{
		NodeID:  "nodeID-Lola",
		Network: data.Network{Addresses: []string{"fe80::1", "fd2f::1"}},
	}
	node := nodes.Update("expire", &data.ResponseData{Nodeinfo: nodeinfo})

	// no address on the node: fallback to the nodeinfo addresses
	check.False(nodes.ping(node))

	// address carrying only a zone: ping must fail
	node.Address = &net.UDPAddr{Zone: "bat0"}
	check.False(nodes.ping(node))

	// address with IP fe80::1: ping must fail
	node.Address.IP = net.ParseIP("fe80::1")
	check.False(nodes.ping(node))
}
16 changes: 10 additions & 6 deletions runtime/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ type CounterMap map[string]uint32

// GlobalStats struct
type GlobalStats struct {
Clients uint32
ClientsWifi uint32
ClientsWifi24 uint32
ClientsWifi5 uint32
Gateways uint32
Nodes uint32
Clients uint32
ClientsWifi uint32
ClientsWifi24 uint32
ClientsWifi5 uint32
Gateways uint32
Nodes uint32
NodesNoRespondd uint32

Firmwares CounterMap
Models CounterMap
Expand Down Expand Up @@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) {
s.ClientsWifi5 += stats.Clients.Wifi5
s.ClientsWifi += stats.Clients.Wifi
}
if node.NoRespondd {
s.NodesNoRespondd++
}
if node.IsGateway() {
s.Gateways++
}
Expand Down
4 changes: 3 additions & 1 deletion runtime/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) {
//check GLOBAL_SITE stats
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways)
assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes)
assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd)
assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients)

// check models
Expand Down Expand Up @@ -98,7 +99,8 @@ func createTestNodes() *Nodes {
nodes.AddNode(nodeData)

nodes.AddNode(&Node{
Online: true,
Online: true,
NoRespondd: true,
Statistics: &data.Statistics{
Clients: data.Clients{
Total: 2,
Expand Down