diff --git a/.version b/.version index 60acae26..5f8379dc 100644 --- a/.version +++ b/.version @@ -1 +1 @@ -version=0.5.3 +version=0.5.4 diff --git a/CHANGELOG.md b/CHANGELOG.md index ce4b781f..96956500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,13 @@ # Changelog -## [unreleased]((https://github.com/NodeFactoryIo/vedran/tree/HEAD)) -[Full Changelog](https://github.com/NodeFactoryIo/vedran/compare/v0.5.3...HEAD) +## [v0.5.4](https://github.com/NodeFactoryIo/vedran/tree/v0.5.4) +[Full Changelog](https://github.com/NodeFactoryIo/vedran/compare/v0.5.3...v0.5.4) ### Added ### Fix - Fix tunnel tcp connections not closing after requests finish [\#197](https://github.com/NodeFactoryIo/vedran/pull/197) ([mpetrun5](https://github.com/mpetrun5)) +- Ping handling upgrade [\#198](https://github.com/NodeFactoryIo/vedran/pull/198) ([MakMuftic](https://github.com/MakMuftic)) ### Changed diff --git a/docker-compose.yml b/docker-compose.yml index fc6346cf..dc1eb265 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,11 +26,11 @@ services: container_name: "vedran" vedran-daemon: - image: nodefactory/vedran-daemon:v0.3.2 + image: nodefactory/vedran-daemon:v0.3.4 depends_on: - vedran - polkadot - command: --id test-id --lb http://vedran:4000 --node-rpc http://polkadot:9933 --node-ws http://polkadot:9944 --node-metrics http://polkadot:9615 --payout-address 1Z4GTfUN2QHkSeHqdBUDawgbEWzqtfRG8ouJZ26z3cm7ePN --log-level info + command: --id test-id --lb http://vedran:4000 --node-rpc http://polkadot:9933 --node-ws ws://polkadot:9944 --node-metrics http://polkadot:9615 --payout-address 1Z4GTfUN2QHkSeHqdBUDawgbEWzqtfRG8ouJZ26z3cm7ePN --log-level info prometheus: image: prom/prometheus diff --git a/internal/controllers/ping.go b/internal/controllers/ping.go index 021c5db4..bd3fecc2 100644 --- a/internal/controllers/ping.go +++ b/internal/controllers/ping.go @@ -10,7 +10,7 @@ import ( log "github.com/sirupsen/logrus" ) -const pingOffset = 5 +const pingOffset = 8 func 
(c ApiController) PingHandler(w http.ResponseWriter, r *http.Request) { request := r.Context().Value(auth.RequestContextKey).(*auth.RequestContext) @@ -20,18 +20,24 @@ func (c ApiController) PingHandler(w http.ResponseWriter, r *http.Request) { log.Errorf("Unable to calculate node downtime, error: %v", err) } - if math.Abs(downtimeDuration.Seconds()) > (stats.PingIntervalInSeconds + pingOffset) { - downtime := models.Downtime{ - Start: lastPingTime, - End: request.Timestamp, - NodeId: request.NodeId, + // if two pings arrive one after another (within a 2 second interval) + // this means that one ping got stuck in the network and + // there is no need to write multiple downtimes + if math.Abs(request.Timestamp.Sub(lastPingTime).Seconds()) > 2 { + // check if there was downtime + if math.Abs(downtimeDuration.Seconds()) > (stats.PingIntervalInSeconds + pingOffset) { + downtime := models.Downtime{ + Start: lastPingTime, + End: request.Timestamp, + NodeId: request.NodeId, + } + err = c.repositories.DowntimeRepo.Save(&downtime) + if err != nil { + log.Errorf("Unable to save node downtime, error: %v", err) + } + + log.Debugf("Saved node %s downtime of: %f", request.NodeId, math.Abs(downtimeDuration.Seconds())) } - err = c.repositories.DowntimeRepo.Save(&downtime) - if err != nil { - log.Errorf("Unable to save node downtime, error: %v", err) - } - - log.Debugf("Saved node %s downtime of: %f", request.NodeId, math.Abs(downtimeDuration.Seconds())) } // save ping to database diff --git a/internal/controllers/ping_test.go b/internal/controllers/ping_test.go index 3ff40f78..c56d6a54 100644 --- a/internal/controllers/ping_test.go +++ b/internal/controllers/ping_test.go @@ -27,6 +27,8 @@ func TestApiController_PingHandler(t *testing.T) { downtimeSaveErr error calculateDowntimeErr error downtimeDuration time.Duration + requestTimestamp time.Time + lastPingTimestamp time.Time }{ { name: "Returns 200 if downtime calculation fails", @@ -37,26 +39,32 @@ func TestApiController_PingHandler(t 
*testing.T) { downtimeSaveCallCount: 0, downtimeDuration: time.Duration(0), calculateDowntimeErr: fmt.Errorf("ERROR"), + requestTimestamp: time.Now(), + lastPingTimestamp: time.Now().Add(-5 * time.Second), }, { - name: "Returns 200 if donwtime save fails", + name: "Returns 200 if downtime save fails", statusCode: 200, pingSaveCallCount: 1, pingSaveErr: nil, downtimeSaveErr: fmt.Errorf("ERROR"), downtimeSaveCallCount: 1, - downtimeDuration: time.Duration(time.Second * 11), + downtimeDuration: time.Duration(time.Second * 19), calculateDowntimeErr: nil, + requestTimestamp: time.Now(), + lastPingTimestamp: time.Now().Add(-19 * time.Second), }, { - name: "Saves downtime if downtime duration more than 5 seconds", + name: "Saves downtime if downtime duration more than 18 seconds", statusCode: 200, pingSaveCallCount: 1, pingSaveErr: nil, downtimeSaveErr: nil, downtimeSaveCallCount: 1, - downtimeDuration: time.Duration(time.Second * 11), + downtimeDuration: time.Duration(time.Second * 19), calculateDowntimeErr: nil, + requestTimestamp: time.Now(), + lastPingTimestamp: time.Now().Add(-19 * time.Second), }, { name: "Returns 500 if saving ping fails", @@ -67,6 +75,8 @@ func TestApiController_PingHandler(t *testing.T) { downtimeSaveCallCount: 0, downtimeDuration: time.Duration(time.Second * 8), calculateDowntimeErr: nil, + requestTimestamp: time.Now(), + lastPingTimestamp: time.Now().Add(-5 * time.Second), }, { name: "Returns 200 and does not save downtime if downtime duration less than 5 + 5 seconds", @@ -77,13 +87,14 @@ func TestApiController_PingHandler(t *testing.T) { downtimeSaveCallCount: 0, downtimeDuration: time.Duration(time.Second * 8), calculateDowntimeErr: nil, + requestTimestamp: time.Now(), + lastPingTimestamp: time.Now().Add(-5 * time.Second), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - timestamp := time.Now() // create mock controller nodeRepoMock := mocks.NodeRepository{} recordRepoMock := mocks.RecordRepository{} @@ -92,10 +103,10 
@@ func TestApiController_PingHandler(t *testing.T) { pingRepoMock := mocks.PingRepository{} pingRepoMock.On("Save", &models.Ping{ NodeId: "1", - Timestamp: timestamp, + Timestamp: test.requestTimestamp, }).Return(test.pingSaveErr) pingRepoMock.On("CalculateDowntime", mock.Anything, mock.Anything).Return( - time.Now(), test.downtimeDuration, test.calculateDowntimeErr) + test.lastPingTimestamp, test.downtimeDuration, test.calculateDowntimeErr) downtimeRepoMock := mocks.DowntimeRepository{} downtimeRepoMock.On("Save", mock.Anything).Return(test.downtimeSaveErr) @@ -113,7 +124,7 @@ func TestApiController_PingHandler(t *testing.T) { req, _ := http.NewRequest("POST", "/api/v1/node", bytes.NewReader(nil)) c := &auth.RequestContext{ NodeId: "1", - Timestamp: timestamp, + Timestamp: test.requestTimestamp, } ctx := context.WithValue(req.Context(), auth.RequestContextKey, c) req = req.WithContext(ctx) diff --git a/internal/schedule/checkactive/schedule.go b/internal/schedule/checkactive/schedule.go index c02eaa2b..308924ef 100644 --- a/internal/schedule/checkactive/schedule.go +++ b/internal/schedule/checkactive/schedule.go @@ -34,7 +34,7 @@ func StartScheduledTask(repos *repositories.Repos) { func scheduledTask(repos *repositories.Repos, actions actions.Actions) { log.Debug("Started task: check all active nodes") activeNodes := repos.NodeRepo.GetAllActiveNodes() - + var activeNodesAfterCheck []string for _, node := range *activeNodes { pingActive, err := active.CheckIfPingActive(node.ID, repos) @@ -60,6 +60,14 @@ func scheduledTask(repos *repositories.Repos, actions actions.Actions) { log.Errorf("Unable to remove node %s from active because of %v", node.ID, err) } log.Debugf("Node %s metrics lagging more than 10 blocks, removed node from active", node.ID) + } else { + activeNodesAfterCheck = append(activeNodesAfterCheck, node.ID) } } + + if len(activeNodesAfterCheck) == 0 { + log.Debug("There is no active nodes currently") + } else { + log.Debugf("Currently active nodes: 
%v", activeNodesAfterCheck) + } }