Skip to content

Commit

Permalink
Merge pull request #90 from crytic/feature/health-checker-abstraction
Browse files Browse the repository at this point in the history
Improve health checker abstraction
  • Loading branch information
bsamuels453 authored Mar 29, 2024
2 parents 48d546e + 48532b3 commit a335aa6
Show file tree
Hide file tree
Showing 13 changed files with 291 additions and 185 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Go

on:
push:
branches: [ main ]
branches: [ main, develop ]
pull_request:
branches: [ main ]
branches: [ main, develop ]

jobs:

Expand Down
74 changes: 0 additions & 74 deletions pkg/artifacts/artifacts.go

This file was deleted.

16 changes: 16 additions & 0 deletions pkg/health/artifacts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package health

import (
"attacknet/cmd/pkg/health/ethereum"
"attacknet/cmd/pkg/health/types"
"github.com/kurtosis-tech/stacktrace"
)

// BuildArtifactSerializer returns the artifact serializer that matches
// networkType. "ethereum" is the only network type handled here; any other
// value yields a nil serializer and an error naming the rejected type.
func BuildArtifactSerializer(networkType string) (types.ArtifactSerializer, error) {
	if networkType == "ethereum" {
		return ethereum.CreateEthereumArtifactSerializer(), nil
	}
	return nil, stacktrace.NewError("no networkType %s supported in artifact serializer", networkType)
}
72 changes: 22 additions & 50 deletions pkg/health/checker.go
Original file line number Diff line number Diff line change
@@ -1,91 +1,63 @@
package health

import (
chaos_mesh "attacknet/cmd/pkg/chaos-mesh"
chaosmesh "attacknet/cmd/pkg/chaos-mesh"
"attacknet/cmd/pkg/health/ethereum"
"attacknet/cmd/pkg/health/types"
"attacknet/cmd/pkg/kubernetes"
confTypes "attacknet/cmd/pkg/types"
"context"
"github.com/kurtosis-tech/stacktrace"
log "github.com/sirupsen/logrus"
"time"
)

// CheckOrchestrator pairs a network-specific checker implementation
// (checkerImpl) with the grace period during which failing checks are retried.
// NOTE(review): this span is a rendered diff, so both the removed
// (types.GenericNetworkChecker) and the added (types.HealthChecker)
// declaration of checkerImpl appear below; only one exists in the real file.
type CheckOrchestrator struct {
checkerImpl types.GenericNetworkChecker
checkerImpl types.HealthChecker
gracePeriod *time.Duration
}

// BuildHealthChecker constructs a CheckOrchestrator whose checker
// implementation is selected by network type. Only "ethereum" is handled, via
// ethereum.CreateEthNetworkChecker; any other value logs and returns an
// "unknown network type" error.
// NOTE(review): this span is a rendered diff. The removed version hard-coded
// networkType and read the grace period from healthCheckConfig; the added
// version takes networkType and gracePeriod as parameters. Lines from both
// versions are interleaved below.
func BuildHealthChecker(kubeClient *kubernetes.KubeClient, podsUnderTest []*chaos_mesh.PodUnderTest, healthCheckConfig confTypes.HealthCheckConfig) (*CheckOrchestrator, error) {
networkType := "ethereum"
var checkerImpl types.GenericNetworkChecker
func BuildHealthChecker(
networkType string,
kubeClient *kubernetes.KubeClient,
podsUnderTest []*chaosmesh.PodUnderTest,
gracePeriod *time.Duration) (*CheckOrchestrator, error) {

switch networkType {
case "ethereum":
a := ethereum.CreateEthNetworkChecker(kubeClient, podsUnderTest)
checkerImpl = a
return &CheckOrchestrator{
checkerImpl: ethereum.CreateEthNetworkChecker(kubeClient, podsUnderTest),
gracePeriod: gracePeriod,
}, nil
default:
log.Errorf("unknown network type: %s", networkType)
return nil, stacktrace.NewError("unknown network type: %s", networkType)
}
return &CheckOrchestrator{checkerImpl: checkerImpl, gracePeriod: healthCheckConfig.GracePeriod}, nil
}

// RunChecksUntilPassOrGrace (formerly RunChecks) repeatedly invokes the
// checker implementation until the checks pass or the grace period has
// elapsed, pausing one second between attempts. An error from the checker
// aborts the loop immediately and is returned to the caller.
// NOTE(review): this span is a rendered diff; statements using the `hc`
// receiver belong to the removed version, those using `co` to the added one.
// NOTE(review): the pause uses time.Sleep, so ctx cancellation does not
// interrupt the wait between attempts here - confirm whether that is intended.
func (hc *CheckOrchestrator) RunChecks(ctx context.Context) (*types.HealthCheckResult, error) {
func (co *CheckOrchestrator) RunChecksUntilPassOrGrace(ctx context.Context) (bool, interface{}, error) {
start := time.Now()
latestAllowable := start.Add(*hc.gracePeriod)
log.Infof("Allowing up to %.0f seconds for health checks to pass on all nodes", hc.gracePeriod.Seconds())
latestAllowable := start.Add(*co.gracePeriod)
log.Infof("Allowing up to %.0f seconds for health checks to pass on all nodes", co.gracePeriod.Seconds())

lastHealthCheckResult := &types.HealthCheckResult{}
for {
results, err := hc.checkerImpl.RunAllChecks(ctx, lastHealthCheckResult)
pass, err := co.checkerImpl.RunChecks(ctx)
if err != nil {
return nil, err
return false, nil, err
}
lastHealthCheckResult = results
if AllChecksPassed(results) {

if pass {
// Report how much of the grace period was consumed before success.
timeToPass := time.Since(start).Seconds()
pctGraceUsed := timeToPass / hc.gracePeriod.Seconds() * 100
log.Infof("Checks passed in %.0f seconds. Consumed %.1f pct of the %.0f second grace period", timeToPass, pctGraceUsed, hc.gracePeriod.Seconds())
return results, nil
pctGraceUsed := timeToPass / co.gracePeriod.Seconds() * 100
log.Infof("Checks passed in %.0f seconds. Consumed %.1f pct of the %.0f second grace period", timeToPass, pctGraceUsed, co.gracePeriod.Seconds())
return true, co.checkerImpl.PopFinalResult(), nil
}

// Still failing: give up once the deadline has passed, otherwise retry.
if time.Now().After(latestAllowable) {
log.Warnf("Grace period elapsed and a health check is still failing. Time: %d", time.Now().Unix())
return results, nil
return false, co.checkerImpl.PopFinalResult(), nil
} else {
log.Warn("Health checks failed but still in grace period")
time.Sleep(1 * time.Second)
}
}
}

// AllChecksPassed reports whether none of the four block-consensus results
// (latest/finalized, execution/consensus layer) lists a failing client, either
// by reported block or by reported hash. The conditions are evaluated in the
// same order as separate guards would be and stop at the first non-empty list.
func AllChecksPassed(checks *types.HealthCheckResult) bool {
	return len(checks.LatestElBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.LatestElBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.FinalizedElBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.FinalizedElBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.LatestClBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.LatestClBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.FinalizedClBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.FinalizedClBlockResult.FailingClientsReportedHash) == 0
}
52 changes: 52 additions & 0 deletions pkg/health/ethereum/artifacts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package ethereum

import (
chaosMesh "attacknet/cmd/pkg/chaos-mesh"
healthTypes "attacknet/cmd/pkg/health/types"
"attacknet/cmd/pkg/types"
"github.com/kurtosis-tech/stacktrace"
"gopkg.in/yaml.v3"
)

// CreateEthereumArtifactSerializer returns an artifact serializer that starts
// with an empty, non-nil list of test artifacts.
func CreateEthereumArtifactSerializer() healthTypes.ArtifactSerializer {
	serializer := new(artifactSerializer)
	serializer.artifacts = make([]*testArtifact, 0)
	return serializer
}

// AddHealthCheckResult records the outcome of one test as an artifact.
//
// result must be a *healthCheckResult; any other dynamic type is rejected with
// an error that reports both the value and its type. podsUnderTest supplies
// the names stored as the targeted containers, and test supplies the test
// name. The artifact's pass flag is taken from the result's AllChecksPassed.
func (e *artifactSerializer) AddHealthCheckResult(
	result interface{},
	podsUnderTest []*chaosMesh.PodUnderTest,
	test types.SuiteTest,
) error {
	castResult, ok := result.(*healthCheckResult)
	if !ok {
		// result is an arbitrary interface value, so %s would render as
		// "%!s(...)" for non-string types; %v and %T print it faithfully.
		return stacktrace.NewError("cannot cast health check result %v (type %T) to healthCheckResult", result, result)
	}

	var containersTargeted []string
	for _, p := range podsUnderTest {
		containersTargeted = append(containersTargeted, p.GetName())
	}

	testPassed := castResult.AllChecksPassed()

	// Positional literal: field order must match the testArtifact declaration.
	artifact := &testArtifact{
		test.TestName,
		containersTargeted,
		testPassed,
		castResult,
	}

	e.artifacts = append(e.artifacts, artifact)
	return nil
}

// SerializeArtifacts marshals every artifact collected so far to YAML and
// returns the encoded bytes. A marshalling failure is propagated with context
// and no bytes are returned.
func (e *artifactSerializer) SerializeArtifacts() ([]byte, error) {
	serialized, marshalErr := yaml.Marshal(e.artifacts)
	if marshalErr == nil {
		return serialized, nil
	}
	return nil, stacktrace.Propagate(marshalErr, "could not marshal test artifacts")
}
4 changes: 2 additions & 2 deletions pkg/health/ethereum/beacon_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ type BeaconClientRpc struct {
client eth2client.BeaconBlockHeadersProvider
}

func (e *EthNetworkChecker) getBeaconClientConsensus(ctx context.Context, clients []*BeaconClientRpc, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
func (e *healthChecker) getBeaconClientConsensus(ctx context.Context, clients []*BeaconClientRpc, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
forkChoice, err := getBeaconNetworkConsensus(ctx, clients, blockType)
if err != nil {
return nil, err
Expand Down Expand Up @@ -58,7 +58,7 @@ func (e *EthNetworkChecker) getBeaconClientConsensus(ctx context.Context, client
}, nil
}

func (e *EthNetworkChecker) dialToBeaconClients(ctx context.Context) ([]*BeaconClientRpc, error) {
func (e *healthChecker) dialToBeaconClients(ctx context.Context) ([]*BeaconClientRpc, error) {
labelKey := "kurtosistech.com.custom/ethereum-package.client-type"
labelValue := "beacon"
podsToHealthCheck, err := getPodsToHealthCheck(
Expand Down
4 changes: 2 additions & 2 deletions pkg/health/ethereum/execution_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type ExecClientRPC struct {
client *ethclient.Client
}

func (e *EthNetworkChecker) getExecBlockConsensus(ctx context.Context, clients []*ExecClientRPC, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
func (e *healthChecker) getExecBlockConsensus(ctx context.Context, clients []*ExecClientRPC, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
forkChoice, err := getExecNetworkConsensus(ctx, clients, blockType)
if err != nil {
return nil, err
Expand Down Expand Up @@ -53,7 +53,7 @@ func (e *EthNetworkChecker) getExecBlockConsensus(ctx context.Context, clients [
}, nil
}

func (e *EthNetworkChecker) dialToExecutionClients(ctx context.Context) ([]*ExecClientRPC, error) {
func (e *healthChecker) dialToExecutionClients(ctx context.Context) ([]*ExecClientRPC, error) {
labelKey := "kurtosistech.com.custom/ethereum-package.client-type"
labelValue := "execution"
podsToHealthCheck, err := getPodsToHealthCheck(
Expand Down
Loading

0 comments on commit a335aa6

Please sign in to comment.