Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve health checker abstraction #90

Merged
merged 3 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Go

on:
push:
branches: [ main ]
branches: [ main, develop ]
pull_request:
branches: [ main ]
branches: [ main, develop ]

jobs:

Expand Down
74 changes: 0 additions & 74 deletions pkg/artifacts/artifacts.go

This file was deleted.

16 changes: 16 additions & 0 deletions pkg/health/artifacts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package health

import (
"attacknet/cmd/pkg/health/ethereum"
"attacknet/cmd/pkg/health/types"
"github.com/kurtosis-tech/stacktrace"
)

// BuildArtifactSerializer selects the artifact serializer that matches
// networkType. "ethereum" is the only recognised value; every other value
// produces an error and a nil serializer.
func BuildArtifactSerializer(networkType string) (types.ArtifactSerializer, error) {
	if networkType == "ethereum" {
		return ethereum.CreateEthereumArtifactSerializer(), nil
	}

	return nil, stacktrace.NewError("no networkType %s supported in artifact serializer", networkType)
}
72 changes: 22 additions & 50 deletions pkg/health/checker.go
Original file line number Diff line number Diff line change
@@ -1,91 +1,63 @@
package health

import (
chaos_mesh "attacknet/cmd/pkg/chaos-mesh"
chaosmesh "attacknet/cmd/pkg/chaos-mesh"
"attacknet/cmd/pkg/health/ethereum"
"attacknet/cmd/pkg/health/types"
"attacknet/cmd/pkg/kubernetes"
confTypes "attacknet/cmd/pkg/types"
"context"
"github.com/kurtosis-tech/stacktrace"
log "github.com/sirupsen/logrus"
"time"
)

// CheckOrchestrator couples a network-specific health checker implementation
// with the grace period during which its checks are allowed to start passing.
// NOTE(review): the two checkerImpl lines below look like the before/after
// sides of a diff hunk; a struct cannot declare the same field twice, so only
// one of them can be present in compilable code.
type CheckOrchestrator struct {
checkerImpl types.GenericNetworkChecker
checkerImpl types.HealthChecker
// gracePeriod is dereferenced without a nil check when checks are run.
gracePeriod *time.Duration
}

// BuildHealthChecker constructs a CheckOrchestrator for a network type.
// Only "ethereum" is handled; any other network type yields a nil orchestrator
// and an error.
// NOTE(review): this span appears to interleave the old and new sides of a
// diff hunk (two signatures, two ways of building the result). Confirm which
// lines belong to the version being compiled.
func BuildHealthChecker(kubeClient *kubernetes.KubeClient, podsUnderTest []*chaos_mesh.PodUnderTest, healthCheckConfig confTypes.HealthCheckConfig) (*CheckOrchestrator, error) {
networkType := "ethereum"
var checkerImpl types.GenericNetworkChecker
func BuildHealthChecker(
networkType string,
kubeClient *kubernetes.KubeClient,
podsUnderTest []*chaosmesh.PodUnderTest,
gracePeriod *time.Duration) (*CheckOrchestrator, error) {

switch networkType {
case "ethereum":
a := ethereum.CreateEthNetworkChecker(kubeClient, podsUnderTest)
checkerImpl = a
// The Ethereum checker is built from the kube client and the pods under test;
// the grace period is stored as given, without validation.
return &CheckOrchestrator{
checkerImpl: ethereum.CreateEthNetworkChecker(kubeClient, podsUnderTest),
gracePeriod: gracePeriod,
}, nil
default:
// Unknown network types are reported to the caller as an error.
log.Errorf("unknown network type: %s", networkType)
return nil, stacktrace.NewError("unknown network type: %s", networkType)
}
return &CheckOrchestrator{checkerImpl: checkerImpl, gracePeriod: healthCheckConfig.GracePeriod}, nil
}

// RunChecksUntilPassOrGrace repeatedly runs the underlying checker until the
// checks pass or the grace period has elapsed, sleeping one second between
// attempts. An error from the checker aborts the loop and is returned as-is.
// The grace deadline is only evaluated after an attempt, so at least one
// attempt is always made.
// NOTE(review): this span appears to interleave the old (RunChecks, receiver
// hc) and new (RunChecksUntilPassOrGrace, receiver co) sides of a diff hunk.
// Confirm which lines belong to the version being compiled.
func (hc *CheckOrchestrator) RunChecks(ctx context.Context) (*types.HealthCheckResult, error) {
func (co *CheckOrchestrator) RunChecksUntilPassOrGrace(ctx context.Context) (bool, interface{}, error) {
start := time.Now()
latestAllowable := start.Add(*hc.gracePeriod)
log.Infof("Allowing up to %.0f seconds for health checks to pass on all nodes", hc.gracePeriod.Seconds())
// The deadline is fixed once, relative to the moment checking started.
latestAllowable := start.Add(*co.gracePeriod)
log.Infof("Allowing up to %.0f seconds for health checks to pass on all nodes", co.gracePeriod.Seconds())

lastHealthCheckResult := &types.HealthCheckResult{}
for {
results, err := hc.checkerImpl.RunAllChecks(ctx, lastHealthCheckResult)
pass, err := co.checkerImpl.RunChecks(ctx)
if err != nil {
return nil, err
return false, nil, err
}
lastHealthCheckResult = results
if AllChecksPassed(results) {

if pass {
// Report how much of the grace period was needed before the checks passed.
timeToPass := time.Since(start).Seconds()
pctGraceUsed := timeToPass / hc.gracePeriod.Seconds() * 100
log.Infof("Checks passed in %.0f seconds. Consumed %.1f pct of the %.0f second grace period", timeToPass, pctGraceUsed, hc.gracePeriod.Seconds())
return results, nil
pctGraceUsed := timeToPass / co.gracePeriod.Seconds() * 100
log.Infof("Checks passed in %.0f seconds. Consumed %.1f pct of the %.0f second grace period", timeToPass, pctGraceUsed, co.gracePeriod.Seconds())
return true, co.checkerImpl.PopFinalResult(), nil
}

if time.Now().After(latestAllowable) {
// Out of time: the failure is reported through the result, not as an error.
log.Warnf("Grace period elapsed and a health check is still failing. Time: %d", time.Now().Unix())
return results, nil
return false, co.checkerImpl.PopFinalResult(), nil
} else {
log.Warn("Health checks failed but still in grace period")
// NOTE(review): this sleep does not observe ctx; cancellation is only seen
// if the checker call itself reacts to it.
time.Sleep(1 * time.Second)
}
}
}

// AllChecksPassed reports whether every block-consensus result in checks is
// free of failing clients. It inspects the latest and finalized results for
// both the EL and CL fields, looking at the reported-block and reported-hash
// failure lists of each; a single non-empty list makes the result false.
func AllChecksPassed(checks *types.HealthCheckResult) bool {
	// The conjunction short-circuits in the same order the lists are examined,
	// so later results are not touched once a failure has been found.
	return len(checks.LatestElBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.LatestElBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.FinalizedElBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.FinalizedElBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.LatestClBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.LatestClBlockResult.FailingClientsReportedHash) == 0 &&
		len(checks.FinalizedClBlockResult.FailingClientsReportedBlock) == 0 &&
		len(checks.FinalizedClBlockResult.FailingClientsReportedHash) == 0
}
52 changes: 52 additions & 0 deletions pkg/health/ethereum/artifacts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package ethereum

import (
chaosMesh "attacknet/cmd/pkg/chaos-mesh"
healthTypes "attacknet/cmd/pkg/health/types"
"attacknet/cmd/pkg/types"
"github.com/kurtosis-tech/stacktrace"
"gopkg.in/yaml.v3"
)

// CreateEthereumArtifactSerializer returns a serializer whose artifact list
// starts out empty (non-nil) and is filled by later AddHealthCheckResult calls.
func CreateEthereumArtifactSerializer() healthTypes.ArtifactSerializer {
	serializer := new(artifactSerializer)
	serializer.artifacts = make([]*testArtifact, 0)
	return serializer
}

// AddHealthCheckResult records the outcome of one test as a new artifact.
//
// result must hold a *healthCheckResult; any other dynamic type is rejected
// with an error and nothing is recorded. The names of podsUnderTest are stored
// as the targeted containers, and test supplies the test name. The artifact's
// pass flag is whatever the result's AllChecksPassed method reports.
func (e *artifactSerializer) AddHealthCheckResult(
	result interface{},
	podsUnderTest []*chaosMesh.PodUnderTest,
	test types.SuiteTest,
) error {
	castResult, ok := result.(*healthCheckResult)
	if !ok {
		// %T names the dynamic type that was actually supplied. Formatting an
		// arbitrary value with %s renders "%!s(...)" noise instead of anything
		// a reader can act on.
		return stacktrace.NewError("cannot cast health check result of type %T to healthCheckResult", result)
	}

	var containersTargeted []string
	for _, p := range podsUnderTest {
		containersTargeted = append(containersTargeted, p.GetName())
	}

	testPassed := castResult.AllChecksPassed()

	// Positional literal: the values must stay in testArtifact's field order.
	artifact := &testArtifact{
		test.TestName,
		containersTargeted,
		testPassed,
		castResult,
	}

	e.artifacts = append(e.artifacts, artifact)
	return nil
}

// SerializeArtifacts renders every artifact collected so far as YAML.
// A marshalling failure is propagated with added context and no bytes are
// returned.
func (e *artifactSerializer) SerializeArtifacts() ([]byte, error) {
	serialized, marshalErr := yaml.Marshal(e.artifacts)
	if marshalErr == nil {
		return serialized, nil
	}

	return nil, stacktrace.Propagate(marshalErr, "could not marshal test artifacts")
}
4 changes: 2 additions & 2 deletions pkg/health/ethereum/beacon_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ type BeaconClientRpc struct {
client eth2client.BeaconBlockHeadersProvider
}

func (e *EthNetworkChecker) getBeaconClientConsensus(ctx context.Context, clients []*BeaconClientRpc, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
func (e *healthChecker) getBeaconClientConsensus(ctx context.Context, clients []*BeaconClientRpc, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
forkChoice, err := getBeaconNetworkConsensus(ctx, clients, blockType)
if err != nil {
return nil, err
Expand Down Expand Up @@ -58,7 +58,7 @@ func (e *EthNetworkChecker) getBeaconClientConsensus(ctx context.Context, client
}, nil
}

func (e *EthNetworkChecker) dialToBeaconClients(ctx context.Context) ([]*BeaconClientRpc, error) {
func (e *healthChecker) dialToBeaconClients(ctx context.Context) ([]*BeaconClientRpc, error) {
labelKey := "kurtosistech.com.custom/ethereum-package.client-type"
labelValue := "beacon"
podsToHealthCheck, err := getPodsToHealthCheck(
Expand Down
4 changes: 2 additions & 2 deletions pkg/health/ethereum/execution_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type ExecClientRPC struct {
client *ethclient.Client
}

func (e *EthNetworkChecker) getExecBlockConsensus(ctx context.Context, clients []*ExecClientRPC, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
func (e *healthChecker) getExecBlockConsensus(ctx context.Context, clients []*ExecClientRPC, blockType string, maxAttempts int) (*types.BlockConsensusTestResult, error) {
forkChoice, err := getExecNetworkConsensus(ctx, clients, blockType)
if err != nil {
return nil, err
Expand Down Expand Up @@ -53,7 +53,7 @@ func (e *EthNetworkChecker) getExecBlockConsensus(ctx context.Context, clients [
}, nil
}

func (e *EthNetworkChecker) dialToExecutionClients(ctx context.Context) ([]*ExecClientRPC, error) {
func (e *healthChecker) dialToExecutionClients(ctx context.Context) ([]*ExecClientRPC, error) {
labelKey := "kurtosistech.com.custom/ethereum-package.client-type"
labelValue := "execution"
podsToHealthCheck, err := getPodsToHealthCheck(
Expand Down
Loading
Loading