Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add status conditions for ebpf SDKs #1218

Merged
merged 5 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cli/cmd/resources/odiglet.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,19 @@ func NewOdigletClusterRole(psp bool) *rbacv1.ClusterRole {
"instrumentedapplications",
},
},
{
Verbs: []string{
"get",
"patch",
"update",
},
APIGroups: []string{
"odigos.io",
},
Resources: []string{
"instrumentedapplications/status",
},
},
{
Verbs: []string{
"get",
Expand Down
18 changes: 10 additions & 8 deletions odiglet/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func main() {
Expand All @@ -41,11 +42,7 @@ func main() {
os.Exit(-1)
}

ebpfDirectors, err := initEbpf()
if err != nil {
log.Logger.Error(err, "Failed to init eBPF director")
os.Exit(-1)
}
ctx := signals.SetupSignalHandler()

go startDeviceManager(clientset)

Expand All @@ -55,13 +52,18 @@ func main() {
os.Exit(-1)
}

ebpfDirectors, err := initEbpf(ctx, mgr.GetClient())
if err != nil {
log.Logger.Error(err, "Failed to init eBPF director")
os.Exit(-1)
}

err = kube.SetupWithManager(mgr, ebpfDirectors)
if err != nil {
log.Logger.Error(err, "Failed to setup controller-runtime manager")
os.Exit(-1)
}

ctx := signals.SetupSignalHandler()
err = kube.StartManager(ctx, mgr)
if err != nil {
log.Logger.Error(err, "Failed to start controller-runtime manager")
Expand Down Expand Up @@ -107,9 +109,9 @@ func startDeviceManager(clientset *kubernetes.Clientset) {
manager.Run()
}

func initEbpf() (ebpf.DirectorsMap, error) {
func initEbpf(ctx context.Context, client client.Client) (ebpf.DirectorsMap, error) {
goInstrumentationFactory := ebpf.NewGoInstrumentationFactory()
goDirector := ebpf.NewEbpfDirector(common.GoProgrammingLanguage, goInstrumentationFactory)
goDirector := ebpf.NewEbpfDirector(ctx, client, common.GoProgrammingLanguage, goInstrumentationFactory)
goDirectorKey := ebpf.DirectorKey{
Language: common.GoProgrammingLanguage,
OtelSdk: common.OtelSdkEbpfCommunity,
Expand Down
4 changes: 2 additions & 2 deletions odiglet/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/odigos-io/odigos/procdiscovery v0.0.0
github.com/odigos-io/opentelemetry-zap-bridge v0.0.5
github.com/otiai10/copy v1.14.0
go.opentelemetry.io/auto v0.12.0-alpha.0.20240510155300-a8d8a98172ce
go.opentelemetry.io/auto v0.12.0-alpha.0.20240523062926-f9ad92d875aa
go.opentelemetry.io/otel v1.26.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.26.0
go.uber.org/zap v1.27.0
Expand Down Expand Up @@ -56,7 +56,7 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
Expand Down
8 changes: 5 additions & 3 deletions odiglet/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,9 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
Expand Down Expand Up @@ -318,8 +319,8 @@ github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5t
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opentelemetry.io/auto v0.12.0-alpha.0.20240510155300-a8d8a98172ce h1:JngoTZelK6ssW1iA6FwJISuEHj81wwLLb2QjGlktvJs=
go.opentelemetry.io/auto v0.12.0-alpha.0.20240510155300-a8d8a98172ce/go.mod h1:l97HvRF4QQdOvb3LobVnUlmGb5eju7yfvccYWuJj84k=
go.opentelemetry.io/auto v0.12.0-alpha.0.20240523062926-f9ad92d875aa h1:EOH/Hcqgu2p/33uTHIu7RYTh4hTMdTTzfLEnwpOSqcs=
go.opentelemetry.io/auto v0.12.0-alpha.0.20240523062926-f9ad92d875aa/go.mod h1:l97HvRF4QQdOvb3LobVnUlmGb5eju7yfvccYWuJj84k=
go.opentelemetry.io/contrib/exporters/autoexport v0.51.0 h1:imlL5MBzKu+NWhnJM62bHws6m+6LN8HMT3V9PcSTbaY=
go.opentelemetry.io/contrib/exporters/autoexport v0.51.0/go.mod h1:gn1wFA1uVEKIXrM3DC7SN9ee83oJ0yALY/HbUfqMszo=
go.opentelemetry.io/otel v1.26.0 h1:LQwgL5s/1W7YiiRwxf03QGnWLb2HW4pLiAhaA5cZXBs=
Expand Down Expand Up @@ -466,6 +467,7 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
Expand Down
106 changes: 100 additions & 6 deletions odiglet/pkg/ebpf/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,14 @@ import (
"context"
"sync"

_ "github.com/odigos-io/odigos/api/odigos/v1alpha1"
"github.com/odigos-io/odigos/common"
"github.com/odigos-io/odigos/k8sutils/pkg/conditions"
runtime_details "github.com/odigos-io/odigos/odiglet/pkg/kube/runtime_details"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/odigos-io/odigos/odiglet/pkg/log"
"k8s.io/apimachinery/pkg/types"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// This interface should be implemented by all ebpf sdks
Expand All @@ -18,7 +23,7 @@ type OtelEbpfSdk interface {

// users can use different eBPF otel SDKs by returning them from this function
type InstrumentationFactory[T OtelEbpfSdk] interface {
CreateEbpfInstrumentation(ctx context.Context, pid int, serviceName string, podWorkload *common.PodWorkload, containerName string, podName string) (T, error)
CreateEbpfInstrumentation(ctx context.Context, pid int, serviceName string, podWorkload *common.PodWorkload, containerName string, podName string, loadedIndicator chan struct{}) (T, error)
RonFed marked this conversation as resolved.
Show resolved Hide resolved
}

// Director manages the instrumentation for a specific SDK in a specific language
Expand All @@ -34,6 +39,24 @@ type podDetails struct {
Pids []int
}

// InstrumentationStatusReason is the reason attached to an instrumentation
// status report. Its string value is passed as the reason argument when the
// status conditions of the instrumentedApplication CR are updated.
type InstrumentationStatusReason string

const (
// FailedToLoad is reported when the Run call of a created instrumentation returns an error.
FailedToLoad InstrumentationStatusReason = "FailedToLoad"
// FailedToInitialize is reported when the instrumentation factory fails to create the eBPF instrumentation.
FailedToInitialize InstrumentationStatusReason = "FailedToInitialize"
// LoadedSuccessfully is reported once the loaded indicator channel of an instrumentation fires.
LoadedSuccessfully InstrumentationStatusReason = "LoadedSuccessfully"
)

// ebpfSDKConditionRunning is the condition identifier passed to
// conditions.UpdateStatusConditions for the eBPF SDK status
// (presumably used as the condition type — confirm against the conditions package).
const ebpfSDKConditionRunning = "ebpfSDKRunning"

// instrumentationStatus is a single status report about the eBPF
// instrumentation of one pod. Reports are sent over the director's
// instrumentationStatusChan and consumed by observeInstrumentations.
type instrumentationStatus struct {
// Workload identifies the workload whose instrumentedApplication CR is looked up and updated.
Workload common.PodWorkload
// PodName is the namespaced name of the pod this report is about.
PodName types.NamespacedName
// Healthy selects the condition status: ConditionTrue when true, ConditionFalse otherwise.
Healthy bool
// Message is the message written together with the condition.
Message string
// Reason is the reason written together with the condition.
Reason InstrumentationStatusReason
}

type EbpfDirector[T OtelEbpfSdk] struct {
mux sync.Mutex

Expand All @@ -56,6 +79,16 @@ type EbpfDirector[T OtelEbpfSdk] struct {

// this map can be used when we only have the workload, and need to find the pods to derive pids.
workloadToPods map[common.PodWorkload]map[types.NamespacedName]struct{}

// this channel is used to send the status of the instrumentation SDK after it is created and run.
// the status is used to update the status conditions for the instrumentedApplication CR.
// The status can be a failure to initialize the SDK, a failure to load the eBPF probes, or a success, which
// means the eBPF probes were loaded successfully.
// TODO: this channel should probably be buffered, so we don't block the instrumentation goroutine?
instrumentationStatusChan chan instrumentationStatus

// k8s client used to update status conditions for the instrumentedApplication CR
client client.Client
}

type DirectorKey struct {
Expand All @@ -65,14 +98,61 @@ type DirectorKey struct {

type DirectorsMap map[DirectorKey]Director

func NewEbpfDirector[T OtelEbpfSdk](language common.ProgrammingLanguage, instrumentationFactory InstrumentationFactory[T]) *EbpfDirector[T] {
return &EbpfDirector[T]{
func NewEbpfDirector[T OtelEbpfSdk](ctx context.Context, client client.Client, language common.ProgrammingLanguage, instrumentationFactory InstrumentationFactory[T]) *EbpfDirector[T] {
director := &EbpfDirector[T]{
language: language,
instrumentationFactory: instrumentationFactory,
pidsToInstrumentation: make(map[int]T),
pidsAttemptedInstrumentation: make(map[int]struct{}),
podsToDetails: make(map[types.NamespacedName]podDetails),
workloadToPods: make(map[common.PodWorkload]map[types.NamespacedName]struct{}),
instrumentationStatusChan: make(chan instrumentationStatus),
client: client,
}

go director.observeInstrumentations(ctx)

return director
}

// observeInstrumentations drains instrumentationStatusChan and mirrors every
// received report into the status conditions of the matching
// instrumentedApplication CR. It returns when ctx is done or when the channel
// is closed. Failures to fetch or update the CR are logged and the report is
// dropped; the loop keeps running.
func (d *EbpfDirector[T]) observeInstrumentations(ctx context.Context) {
	for {
		// Wait for the next report, or stop on cancellation / channel close.
		var report instrumentationStatus
		select {
		case <-ctx.Done():
			return
		case received, open := <-d.instrumentationStatusChan:
			if !open {
				return
			}
			report = received
		}

		// Without a client there is nothing to write the condition to.
		if d.client == nil {
			log.Logger.V(0).Info("Client is nil, cannot update status conditions", "workload", report.Workload)
			continue
		}

		instrumentedApp, err := runtime_details.GetRuntimeDetails(ctx, d.client, &report.Workload)
		if err != nil {
			log.Logger.Error(err, "error getting runtime details", "workload", report.Workload)
			continue
		}

		// A healthy report maps to ConditionTrue; an unhealthy one maps to
		// ConditionFalse and is additionally logged as an error.
		conditionStatus := metav1.ConditionTrue
		if !report.Healthy {
			conditionStatus = metav1.ConditionFalse
			log.Logger.Error(nil, "eBPF instrumentation unhealthy", "reason", report.Reason, "message", report.Message, "workload", report.Workload)
		}

		// The condition lives on the instrumentedApplication CR, which is shared
		// by all pods of the workload. A report for one pod can therefore
		// overwrite the condition written for another pod of the same workload,
		// so the stored condition may not reflect the whole workload when some
		// of its pods are healthy and others are not.
		if err := conditions.UpdateStatusConditions(ctx, d.client, instrumentedApp, &instrumentedApp.Status.Conditions, conditionStatus, ebpfSDKConditionRunning, string(report.Reason), report.Message); err != nil {
			log.Logger.Error(err, "error updating status conditions", "workload", report.Workload)
		}
	}
}

Expand Down Expand Up @@ -103,10 +183,23 @@ func (d *EbpfDirector[T]) Instrument(ctx context.Context, pid int, pod types.Nam
}
d.workloadToPods[*podWorkload][pod] = struct{}{}

loadedIndicator := make(chan struct{})
loadedCtx, loadedObserverCancel := context.WithCancel(ctx)
go func() {
select {
case <-loadedCtx.Done():
return
case <-loadedIndicator:
d.instrumentationStatusChan <- instrumentationStatus{Healthy: true, Message: "Successfully loaded eBPF probes to pod: " + pod.String(), Workload: *podWorkload, PodName: pod, Reason: LoadedSuccessfully}
}
}()

go func() {
inst, err := d.instrumentationFactory.CreateEbpfInstrumentation(ctx, pid, appName, podWorkload, containerName, pod.Name)
// once the instrumentation finished running (either by error or successful exit), we can cancel the 'loaded' observer for this instrumentation
defer loadedObserverCancel()
inst, err := d.instrumentationFactory.CreateEbpfInstrumentation(ctx, pid, appName, podWorkload, containerName, pod.Name, loadedIndicator)
if err != nil {
log.Logger.Error(err, "instrumentation setup failed", "workload", podWorkload, "pod", pod)
d.instrumentationStatusChan <- instrumentationStatus{Healthy: false, Message: err.Error(), Workload: *podWorkload, Reason: FailedToInitialize, PodName: pod}
return
}

Expand All @@ -129,7 +222,7 @@ func (d *EbpfDirector[T]) Instrument(ctx context.Context, pid int, pod types.Nam
log.Logger.V(0).Info("Running ebpf instrumentation", "workload", podWorkload, "pod", pod, "language", d.language)

if err := inst.Run(context.Background()); err != nil {
log.Logger.Error(err, "instrumentation crashed after running")
d.instrumentationStatusChan <- instrumentationStatus{Healthy: false, Message: err.Error(), Workload: *podWorkload, Reason: FailedToLoad, PodName: pod}
}
}()

Expand Down Expand Up @@ -180,6 +273,7 @@ func (d *EbpfDirector[T]) Cleanup(pod types.NamespacedName) {

func (d *EbpfDirector[T]) Shutdown() {
log.Logger.V(0).Info("Shutting down instrumentation director")
close(d.instrumentationStatusChan)
for details := range d.podsToDetails {
d.Cleanup(details)
}
Expand Down
3 changes: 2 additions & 1 deletion odiglet/pkg/ebpf/go.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func NewGoInstrumentationFactory() InstrumentationFactory[*GoOtelEbpfSdk] {
return &GoInstrumentationFactory{}
}

func (g *GoInstrumentationFactory) CreateEbpfInstrumentation(ctx context.Context, pid int, serviceName string, podWorkload *common.PodWorkload, containerName string, podName string) (*GoOtelEbpfSdk, error) {
func (g *GoInstrumentationFactory) CreateEbpfInstrumentation(ctx context.Context, pid int, serviceName string, podWorkload *common.PodWorkload, containerName string, podName string, loadedIndicator chan struct{}) (*GoOtelEbpfSdk, error) {
defaultExporter, err := otlptracegrpc.New(
ctx,
otlptracegrpc.WithInsecure(),
Expand All @@ -42,6 +42,7 @@ func (g *GoInstrumentationFactory) CreateEbpfInstrumentation(ctx context.Context
auto.WithServiceName(serviceName),
auto.WithTraceExporter(defaultExporter),
auto.WithGlobal(),
auto.WithLoadedIndicator(loadedIndicator),
)
if err != nil {
log.Logger.Error(err, "instrumentation setup failed")
Expand Down
3 changes: 2 additions & 1 deletion odiglet/pkg/kube/instrumentation_ebpf/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/odigos-io/odigos/common"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
runtime_details "github.com/odigos-io/odigos/odiglet/pkg/kube/runtime_details"
kubeutils "github.com/odigos-io/odigos/odiglet/pkg/kube/utils"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -72,7 +73,7 @@ func (p *PodsReconciler) Reconcile(ctx context.Context, request ctrl.Request) (c
}

func (p *PodsReconciler) instrumentWithEbpf(ctx context.Context, pod *corev1.Pod, podWorkload *common.PodWorkload) (error, bool) {
runtimeDetails, err := getRuntimeDetails(ctx, p.Client, podWorkload)
runtimeDetails, err := runtime_details.GetRuntimeDetails(ctx, p.Client, podWorkload)
if err != nil {
if apierrors.IsNotFound(err) {
// Probably shutdown in progress, cleanup will be done as soon as the pod object is deleted
Expand Down
17 changes: 0 additions & 17 deletions odiglet/pkg/kube/instrumentation_ebpf/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@ import (
odigosv1 "github.com/odigos-io/odigos/api/odigos/v1alpha1"
odgiosK8s "github.com/odigos-io/odigos/k8sutils/pkg/container"
"github.com/odigos-io/odigos/common"
"github.com/odigos-io/odigos/k8sutils/pkg/workload"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
"github.com/odigos-io/odigos/odiglet/pkg/process"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

Expand Down Expand Up @@ -78,18 +76,3 @@ func instrumentPodWithEbpf(ctx context.Context, pod *corev1.Pod, directors ebpf.
}
return nil, instrumentedEbpf
}

// getRuntimeDetails fetches the InstrumentedApplication object of the given
// workload. The object name is derived from the workload name and kind, and
// the lookup is done in the workload's namespace. Any error returned by the
// client is passed through unchanged.
func getRuntimeDetails(ctx context.Context, kubeClient client.Client, podWorkload *common.PodWorkload) (*odigosv1.InstrumentedApplication, error) {
	objectKey := client.ObjectKey{
		Namespace: podWorkload.Namespace,
		Name:      workload.GetRuntimeObjectName(podWorkload.Name, podWorkload.Kind),
	}

	app := new(odigosv1.InstrumentedApplication)
	if err := kubeClient.Get(ctx, objectKey, app); err != nil {
		return nil, err
	}
	return app, nil
}
15 changes: 15 additions & 0 deletions odiglet/pkg/kube/runtime_details/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,18 @@ func persistRuntimeResults(ctx context.Context, results []odigosv1.RuntimeDetail
}
return nil
}

// GetRuntimeDetails returns the InstrumentedApplication object that belongs to
// podWorkload. The object is looked up in the workload's namespace under the
// name produced by workload.GetRuntimeObjectName for the workload's name and
// kind. Errors from the client's Get call are returned as-is, so callers can
// still inspect them (for example to detect a not-found result).
func GetRuntimeDetails(ctx context.Context, kubeClient client.Client, podWorkload *common.PodWorkload) (*odigosv1.InstrumentedApplication, error) {
	key := client.ObjectKey{
		Namespace: podWorkload.Namespace,
		Name:      workload.GetRuntimeObjectName(podWorkload.Name, podWorkload.Kind),
	}

	instrumentedApp := &odigosv1.InstrumentedApplication{}
	if err := kubeClient.Get(ctx, key, instrumentedApp); err != nil {
		return nil, err
	}

	return instrumentedApp, nil
}
Loading