Skip to content

Commit

Permalink
Calculate ratio by node class (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
bastjan authored Nov 25, 2022
1 parent 8baa405 commit b14e457
Show file tree
Hide file tree
Showing 14 changed files with 650 additions and 141 deletions.
40 changes: 32 additions & 8 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"errors"
"os"

"github.com/appuio/appuio-cloud-agent/limits"
"go.uber.org/multierr"
"k8s.io/apimachinery/pkg/api/resource"
"sigs.k8s.io/yaml"
)

Expand All @@ -13,7 +15,12 @@ type Config struct {
OrganizationLabel string

// MemoryPerCoreLimit is the fair use limit of memory usage per CPU core
MemoryPerCoreLimit string
// it is deprecated and will be removed in a future version.
// Use MemoryPerCoreLimits: {Limit: "XGi"} instead.
MemoryPerCoreLimit *resource.Quantity
// MemoryPerCoreLimits is the fair use limit of memory usage per CPU core
// It is possible to select limits by node selector labels
MemoryPerCoreLimits limits.Limits

// Privileged* is a list of the given type allowed to bypass restrictions.
// Wildcards are supported (e.g. "system:serviceaccount:default:*" or "cluster-*-operator").
Expand All @@ -35,13 +42,18 @@ type Config struct {
DefaultOrganizationClusterRoles map[string]string
}

func ConfigFromFile(path string) (Config, error) {
func ConfigFromFile(path string) (c Config, warn []string, err error) {
raw, err := os.ReadFile(path)
if err != nil {
return Config{}, err
return Config{}, nil, err
}
var c Config
return c, yaml.Unmarshal(raw, &c, yaml.DisallowUnknownFields)
err = yaml.Unmarshal(raw, &c, yaml.DisallowUnknownFields)
if err != nil {
return Config{}, nil, err
}

c, warnings := migrateConfig(c)
return c, warnings, nil
}

func (c Config) Validate() error {
Expand All @@ -50,9 +62,21 @@ func (c Config) Validate() error {
if c.OrganizationLabel == "" {
errs = append(errs, errors.New("OrganizationLabel must not be empty"))
}
if c.MemoryPerCoreLimit == "" {
errs = append(errs, errors.New("MemoryPerCoreLimit must not be empty"))
}

return multierr.Combine(errs...)
}

func migrateConfig(c Config) (Config, []string) {
warnings := make([]string, 0)

if c.MemoryPerCoreLimit != nil && c.MemoryPerCoreLimits == nil {
warnings = append(warnings, "MemoryPerCoreLimit is deprecated and will be removed in a future version. Use MemoryPerCoreLimits: {Limit: \"XGi\"} instead.")
c.MemoryPerCoreLimits = limits.Limits{
{
Limit: c.MemoryPerCoreLimit,
},
}
}

return c, warnings
}
12 changes: 10 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
---
# The label used to mark namespaces to belong to an organization
OrganizationLabel: appuio.io/organization
# The fair use limit of memory usage per CPU core
MemoryPerCoreLimit: 4Gi

# The fair use limit of memory usage per CPU core.
# It is possible to select limits by node selector labels.
MemoryPerCoreLimits:
- Limit: 4Gi
NodeSelector:
matchExpressions:
- key: class
operator: DoesNotExist

# Privileged* is a list of the given type allowed to bypass restrictions.
# Wildcards are supported (e.g. "system:serviceaccount:default:*" or "cluster-*-operator").
# ClusterRoles are only ever matched if they are bound through a ClusterRoleBinding,
Expand Down
84 changes: 84 additions & 0 deletions config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package main

import (
"os"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const limitsYAML = `
MemoryPerCoreLimits:
- NodeSelector:
matchExpressions:
- key: class
operator: DoesNotExist
Limit: 2Gi
`
const limitYAML = `
MemoryPerCoreLimit: 1Gi
`

func Test_Config_MemoryPerCoreLimits(t *testing.T) {
testCases := []struct {
desc string
yaml string
warnings int
limit string
nodeSelector metav1.LabelSelector
}{
{
desc: "MemoryPerCoreLimits",
yaml: limitsYAML,
warnings: 0,
limit: "2Gi",
nodeSelector: metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "class",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
},
{
desc: "MemoryPerCoreLimit_Migrate",
yaml: limitYAML,
warnings: 1,
limit: "1Gi",
nodeSelector: metav1.LabelSelector{},
},
{
desc: "MemoryPerCoreLimit_NoMigrateIfMemoryPerCoreLimitsIsSet",
yaml: strings.Join([]string{limitsYAML, limitYAML}, "\n"),
warnings: 0,
limit: "2Gi",
nodeSelector: metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "class",
Operator: metav1.LabelSelectorOpDoesNotExist,
},
},
},
},
}
for _, tC := range testCases {
t.Run(tC.desc, func(t *testing.T) {
tmp := t.TempDir()
configPath := filepath.Join(tmp, "config.yaml")
require.NoError(t, os.WriteFile(configPath, []byte(tC.yaml), 0o644))

c, warnings, err := ConfigFromFile(configPath)
assert.Len(t, warnings, tC.warnings)
require.NoError(t, err)

assert.Equal(t, tC.limit, c.MemoryPerCoreLimits[0].Limit.String())
assert.Equal(t, tC.nodeSelector, c.MemoryPerCoreLimits[0].NodeSelector)
})
}
}
45 changes: 30 additions & 15 deletions controllers/ratio_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ package controllers
import (
"context"
"errors"
"fmt"

"github.com/appuio/appuio-cloud-agent/limits"
"github.com/appuio/appuio-cloud-agent/ratio"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/record"

"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -21,12 +24,12 @@ type RatioReconciler struct {
Recorder record.EventRecorder
Scheme *runtime.Scheme

Ratio ratioFetcher
RatioLimit *resource.Quantity
Ratio ratioFetcher
RatioLimits limits.Limits
}

type ratioFetcher interface {
FetchRatio(ctx context.Context, ns string) (*ratio.Ratio, error)
FetchRatios(ctx context.Context, ns string) (map[string]*ratio.Ratio, error)
}

//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
Expand All @@ -38,7 +41,7 @@ var eventReason = "TooMuchCPURequest"
func (r *RatioReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
l := log.FromContext(ctx).WithValues("namespace", req.Namespace, "name", req.Name)

nsRatio, err := r.Ratio.FetchRatio(ctx, req.Namespace)
nsRatios, err := r.Ratio.FetchRatios(ctx, req.Namespace)
if err != nil {
if errors.Is(err, ratio.ErrorDisabled) {
l.V(1).Info("namespace disabled")
Expand All @@ -48,21 +51,33 @@ func (r *RatioReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, err
}

if nsRatio.Below(*r.RatioLimit) {
l.Info("recording warn event: ratio too low")

if err := r.warnPod(ctx, req.Name, req.Namespace, nsRatio); err != nil {
l.Error(err, "failed to record event on pod")
for nodeSel, ratio := range nsRatios {
sel, err := labels.ConvertSelectorToLabelsMap(nodeSel)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to convert node selector '%s' to labels map: %w", nodeSel, err)
}
limit := r.RatioLimits.GetLimitForNodeSelector(sel)
if limit == nil {
l.Info("no limit found for node selector", "nodeSelector", nodeSel)
continue
}
if err := r.warnNamespace(ctx, req.Namespace, nsRatio); err != nil {
l.Error(err, "failed to record event on namespace")

if ratio.Below(*limit) {
l.Info("recording warn event: ratio too low")

if err := r.warnPod(ctx, req.Name, req.Namespace, ratio, nodeSel, limit); err != nil {
l.Error(err, "failed to record event on pod")
}
if err := r.warnNamespace(ctx, req.Namespace, ratio, nodeSel, limit); err != nil {
l.Error(err, "failed to record event on namespace")
}
}
}

return ctrl.Result{}, nil
}

func (r *RatioReconciler) warnPod(ctx context.Context, name, namespace string, nsRatio *ratio.Ratio) error {
func (r *RatioReconciler) warnPod(ctx context.Context, name, namespace string, nsRatio *ratio.Ratio, sel string, limit *resource.Quantity) error {
pod := corev1.Pod{}
err := r.Get(ctx, client.ObjectKey{
Namespace: namespace,
Expand All @@ -71,18 +86,18 @@ func (r *RatioReconciler) warnPod(ctx context.Context, name, namespace string, n
if err != nil {
return err
}
r.Recorder.Event(&pod, "Warning", eventReason, nsRatio.Warn(r.RatioLimit))
r.Recorder.Event(&pod, "Warning", eventReason, nsRatio.Warn(limit, sel))
return nil
}
func (r *RatioReconciler) warnNamespace(ctx context.Context, name string, nsRatio *ratio.Ratio) error {
func (r *RatioReconciler) warnNamespace(ctx context.Context, name string, nsRatio *ratio.Ratio, sel string, limit *resource.Quantity) error {
ns := corev1.Namespace{}
err := r.Get(ctx, client.ObjectKey{
Name: name,
}, &ns)
if err != nil {
return err
}
r.Recorder.Event(&ns, "Warning", eventReason, nsRatio.Warn(r.RatioLimit))
r.Recorder.Event(&ns, "Warning", eventReason, nsRatio.Warn(limit, sel))
return nil
}

Expand Down
17 changes: 11 additions & 6 deletions controllers/ratio_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"

"github.com/appuio/appuio-cloud-agent/limits"
"github.com/appuio/appuio-cloud-agent/ratio"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -164,22 +165,26 @@ func prepareRatioTest(t *testing.T, cfg testRatioCfg) *RatioReconciler {
Scheme: scheme,
Ratio: fakeRatioFetcher{
err: cfg.fetchErr,
ratio: &ratio.Ratio{
ratios: map[string]*ratio.Ratio{"": {
CPU: cfg.fetchCPU.AsDec(),
Memory: cfg.fetchMemory.AsDec(),
},
}},
RatioLimits: limits.Limits{
{
Limit: &cfg.limit,
},
},
RatioLimit: &cfg.limit,
}
}

type fakeRatioFetcher struct {
err error
ratio *ratio.Ratio
err error
ratios map[string]*ratio.Ratio
}

func (f fakeRatioFetcher) FetchRatio(ctx context.Context, ns string) (*ratio.Ratio, error) {
return f.ratio, f.err
func (f fakeRatioFetcher) FetchRatios(ctx context.Context, ns string) (map[string]*ratio.Ratio, error) {
return f.ratios, f.err
}

var testNs = &corev1.Namespace{
Expand Down
32 changes: 32 additions & 0 deletions limits/limits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package limits

import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

type Limit struct {
NodeSelector metav1.LabelSelector
Limit *resource.Quantity
}

type Limits []Limit

// GetLimitForNodeSelector returns the first limit that matches the given node selector.
// If no limit matches, an empty string is returned.
// Limits with invalid node selectors are ignored.
func (l Limits) GetLimitForNodeSelector(nodeSelector map[string]string) *resource.Quantity {
for _, limit := range l {
limitSel, err := metav1.LabelSelectorAsSelector(&limit.NodeSelector)
if err != nil {
continue
}

if limitSel.Matches(labels.Set(nodeSelector)) {
return limit.Limit
}
}

return nil
}
Loading

0 comments on commit b14e457

Please sign in to comment.