Skip to content

Commit

Permalink
Merge branch 'main' into bsoghigian/modifying-logs-verbosity-for-perf…
Browse files Browse the repository at this point in the history
…-testing
  • Loading branch information
Bryce-Soghigian authored Jan 30, 2025
2 parents 30dee8d + 7563bca commit 6f02726
Show file tree
Hide file tree
Showing 43 changed files with 922 additions and 250 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/approval-comment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
disable-sudo: true
Expand All @@ -30,7 +30,7 @@ jobs:
mkdir -p /tmp/artifacts
{ echo ${{ github.event.pull_request.number }}; echo ${{ github.event.review.commit_id }}; } >> /tmp/artifacts/metadata.txt
cat /tmp/artifacts/metadata.txt
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: artifacts
path: /tmp/artifacts
2 changes: 1 addition & 1 deletion .github/workflows/build-publish-mcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
labels: [self-hosted, "1ES.Pool=${{ vars.RELEASE_1ES_POOL }}"]
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
egress-policy: audit

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
K8S_VERSION: ${{ matrix.k8sVersion }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
egress-policy: block
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
egress-policy: block
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
egress-policy: block
Expand All @@ -46,8 +46,8 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: ./.github/actions/install-deps
- run: make vulncheck
- uses: github/codeql-action/init@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
- uses: github/codeql-action/init@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1
with:
languages: ${{ matrix.language }}
- uses: github/codeql-action/autobuild@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
- uses: github/codeql-action/analyze@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
- uses: github/codeql-action/autobuild@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1
- uses: github/codeql-action/analyze@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1
2 changes: 1 addition & 1 deletion .github/workflows/deflake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
statuses: write
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
egress-policy: block
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dependency-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
disable-sudo: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
E2E_HASH: ${{ steps.generate-e2e-run-hash.outputs.E2E_HASH }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
disable-sudo: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
AZURE_SUBSCRIPTION_ID: ${{ secrets.E2E_SUBSCRIPTION_ID }}
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
egress-policy: block
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-trigger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-telemetry: true
disable-sudo: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/resolve-args.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
steps:
# Download the artifact and resolve the GIT_REF
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-sudo: true
disable-telemetry: true
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/scorecards.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:

steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
uses: step-security/harden-runner@c95a14d0e5bab51a9f56296a4eb0e416910cd350 # v2.10.3
with:
disable-sudo: true
disable-telemetry: true
Expand Down Expand Up @@ -82,14 +82,14 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: SARIF file
path: results.sarif
retention-days: 5

# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0
uses: github/codeql-action/upload-sarif@b6a472f63d85b9c78a3ac5e89422239fc15e9b3c # v3.28.1
with:
sarif_file: results.sarif
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ Karpenter provider for AKS can be used in two modes:
* **Self-hosted mode**: Karpenter is run as a standalone deployment in the cluster. This mode is useful for advanced users who want to customize or experiment with Karpenter's deployment. The rest of this page describes how to use Karpenter in self-hosted mode.

## Known limitations

* Only AKS clusters with Azure CNI Overlay + Cilium networking are supported.
* Only Linux nodes are supported.
* Kubenet and Calico are not supported.

## Installation (self-hosted)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.0
controller-gen.kubebuilder.io/version: v0.17.1
name: aksnodeclasses.karpenter.azure.com
spec:
group: karpenter.azure.com
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/Azure/karpenter-provider-azure

go 1.23.0
go 1.23.5

require (
github.com/Azure/azure-kusto-go v0.16.1
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/crds/karpenter.azure.com_aksnodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.0
controller-gen.kubebuilder.io/version: v0.17.1
name: aksnodeclasses.karpenter.azure.com
spec:
group: karpenter.azure.com
Expand Down
14 changes: 2 additions & 12 deletions pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func (c *CloudProvider) List(ctx context.Context) ([]*karpv1.NodeClaim, error) {
if err != nil {
return nil, fmt.Errorf("listing instances, %w", err)
}

var nodeClaims []*karpv1.NodeClaim
for _, instance := range instances {
instanceType, err := c.resolveInstanceTypeFromInstance(ctx, instance)
Expand Down Expand Up @@ -328,18 +329,15 @@ func (c *CloudProvider) instanceToNodeClaim(ctx context.Context, vm *armcompute.
nodeClaim.Status.Allocatable = lo.PickBy(instanceType.Allocatable(), func(_ v1.ResourceName, v resource.Quantity) bool { return !resources.IsZero(v) })
}

// TODO: review logic for determining zone (AWS uses Zone from subnet resolved and available from NodeClass conditions ...)
if zoneID, err := instance.GetZoneID(vm); err != nil {
if zone, err := utils.GetZone(vm); err != nil {
logging.FromContext(ctx).Warnf("Failed to get zone for VM %s, %v", *vm.Name, err)
} else {
zone := makeZone(*vm.Location, zoneID)
// aks-node-validating-webhook protects v1.LabelTopologyZone, will be set elsewhere, so we use a different label
labels[v1alpha2.AlternativeLabelTopologyZone] = zone
}

labels[karpv1.CapacityTypeLabelKey] = instance.GetCapacityType(vm)

// TODO: v1beta1 new keys/labels
if tag, ok := vm.Tags[instance.NodePoolTagKey]; ok {
labels[karpv1.NodePoolLabelKey] = *tag
}
Expand Down Expand Up @@ -369,14 +367,6 @@ func GenerateNodeClaimName(vmName string) string {
return strings.TrimLeft("aks-", vmName)
}

// makeZone builds a zone value of the form <region>-<zone-id>, where the
// region is the given location converted to lower case. When zoneID is
// empty there is no zone to describe, so the empty string is returned.
func makeZone(location string, zoneID string) string {
	if zoneID != "" {
		region := strings.ToLower(location)
		return fmt.Sprintf("%s-%s", region, zoneID)
	}
	return ""
}

// newTerminatingNodeClassError returns a NotFound error for handling by
func newTerminatingNodeClassError(name string) *errors.StatusError {
qualifiedResource := schema.GroupResource{Group: apis.Group, Resource: "aksnodeclasses"}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ var _ = Describe("CloudProvider", func() {
nodeClaims, _ := cloudProvider.List(ctx)
Expect(azureEnv.AzureResourceGraphAPI.AzureResourceGraphResourcesBehavior.CalledWithInput.Len()).To(Equal(1))
queryRequest := azureEnv.AzureResourceGraphAPI.AzureResourceGraphResourcesBehavior.CalledWithInput.Pop().Query
Expect(*queryRequest.Query).To(Equal(instance.GetListQueryBuilder(azureEnv.AzureResourceGraphAPI.ResourceGroup).String()))
Expect(*queryRequest.Query).To(Equal(instance.GetVMListQueryBuilder(azureEnv.AzureResourceGraphAPI.ResourceGroup).String()))
Expect(nodeClaims).To(HaveLen(1))
Expect(nodeClaims[0]).ToNot(BeNil())
resp, _ := azureEnv.VirtualMachinesAPI.Get(ctx, azureEnv.AzureResourceGraphAPI.ResourceGroup, nodeClaims[0].Name, nil)
Expand Down
5 changes: 4 additions & 1 deletion pkg/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ func NewControllers(ctx context.Context, mgr manager.Manager, kubeClient client.
nodeclasshash.NewController(kubeClient),
nodeclassstatus.NewController(kubeClient),
nodeclasstermination.NewController(kubeClient, recorder),
nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider),

nodeclaimgarbagecollection.NewVirtualMachine(kubeClient, cloudProvider),
nodeclaimgarbagecollection.NewNetworkInterface(kubeClient, instanceProvider),

// TODO: nodeclaim tagging
inplaceupdate.NewController(kubeClient, instanceProvider),
status.NewController[*v1alpha2.AKSNodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (

"github.com/awslabs/operatorpkg/singleton"

// "github.com/Azure/karpenter-provider-azure/pkg/cloudprovider"
"github.com/samber/lo"
"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
Expand All @@ -41,21 +40,21 @@ import (
corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider"
)

type Controller struct {
type VirtualMachine struct {
kubeClient client.Client
cloudProvider corecloudprovider.CloudProvider
successfulCount uint64 // keeps track of successful reconciles for more aggressive requeueing near the start of the controller
successfulCount uint64 // keeps track of successful reconciles for more aggressive requeuing near the start of the controller
}

func NewController(kubeClient client.Client, cloudProvider corecloudprovider.CloudProvider) *Controller {
return &Controller{
func NewVirtualMachine(kubeClient client.Client, cloudProvider corecloudprovider.CloudProvider) *VirtualMachine {
return &VirtualMachine{
kubeClient: kubeClient,
cloudProvider: cloudProvider,
successfulCount: 0,
}
}

func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) {
func (c *VirtualMachine) Reconcile(ctx context.Context) (reconcile.Result, error) {
ctx = injection.WithControllerName(ctx, "instance.garbagecollection")

// We LIST VMs on the CloudProvider BEFORE we grab NodeClaims/Nodes on the cluster so that we make sure that, if
Expand All @@ -65,6 +64,7 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) {
if err != nil {
return reconcile.Result{}, fmt.Errorf("listing cloudprovider VMs, %w", err)
}

managedRetrieved := lo.Filter(retrieved, func(nc *karpv1.NodeClaim, _ int) bool {
return nc.DeletionTimestamp.IsZero()
})
Expand Down Expand Up @@ -93,7 +93,7 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) {
return reconcile.Result{RequeueAfter: lo.Ternary(c.successfulCount <= 20, time.Second*10, time.Minute*2)}, nil
}

func (c *Controller) garbageCollect(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodeList *v1.NodeList) error {
func (c *VirtualMachine) garbageCollect(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodeList *v1.NodeList) error {
ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", nodeClaim.Status.ProviderID))
if err := c.cloudProvider.Delete(ctx, nodeClaim); err != nil {
return corecloudprovider.IgnoreNodeClaimNotFoundError(err)
Expand All @@ -112,7 +112,7 @@ func (c *Controller) garbageCollect(ctx context.Context, nodeClaim *karpv1.NodeC
return nil
}

func (c *Controller) Register(_ context.Context, m manager.Manager) error {
func (c *VirtualMachine) Register(_ context.Context, m manager.Manager) error {
return controllerruntime.NewControllerManagedBy(m).
Named("instance.garbagecollection").
WatchesRawSource(singleton.Source()).
Expand Down
Loading

0 comments on commit 6f02726

Please sign in to comment.