Skip to content
This repository has been archived by the owner on Aug 28, 2024. It is now read-only.

Support for local kind cluster #190

Merged
merged 10 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Dockerfile.sci-kind
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build stage: compile the sci-kind binary with the pinned Go 1.19 toolchain image.
FROM golang:1.19 AS builder
# Target OS/architecture handed to the Go build below; default is linux/amd64.
ARG TARGETOS=linux
ARG TARGETARCH=amd64

WORKDIR /workspace
# Copy only the module files before downloading dependencies so that this
# layer can be reused until go.mod or go.sum changes.
COPY go.mod go.sum ./
RUN go mod download

# Only the sci-kind entry point and the internal packages are copied into the build.
COPY cmd/sci-kind/main.go cmd/sci-kind/main.go
COPY internal/ internal/

# Build the binary with cgo disabled for the requested OS/architecture.
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -a -o main cmd/sci-kind/main.go

# Runtime stage: distroless static image that carries only the binary.
FROM gcr.io/distroless/static:nonroot
WORKDIR /

# Copy the pre-built binary from the builder stage.
COPY --from=builder /workspace/main .
# NOTE(review): this switches to root although the base image tag is ":nonroot";
# presumably needed to access host-mounted directories — confirm.
USER root
# 10080 and 8080 match the default -port (gRPC) and -signed-url-port (HTTP)
# flag values in cmd/sci-kind/main.go.
EXPOSE 10080
EXPOSE 8080

# Run the executable.
CMD ["/main"]
49 changes: 44 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
VERSION ?= v0.8.1
IMG ?= docker.io/substratusai/controller-manager:${VERSION}
IMG_GCPMANAGER ?= docker.io/substratusai/gcp-manager:${VERSION}
IMG_SCI_KIND ?= docker.io/substratusai/sci-kind:${VERSION}

# Set to false if you don't want GPU nodepools created
ATTACH_GPU_NODEPOOLS=true
Expand Down Expand Up @@ -36,7 +37,11 @@ ifeq ($(UNAME_M),arm64)
SKAFFOLD_ARCH := arm64
else
PROTOC_ARCH := $(UNAME_M)
SKAFFOLD_ARCH := $(UNAME_M)
ifeq ($(UNAME_M),x86_64)
SKAFFOLD_ARCH := amd64
else
SKAFFOLD_ARCH := $(UNAME_M)
endif
endif

PROTOC_PLATFORM := $(PROTOC_OS)-$(PROTOC_ARCH)
Expand Down Expand Up @@ -142,6 +147,34 @@ dev-down-gcp: build-installer
substratus-installer gcp-down.sh
rm ./secrets/gcp-manager-key.json

# Runs kind-up.sh from inside install/scripts (the cd and the script share one
# shell via &&). Presumably brings up the local kind cluster — see the script.
.PHONY: dev-up-kind
dev-up-kind:
cd install/scripts && ./kind-up.sh

#
# TODO(nstogner): Running outside of cluster is tricky to support b/c of how substratus
# Pods need to mount the same directories as the SCI.
#
# .PHONY: dev-run-kind
# dev-run-kind:
# ...
#

# Development loop against kind using the locally installed skaffold binary:
# first a one-shot "skaffold run" of the "registry" module, then a long-running
# "skaffold dev" of the "install" module, both from skaffold.kind.yaml.
.PHONY: dev-skaffold-kind
dev-skaffold-kind: skaffold
# NOTE: Installing the registry restarts containerd which causes
# skaffold to lose its connections to the Pods. To fix this, the registry is
# installed before running "skaffold dev".
$(SKAFFOLD) run -f skaffold.kind.yaml -m registry
$(SKAFFOLD) dev -f skaffold.kind.yaml -m install \
--cache-artifacts=true \
--tolerate-failures-until-deadline=true

# Runs kind-down.sh from inside install/scripts. Presumably tears down the
# local kind cluster — see the script.
.PHONY: dev-down-kind
dev-down-kind:
cd install/scripts && ./kind-down.sh


.PHONY: dev-up-aws
dev-up-aws: build-installer
docker run -it \
Expand Down Expand Up @@ -257,7 +290,10 @@ installation-scripts:
installation-manifests: manifests kustomize
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
cd config/gcpmanager && $(KUSTOMIZE) edit set image gcp-manager=${IMG_GCPMANAGER}
$(KUSTOMIZE) build config/default > install/kubernetes/system.yaml
cd config/sci-kind && $(KUSTOMIZE) edit set image sci=${IMG_SCI_KIND}
# TODO: Fix in another PR:
#$(KUSTOMIZE) build config/install-gcp > install/kubernetes/system.yaml
$(KUSTOMIZE) build config/install-kind > install/kubernetes/kind/system.yaml

.PHONY: prepare-release
prepare-release: installation-scripts installation-manifests docs
Expand All @@ -273,6 +309,7 @@ $(LOCALBIN):
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
ENVTEST ?= $(LOCALBIN)/setup-envtest
SKAFFOLD ?= $(LOCALBIN)/skaffold
EMBEDMD ?= $(LOCALBIN)/embedmd
CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs
PROTOC ?= $(LOCALBIN)/protoc
Expand All @@ -284,6 +321,7 @@ CRD_REF_DOCS_VERSION ?= v0.0.9
PROTOC_VERSION ?= 23.4
PROTOC_GEN_GO_GRPC_VERSION ?= v1.1.0
PROTOC_GEN_GO_VERSION ?= v1.31.0
SKAFFOLD_VERSION ?= v2.6.3

KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
.PHONY: kustomize
Expand Down Expand Up @@ -332,11 +370,12 @@ $(PROTOC): $(LOCALBIN)
fi

.PHONY: skaffold
skaffold:
skaffold: $(SKAFFOLD)
$(SKAFFOLD): $(LOCALBIN)
@ test -s $(LOCALBIN)/skaffold || \
( curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-$(SKAFFOLD_PLATFORM) && \
curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-$(SKAFFOLD_PLATFORM) && \
chmod +x skaffold && \
mv skaffold $(LOCALBIN)/skaffold )
mv skaffold $(LOCALBIN)/skaffold

.PHONY: envsubst
envsubst:
Expand Down
2 changes: 1 addition & 1 deletion cmd/controllermanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func main() {
var sciAddr string
flag.StringVar(&configDumpPath, "config-dump-path", "", "The filepath to dump the running config to.")
// TODO: Change SCI Service name to be cloud-agnostic.
flag.StringVar(&sciAddr, "sci-address", "gcp-manager.substratus.svc.cluster.local:10080", "The address of the Substratus Cloud Interface server.")
flag.StringVar(&sciAddr, "sci-address", "sci.substratus.svc.cluster.local:10080", "The address of the Substratus Cloud Interface server.")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
Expand Down
59 changes: 59 additions & 0 deletions cmd/sci-kind/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package main

import (
"flag"
"fmt"
"log"
"net"
"net/http"

"github.com/substratusai/substratus/internal/sci"
scikind "github.com/substratusai/substratus/internal/sci/kind"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
hv1 "google.golang.org/grpc/health/grpc_health_v1"
)

// main runs the kind implementation of the Substratus Cloud Interface (SCI).
//
// It starts two listeners:
//   - an HTTP server for signed URL traffic on -signed-url-port, served from a
//     background goroutine, and
//   - a gRPC server on -port that exposes the SCI controller service and the
//     standard gRPC health service; this one blocks the main goroutine.
//
// If either server fails to start or stops with an error, the process exits
// via log.Fatal/log.Fatalf.
func main() {
	var cfg struct {
		port                 int
		signedURLPort        int
		hostSignedURLAddress string
	}
	flag.IntVar(&cfg.port, "port", 10080, "port number to listen on")
	flag.IntVar(&cfg.signedURLPort, "signed-url-port", 8080, "port to listen for signed url traffic")
	flag.StringVar(&cfg.hostSignedURLAddress, "host-signed-url-address", "http://localhost:30080",
		"host address that port forwards to the signed url port within the cluster. this should be set in kind config.yaml.")
	flag.Parse()

	// The same Server value handles both the signed URL HTTP traffic and the
	// gRPC controller service.
	s := &scikind.Server{
		SignedURLAddress: cfg.hostSignedURLAddress,
	}
	signedURLServer := &http.Server{
		Addr:    fmt.Sprintf(":%v", cfg.signedURLPort),
		Handler: s,
	}
	go func() {
		// Log the actual listen address (e.g. ":8080") instead of the bare port
		// number so the message matches its wording and the gRPC log line below.
		log.Printf("Listening for signed URL traffic on address: %v", signedURLServer.Addr)
		// ListenAndServe only returns on error; treat that as fatal for the process.
		log.Fatal(signedURLServer.ListenAndServe())
	}()

	gs := grpc.NewServer()
	sci.RegisterControllerServer(gs, s)

	// Setup Health Check: report SERVING for the empty (overall server) service name.
	hs := health.NewServer()
	hs.SetServingStatus("", hv1.HealthCheckResponse_SERVING)
	hv1.RegisterHealthServer(gs, hs)

	addr := fmt.Sprintf(":%v", cfg.port)
	log.Printf("Listening for gRPC traffic on address: %v", addr)
	lis, err := net.Listen("tcp", addr)
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	// Serve blocks until the gRPC server stops.
	if err := gs.Serve(lis); err != nil {
		log.Fatalf("failed to serve: %v", err)
	}
}
58 changes: 58 additions & 0 deletions config/install-gcp/manager_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This patch injects a sidecar container that acts as an HTTP proxy for the
# controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- amd64
- arm64
- ppc64le
- s390x
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- name: kube-rbac-proxy
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "ALL"
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
args:
- "--secure-listen-address=0.0.0.0:8443"
- "--upstream=http://127.0.0.1:8080/"
- "--logtostderr=true"
- "--v=0"
ports:
- containerPort: 8443
protocol: TCP
name: https
resources:
limits:
cpu: 500m
memory: 128Mi
requests:
cpu: 5m
memory: 64Mi
- name: manager
envFrom:
- configMapRef:
name: system
args:
- "--health-probe-bind-address=:8081"
- "--metrics-bind-address=127.0.0.1:8080"
- "--leader-elect"
9 changes: 9 additions & 0 deletions config/install-kind/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# ConfigMap "system" in the substratus namespace holding the settings for a
# kind-based installation.
# NOTE(review): presumably loaded into the controller manager's environment via
# envFrom, as config/install-gcp/manager_patch.yaml does — confirm for kind.
apiVersion: v1
kind: ConfigMap
metadata:
name: system
namespace: substratus
data:
CLOUD: kind
CLUSTER_NAME: substratus
PRINCIPAL: unused
32 changes: 32 additions & 0 deletions config/install-kind/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kustomization for the kind installation: composes the namespace, the system
# ConfigMap, and the crd, rbac, manager, registry-kind and sci-kind bases, puts
# everything in the substratus namespace, and applies manager_patch.yaml.

# Adds namespace to all resources.
namespace: substratus

# Labels to add to all resources and selectors.
#labels:
#- includeSelectors: true
# pairs:
# someName: someValue

resources:
- ./namespace.yaml
- ./config.yaml
- ../crd
- ../rbac
- ../manager
- ../registry-kind
- ../sci-kind
# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in
# crd/kustomization.yaml
#- ../webhook
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus

# Protect the /metrics endpoint by putting it behind auth.
# If you want your controller-manager to expose the /metrics
# endpoint w/o any authn/z, please comment the following line.
patches:
- path: manager_patch.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
12 changes: 12 additions & 0 deletions config/install-kind/namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be included by default in all installations, not just kind?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have done this in my PR/branch for now as well: 01d2ec1

The approach I took is to add namespace.yaml under config/manager. I thought it wasn't worth creating a separate directory.

kind: Namespace
metadata:
name: substratus
labels:
control-plane: controller-manager
app.kubernetes.io/name: namespace
app.kubernetes.io/instance: system
app.kubernetes.io/component: manager
app.kubernetes.io/created-by: substratus
app.kubernetes.io/part-of: substratus
app.kubernetes.io/managed-by: kustomize
3 changes: 0 additions & 3 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ images:
- name: controller
newName: docker.io/substratusai/controller-manager
newTag: v0.8.1
- name: gcp-manager
newName: docker.io/substratusai/gcp-manager
newTag: v0.6.5-alpha
34 changes: 34 additions & 0 deletions config/registry-kind/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
apiVersion: v1
nstogner marked this conversation as resolved.
Show resolved Hide resolved
kind: ConfigMap
metadata:
name: configure-cri
data:
configure-cri.sh: |
#!/usr/bin/env bash

set -x
# Exit on non-existent variable.
set -u
# Exit on error.
set -e

export IMAGE_REGISTRY=$REGISTRY_PORT_5000_TCP_ADDR:5000

if ! grep -q $IMAGE_REGISTRY /mnt/etc/containerd/config.toml; then
containerd_version=$(nsenter --target 1 --mount bash -c "containerd --version | awk '{ print substr(\$3,0,4) }'")
if [ "$containerd_version" = "1.3." ] || [ "$containerd_version" = "1.4." ]; then
cat <<EOF >> /mnt/etc/containerd/config.toml
[plugins.cri.registry.configs."$IMAGE_REGISTRY"]
endpoint = ["http://$IMAGE_REGISTRY"]
EOF
else
# Correct config for containerd 1.5 and above
cat <<EOF >> /mnt/etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."$IMAGE_REGISTRY"]
endpoint = ["http://$IMAGE_REGISTRY"]
EOF
fi
nsenter --target 1 --mount bash -c "systemctl is-active --quiet containerd && echo 'Restarting containerd' && systemctl restart containerd"
# Wait for containerd to be ready so that skaffold doesn't fail.
nsenter --target 1 --mount bash -c "while ! ctr -n k8s.io containers ls; do sleep 1; done"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably can be removed

fi
41 changes: 41 additions & 0 deletions config/registry-kind/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# DaemonSet that runs /scripts/configure-cri.sh (mounted from the
# "configure-cri" ConfigMap) in a privileged init container with the host's
# /etc available at /mnt/etc, and then keeps a pause container running.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: configure-cri
labels:
app: configure-cri
spec:
selector:
matchLabels:
app: configure-cri
template:
metadata:
labels:
app: configure-cri
spec:
# Share the host PID namespace with the pod's containers.
hostPID: true
initContainers:
- name: configure-cri
image: ubuntu:22.04
command: ["/scripts/configure-cri.sh"]
volumeMounts:
- name: etc
mountPath: "/mnt/etc"
- mountPath: /scripts
name: scripts
securityContext:
privileged: true
volumes:
- name: etc
hostPath:
path: /etc
- name: scripts
configMap:
name: configure-cri
# Owner gets rwx so the script can be executed directly as the container command.
defaultMode: 0744
containers:
# Main container only keeps the pod alive; the work is done by the init container.
- name: pause
image: gcr.io/google_containers/pause
# Tolerate every NoSchedule taint, whatever its key.
tolerations:
- effect: NoSchedule
operator: Exists
Loading