Skip to content

Commit 83a91a6

Browse files
committed
implemented kvcache-aware-scorer
Signed-off-by: Maroon Ayoub <[email protected]>
1 parent 94b55cb commit 83a91a6

File tree

10 files changed

+374
-41
lines changed

10 files changed

+374
-41
lines changed

Dockerfile

+21-2
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,47 @@ FROM quay.io/projectquay/golang:1.24 AS builder
33
ARG TARGETOS
44
ARG TARGETARCH
55

6-
# ENV GOPROXY=https://goproxy.io,direct
6+
# Install build tools
7+
RUN dnf install -y gcc-c++ libstdc++ libstdc++-devel && dnf clean all
78

89
WORKDIR /workspace
10+
11+
## NeuralMagic internal repos pull config
12+
ARG GIT_NM_USER
13+
ARG NM_TOKEN
14+
### use git token
15+
RUN echo -e "machine github.com\n\tlogin ${GIT_NM_USER}\n\tpassword ${NM_TOKEN}" >> ~/.netrc
16+
ENV GOPRIVATE=github.com/neuralmagic
17+
ENV GIT_TERMINAL_PROMPT=1
18+
919
# Copy the Go Modules manifests
1020
COPY go.mod go.mod
1121
COPY go.sum go.sum
1222
# cache deps before building and copying source so that we don't need to re-download as much
1323
# and so that source changes don't invalidate our downloaded layer
1424
RUN go mod download
25+
RUN rm -rf ~/.netrc # remove git token
1526

1627
# Copy the go source
1728
COPY cmd ./cmd
1829
COPY pkg ./pkg
1930
COPY internal ./internal
2031
COPY api ./api
2132

33+
# HuggingFace tokenizer bindings
34+
RUN mkdir -p lib
35+
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
36+
RUN ranlib lib/*.a
37+
2238
# Build
2339
# GOARCH has no default value so that the binary is built according to the host where the command
2440
# was called. For example, if we call make image-build in a local env which has an Apple Silicon M1 OS,
2541
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
2642
# by leaving it empty we can ensure that the container and the binary shipped in it will have the same platform.
27-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o bin/epp cmd/epp/main.go cmd/epp/health.go
43+
ENV CGO_ENABLED=1
44+
ENV GOOS=${TARGETOS:-linux}
45+
ENV GOARCH=${TARGETARCH}
46+
RUN go build -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib'" cmd/epp/main.go cmd/epp/health.go
2847

2948
# Use distroless as minimal base image to package the manager binary
3049
# Refer to https://github.com/GoogleContainerTools/distroless for more details

Makefile

+146-13
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ DEV_VERSION ?= 0.0.1
398398
PROD_VERSION ?= 0.0.0
399399
IMAGE_TAG_BASE ?= quay.io/vllm-d/$(PROJECT_NAME)/epp
400400
IMG = $(IMAGE_TAG_BASE):$(DEV_VERSION)
401+
NAMESPACE ?= hc4ai-operator
401402

402403
# CONTAINER_TOOL := $(shell command -v docker >/dev/null 2>&1 && echo docker || command -v podman >/dev/null 2>&1 && echo podman || echo "")
403404
BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
@@ -448,22 +449,15 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
448449
@echo "✅ Using builder: $(BUILDER)"
449450
@if [ "$(BUILDER)" = "buildah" ]; then \
450451
echo "🔧 Buildah detected: Performing multi-arch build..."; \
451-
FINAL_TAG=$(IMG); \
452452
for arch in amd64; do \
453-
ARCH_TAG=$$FINAL_TAG-$$arch; \
454453
echo "📦 Building for architecture: $$arch"; \
455454
buildah build --arch=$$arch --os=linux -t $(IMG)-$$arch . || exit 1; \
456455
echo "🚀 Pushing image: $(IMG)-$$arch"; \
457456
buildah push $(IMG)-$$arch docker://$(IMG)-$$arch || exit 1; \
458457
done; \
459-
echo "🧼 Removing existing manifest (if any)..."; \
460-
buildah manifest rm $$FINAL_TAG || true; \
461458
echo "🧱 Creating and pushing manifest list: $(IMG)"; \
462459
buildah manifest create $(IMG); \
463-
for arch in amd64; do \
464-
ARCH_TAG=$$FINAL_TAG-$$arch; \
465-
buildah manifest add $$FINAL_TAG $$ARCH_TAG; \
466-
done; \
460+
buildah manifest add $(IMG) $(IMG)-amd64; \
467461
buildah manifest push --all $(IMG) docker://$(IMG); \
468462
elif [ "$(BUILDER)" = "docker" ]; then \
469463
echo "🐳 Docker detected: Building with buildx..."; \
@@ -485,7 +479,12 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
485479
.PHONY: image-build
486480
image-build: check-container-tool load-version-json ## Build container image using $(CONTAINER_TOOL)
487481
@printf "\033[33;1m==== Building container image $(IMG) ====\033[0m\n"
488-
$(CONTAINER_TOOL) build --build-arg TARGETOS=$(TARGETOS) --build-arg TARGETARCH=$(TARGETARCH) -t $(IMG) .
482+
$(CONTAINER_TOOL) build --platform=$(TARGETOS)/$(TARGETARCH) \
483+
--build-arg TARGETOS=$(TARGETOS) \
484+
--build-arg TARGETARCH=$(TARGETARCH) \
485+
--build-arg GIT_NM_USER=$(GIT_NM_USER)\
486+
--build-arg NM_TOKEN=$(NM_TOKEN) \
487+
-t $(IMG) .
489488

490489
.PHONY: image-push
491490
image-push: check-container-tool load-version-json ## Push container image $(IMG) to registry
@@ -517,6 +516,142 @@ uninstall-docker: check-container-tool ## Uninstall app from $(CONTAINER_TOOL)
517516
-$(CONTAINER_TOOL) stop $(PROJECT_NAME)-container && $(CONTAINER_TOOL) rm $(PROJECT_NAME)-container
518517
@echo "$(CONTAINER_TOOL) uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"
519518

519+
### Kubernetes Targets (kubectl)
520+
521+
# TODO: currently incorrect because it depends on OpenShift APIs.
522+
# See: https://github.com/neuralmagic/gateway-api-inference-extension/issues/14
523+
.PHONY: install-k8s
524+
install-k8s: check-kubectl check-kustomize check-envsubst ## Install on Kubernetes
525+
export PROJECT_NAME=${PROJECT_NAME}
526+
export NAMESPACE=${NAMESPACE}
527+
@echo "Creating namespace (if needed) and setting context to $(NAMESPACE)..."
528+
kubectl create namespace $(NAMESPACE) 2>/dev/null || true
529+
kubectl config set-context --current --namespace=$(NAMESPACE)
530+
@echo "Deploying resources from deploy/ ..."
531+
# Build the kustomization from deploy, substitute variables, and apply the YAML
532+
kustomize build deploy/environments/openshift | envsubst | kubectl apply -f -
533+
@echo "Waiting for pod to become ready..."
534+
sleep 5
535+
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -o jsonpath='{.items[0].metadata.name}'); \
536+
echo "Kubernetes installation complete."; \
537+
echo "To use the app, run:"; \
538+
echo "alias $(PROJECT_NAME)='kubectl exec -n $(NAMESPACE) -it $$POD -- /app/$(PROJECT_NAME)'"
539+
540+
# TODO: currently incorrect because it depends on OpenShift APIs.
541+
# See: https://github.com/neuralmagic/gateway-api-inference-extension/issues/14
542+
.PHONY: uninstall-k8s
543+
uninstall-k8s: check-kubectl check-kustomize check-envsubst ## Uninstall from Kubernetes
544+
export PROJECT_NAME=${PROJECT_NAME}
545+
export NAMESPACE=${NAMESPACE}
546+
@echo "Removing resources from Kubernetes..."
547+
kustomize build deploy/environments/openshift | envsubst | kubectl delete --force -f - || true
548+
POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -o jsonpath='{.items[0].metadata.name}'); \
549+
echo "Deleting pod: $$POD"; \
550+
kubectl delete pod "$$POD" --force --grace-period=0 || true; \
551+
echo "Kubernetes uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"
552+
553+
### OpenShift Targets (oc)
554+
555+
# ------------------------------------------------------------------------------
556+
# OpenShift Infrastructure Installer
557+
#
558+
# This target deploys infrastructure requirements for the entire cluster.
559+
# Among other things, this includes CRDs and operators which all users of the
560+
# cluster need for development (e.g. Gateway API, Istio, etc).
561+
#
562+
# **Warning**: Only run this if you're certain you should be running it. It
563+
# has implications for all users of the cluster!
564+
# ------------------------------------------------------------------------------
565+
.PHONY: install-openshift-infrastructure
566+
install-openshift-infrastructure:
567+
ifeq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
568+
@echo "INFRASTRUCTURE_OVERRIDE is set to true, deploying infrastructure components"
569+
@echo "Installing CRDs for Gateway API & GIE"
570+
kustomize build deploy/components/crds | kubectl apply --server-side --force-conflicts -f -
571+
@echo "Installing the Istio Sail Operator and CRDs for Istio"
572+
kustomize build --enable-helm deploy/components/sail-operator | kubectl apply --server-side --force-conflicts -f -
573+
@echo "Installing the Istio Control Plane"
574+
kustomize build deploy/components/istio-control-plane | kubectl apply -f -
575+
else
576+
$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
577+
endif
578+
579+
# ------------------------------------------------------------------------------
580+
# OpenShift Infrastructure Uninstaller
581+
#
582+
# This target removes all infrastructure components (e.g. CRDs, operators,
583+
# etc) for the entire cluster.
584+
#
585+
# **Warning**: Only run this if you're certain you should be running it. **This
586+
# will disrupt everyone using the cluster**. Generally this should only be run
587+
# when the infrastructure components have undergone very significant change, and
588+
# you need to do a hard cleanup and re-deploy.
589+
# ------------------------------------------------------------------------------
590+
.PHONY: uninstall-openshift-infrastructure
591+
uninstall-openshift-infrastructure:
592+
ifeq ($(strip $(INFRASTRUCTURE_OVERRIDE)),true)
593+
@echo "INFRASTRUCTURE_OVERRIDE is set to true, removing infrastructure components"
594+
@echo "Uninstalling the Istio Control Plane"
595+
kustomize build deploy/components/istio-control-plane | kubectl delete -f - || true
596+
@echo "Uninstalling the Istio Sail Operator and CRDs for Istio"
597+
kustomize build --enable-helm deploy/components/sail-operator | kubectl delete -f - || true
598+
@echo "Uninstalling CRDs for Gateway API & GIE"
599+
kustomize build deploy/components/crds | kubectl delete -f - || true
600+
else
601+
$(error "Error: The environment variable INFRASTRUCTURE_OVERRIDE must be set to true in order to run this target.")
602+
endif
603+
604+
# ------------------------------------------------------------------------------
605+
# OpenShift Installer
606+
#
607+
# This target deploys components in a namespace on an OpenShift cluster for
608+
# a developer to do development and testing cycles.
609+
# ------------------------------------------------------------------------------
610+
.PHONY: install-openshift
611+
install-openshift: check-kubectl check-kustomize check-envsubst ## Install on OpenShift
612+
@echo $$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION
613+
@echo "Creating namespace $(NAMESPACE)..."
614+
kubectl create namespace $(NAMESPACE) 2>/dev/null || true
615+
@echo "Deploying common resources from deploy/ ..."
616+
# Build and substitute the base manifests from deploy, then apply them
617+
kustomize build deploy/environments/openshift | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -n $(NAMESPACE) -f -
618+
@echo "Waiting for pod to become ready..."
619+
sleep 5
620+
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
621+
echo "OpenShift installation complete."; \
622+
echo "To use the app, run:"; \
623+
echo "alias $(PROJECT_NAME)='kubectl exec -n $(NAMESPACE) -it $$POD -- /app/$(PROJECT_NAME)'"
624+
625+
# ------------------------------------------------------------------------------
626+
# OpenShift Uninstaller
627+
#
628+
# This target cleans up a developer's testing and development namespace,
629+
# removing all components therein.
630+
# ------------------------------------------------------------------------------
631+
.PHONY: uninstall-openshift
632+
uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall from OpenShift
633+
@echo "Removing resources from OpenShift..."
634+
kustomize build deploy/environments/openshift | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete --force -f - || true
635+
# @if kubectl api-resources --api-group=route.openshift.io | grep -q Route; then \
636+
# envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
637+
# fi
638+
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
639+
echo "Deleting pod: $$POD"; \
640+
kubectl delete pod "$$POD" --force --grace-period=0 || true; \
641+
echo "OpenShift uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"
642+
643+
### RBAC Targets (using kustomize and envsubst)
644+
645+
.PHONY: install-rbac
646+
install-rbac: check-kubectl check-kustomize check-envsubst ## Install RBAC
647+
@echo "Applying RBAC configuration from deploy/rbac..."
648+
kustomize build deploy/environments/openshift/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -f -
649+
650+
.PHONY: uninstall-rbac
651+
uninstall-rbac: check-kubectl check-kustomize check-envsubst ## Uninstall RBAC
652+
@echo "Removing RBAC configuration from deploy/rbac..."
653+
kustomize build deploy/environments/openshift/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete -f - || true
654+
520655

521656
##@ Version Extraction
522657
.PHONY: version dev-registry prod-registry extract-version-info
@@ -652,11 +787,9 @@ check-alias: check-container-tool
652787
echo "✅ Alias is likely to work: alias $(PROJECT_NAME)='$(CONTAINER_TOOL) exec -it $(PROJECT_NAME)-container /app/$(PROJECT_NAME)'"; \
653788
fi
654789

655-
# This is being used for tekton builds in the CI/CD pipeline, to provide a
656-
# default namespace to do a test deployment of the Kubernetes dev environment.
657790
.PHONY: print-namespace
658-
print-namespace:
659-
@echo "hc4ai-operator"
791+
print-namespace: ## Print the current namespace
792+
@echo "$(NAMESPACE)"
660793

661794
.PHONY: print-project-name
662795
print-project-name: ## Print the current project name

go.mod

+14-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
module sigs.k8s.io/gateway-api-inference-extension
22

3-
go 1.24.0
3+
go 1.24.1
4+
5+
toolchain go1.24.2
46

57
require (
68
github.com/elastic/crd-ref-docs v0.1.0
79
github.com/envoyproxy/go-control-plane/envoy v1.32.4
810
github.com/go-logr/logr v1.4.2
911
github.com/google/go-cmp v0.7.0
12+
github.com/google/uuid v1.6.0
13+
github.com/neuralmagic/kvcache-manager v0.0.0-20250422070607-db465f8aaa71
1014
github.com/onsi/ginkgo/v2 v2.23.4
1115
github.com/onsi/gomega v1.37.0
1216
github.com/prometheus/client_golang v1.22.0
@@ -41,7 +45,9 @@ require (
4145
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4246
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4347
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
48+
github.com/daulet/tokenizers v1.20.2 // indirect
4449
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
50+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
4551
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
4652
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
4753
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
@@ -66,9 +72,9 @@ require (
6672
github.com/google/gnostic-models v0.6.8 // indirect
6773
github.com/google/gofuzz v1.2.0 // indirect
6874
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
69-
github.com/google/uuid v1.6.0 // indirect
7075
github.com/gorilla/websocket v1.5.0 // indirect
7176
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
77+
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
7278
github.com/huandu/xstrings v1.3.3 // indirect
7379
github.com/imdario/mergo v0.3.11 // indirect
7480
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@@ -90,6 +96,7 @@ require (
9096
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
9197
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
9298
github.com/prometheus/procfs v0.15.1 // indirect
99+
github.com/redis/go-redis/v9 v9.7.3 // indirect
93100
github.com/spf13/cobra v1.8.1 // indirect
94101
github.com/spf13/pflag v1.0.5 // indirect
95102
github.com/stoewer/go-strcase v1.3.0 // indirect
@@ -104,15 +111,15 @@ require (
104111
go.opentelemetry.io/otel/trace v1.34.0 // indirect
105112
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
106113
go.uber.org/automaxprocs v1.6.0 // indirect
107-
golang.org/x/crypto v0.36.0 // indirect
114+
golang.org/x/crypto v0.37.0 // indirect
108115
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
109116
golang.org/x/mod v0.24.0 // indirect
110-
golang.org/x/net v0.38.0 // indirect
117+
golang.org/x/net v0.39.0 // indirect
111118
golang.org/x/oauth2 v0.27.0 // indirect
112-
golang.org/x/sync v0.12.0 // indirect
119+
golang.org/x/sync v0.13.0 // indirect
113120
golang.org/x/sys v0.32.0 // indirect
114-
golang.org/x/term v0.30.0 // indirect
115-
golang.org/x/text v0.23.0 // indirect
121+
golang.org/x/term v0.31.0 // indirect
122+
golang.org/x/text v0.24.0 // indirect
116123
golang.org/x/time v0.7.0 // indirect
117124
golang.org/x/tools v0.31.0 // indirect
118125
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect

0 commit comments

Comments
 (0)