Skip to content

Commit

Permalink
GPU Count Ambiguity
Browse files Browse the repository at this point in the history
We used to report logical GPUs based on what was needed for cluster
autoscaling, but users may be confused if UX elements talk about logical
things rather than physical, so support both.  Also update compilers
and other jazz.
  • Loading branch information
spjmurray committed Oct 18, 2024
1 parent 800433c commit bade36e
Show file tree
Hide file tree
Showing 17 changed files with 270 additions and 176 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ GOBIN := $(if $(shell go env GOBIN),$(shell go env GOBIN),$(GOPATH)/bin)
FLAGS=-trimpath -ldflags '-X $(MODULE)/pkg/constants.Version=$(VERSION) -X $(MODULE)/pkg/constants.Revision=$(REVISION)'

# Defines the linter version.
LINT_VERSION=v1.59.1
LINT_VERSION=v1.61.0

# Defines the version of the CRD generation tools to use.
CONTROLLER_TOOLS_VERSION=v0.16.1
Expand Down
4 changes: 2 additions & 2 deletions charts/region/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ description: A Helm chart for deploying Unikorn's Region Controller

type: application

version: v0.1.43
appVersion: v0.1.43
version: v0.1.44
appVersion: v0.1.44

icon: https://raw.githubusercontent.com/unikorn-cloud/assets/main/images/logos/dark-on-light/icon.png

Expand Down
15 changes: 11 additions & 4 deletions charts/region/crds/region.unikorn-cloud.org_regions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,10 @@ spec:
GPU defines additional GPU metadata. When provided it will enable selection
of images based on GPU vendor and model.
properties:
count:
description: Count is the number of logical
GPUs in the flavor.
logicalCount:
description: |-
LogicalCount is the number of logical GPUs e.g. an AMD MI250 is 2 MI200s.
This is primarily for scheduling e.g. autoscaling.
type: integer
memory:
anyOf:
Expand All @@ -121,6 +122,11 @@ spec:
Model is a free-form model name that corresponds to the supported models
property included on images, and must be an exact match e.g. H100.
type: string
physicalCount:
description: |-
PhysicalCount is the number of physical cards in the flavor.
This is primarily for end users, so it's not confusing.
type: integer
vendor:
description: |-
Vendor is the GPU vendor, used for coarse grained flavor and image
Expand All @@ -130,9 +136,10 @@ spec:
- AMD
type: string
required:
- count
- logicalCount
- memory
- model
- physicalCount
- vendor
type: object
id:
Expand Down
8 changes: 6 additions & 2 deletions charts/region/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,17 @@
"gpu": {
"type": "object",
"required": [
"count",
"physicalCount",
"logicalCount",
"memory",
"model",
"vendor"
],
"properties": {
"count": {
"physicalCount": {
"type": "integer"
},
"logicalCount": {
"type": "integer"
},
"memory": {
Expand Down
3 changes: 2 additions & 1 deletion charts/region/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ organization: unikorn-cloud
# vendor: NVIDIA
# model: H100
# memory: 192Gi
# count: 2
# physicalCount: 2
# logicalCount: 2
# # Image service configuration.
# image:
# # Image selection, the result is a boolean intersection of chosen options.
Expand Down
61 changes: 31 additions & 30 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
module github.com/unikorn-cloud/region

go 1.22.5
go 1.23.2

require (
github.com/getkin/kin-openapi v0.127.0
github.com/getkin/kin-openapi v0.128.0
github.com/go-chi/chi/v5 v5.1.0
github.com/gophercloud/gophercloud/v2 v2.1.0
github.com/gophercloud/gophercloud/v2 v2.1.1
github.com/gophercloud/utils v0.0.0-20231010081019-80377eca5d56
github.com/oapi-codegen/runtime v1.1.1
github.com/spf13/pflag v1.0.5
github.com/unikorn-cloud/core v0.1.72
github.com/unikorn-cloud/identity v0.2.40
go.opentelemetry.io/otel v1.29.0
go.opentelemetry.io/otel/sdk v1.29.0
go.opentelemetry.io/otel/trace v1.29.0
golang.org/x/crypto v0.26.0
github.com/unikorn-cloud/core v0.1.76
github.com/unikorn-cloud/identity v0.2.42
go.opentelemetry.io/otel v1.31.0
go.opentelemetry.io/otel/sdk v1.31.0
go.opentelemetry.io/otel/trace v1.31.0
golang.org/x/crypto v0.28.0
k8s.io/api v0.31.1
k8s.io/apimachinery v0.31.1
sigs.k8s.io/controller-runtime v0.19.0
Expand Down Expand Up @@ -46,53 +46,54 @@ require (
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gophercloud/gophercloud v1.14.0 // indirect
github.com/gophercloud/gophercloud v1.14.1 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/hashicorp/go-uuid v1.0.3 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/invopop/yaml v0.3.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/masterminds/semver v1.5.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/perimeterx/marshmallow v1.1.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.20.2 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/common v0.60.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.29.0 // indirect
go.opentelemetry.io/otel/metric v1.29.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 // indirect
go.opentelemetry.io/otel/metric v1.31.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/oauth2 v0.22.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/time v0.6.0 // indirect
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/term v0.25.0 // indirect
golang.org/x/text v0.19.0 // indirect
golang.org/x/time v0.7.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/grpc v1.65.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect
google.golang.org/grpc v1.67.1 // indirect
google.golang.org/protobuf v1.35.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/client-go v0.31.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240826222958-65a50c78dec5 // indirect
k8s.io/utils v0.0.0-20240821151609-f90d01438635 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
k8s.io/kube-openapi v0.0.0-20241009091222-67ed5848f094 // indirect
k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 // indirect
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)
Loading

0 comments on commit bade36e

Please sign in to comment.