Skip to content

Commit

Permalink
refactor: Make functions and structs private where possible
Browse files Browse the repository at this point in the history
Co-authored-by: logan <[email protected]>
  • Loading branch information
bgins and noryev committed Feb 4, 2025
1 parent 12d4238 commit f8c43ac
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 82 deletions.
40 changes: 20 additions & 20 deletions pkg/resourceprovider/preflight/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,48 +11,48 @@ type dockerInfo struct {
Runtimes map[string]interface{} `json:"Runtimes"`
}

func (p *preflightChecker) CheckDockerRuntime(ctx context.Context) CheckResult {
func (p *preflightChecker) checkDockerRuntime(ctx context.Context) checkResult {
cmd := exec.CommandContext(ctx, "docker", "info", "--format", "{{json .}}")
output, err := cmd.Output()
if err != nil {
return CheckResult{
Passed: false,
Error: fmt.Errorf("failed to get Docker info: %w", err),
Message: "Docker check failed",
return checkResult{
passed: false,
error: fmt.Errorf("failed to get Docker info: %w", err),
message: "Docker check failed",
}
}

var info dockerInfo
if err := json.Unmarshal(output, &info); err != nil {
return CheckResult{
Passed: false,
Error: fmt.Errorf("failed to parse Docker info: %w", err),
Message: "Docker info parsing failed",
return checkResult{
passed: false,
error: fmt.Errorf("failed to parse Docker info: %w", err),
message: "Docker info parsing failed",
}
}

// Check for nvidia runtime
_, hasNvidia := info.Runtimes["nvidia"]
if !hasNvidia {
return CheckResult{
Passed: false,
Error: fmt.Errorf("nvidia runtime not found in Docker configuration"),
Message: "NVIDIA runtime not found in Docker",
return checkResult{
passed: false,
error: fmt.Errorf("nvidia runtime not found in Docker configuration"),
message: "NVIDIA runtime not found in Docker",
}
}

// Test nvidia runtime
testCmd := exec.CommandContext(ctx, "docker", "run", "--rm", "--runtime=nvidia", "nvidia/cuda:11.8.0-base", "nvidia-smi")
if err := testCmd.Run(); err != nil {
return CheckResult{
Passed: false,
Error: fmt.Errorf("failed to run NVIDIA runtime test: %w", err),
Message: "NVIDIA runtime test failed",
return checkResult{
passed: false,
error: fmt.Errorf("failed to run NVIDIA runtime test: %w", err),
message: "NVIDIA runtime test failed",
}
}

return CheckResult{
Passed: true,
Message: "NVIDIA runtime is available and functional",
return checkResult{
passed: true,
message: "NVIDIA runtime is available and functional",
}
}
80 changes: 40 additions & 40 deletions pkg/resourceprovider/preflight/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ import (
"github.com/rs/zerolog/log"
)

type GPUCheckConfig struct {
Required bool
MinGPUs int
MinMemory int64
Capabilities []string
type gpuCheckConfig struct {
required bool
minGPUs int
minMemory int64
capabilities []string
}

func checkNvidiaSMI() error {
Expand All @@ -23,13 +23,13 @@ func checkNvidiaSMI() error {
}

type nvidiaSmiResponse struct {
UUID string
Name string
MemoryTotal string
DriverVersion string
uuid string
name string
memoryTotal string
driverVersion string
}

func parseGPURecord(record string) (*GPUInfo, error) {
func parseGPURecord(record string) (*gpuInfo, error) {
fields := strings.Split(record, ", ")
if len(fields) != 4 {
return nil, fmt.Errorf("invalid record format: expected 4 fields, got %d", len(fields))
Expand All @@ -52,28 +52,28 @@ func parseGPURecord(record string) (*GPUInfo, error) {
}

// Create GPU info with trimmed fields and validated memory
gpu := &GPUInfo{
UUID: strings.TrimSpace(fields[0]),
Name: strings.TrimSpace(fields[1]),
MemoryTotal: memoryMiB,
DriverVersion: strings.TrimSpace(fields[3]),
gpu := &gpuInfo{
uuid: strings.TrimSpace(fields[0]),
name: strings.TrimSpace(fields[1]),
memoryTotal: memoryMiB,
driverVersion: strings.TrimSpace(fields[3]),
}

// Validate required fields
if gpu.UUID == "" {
if gpu.uuid == "" {
return nil, fmt.Errorf("empty UUID")
}
if gpu.Name == "" {
if gpu.name == "" {
return nil, fmt.Errorf("empty Name")
}
if gpu.DriverVersion == "" {
if gpu.driverVersion == "" {
return nil, fmt.Errorf("empty DriverVersion")
}

return gpu, nil
}

func (p *preflightChecker) GetGPUInfo(ctx context.Context) ([]GPUInfo, error) {
func (p *preflightChecker) getGPUInfo(ctx context.Context) ([]gpuInfo, error) {
if err := checkNvidiaSMI(); err != nil {
return nil, fmt.Errorf("nvidia-smi not available: %w", err)
}
Expand All @@ -88,7 +88,7 @@ func (p *preflightChecker) GetGPUInfo(ctx context.Context) ([]GPUInfo, error) {
}

records := strings.Split(strings.TrimSpace(string(output)), "\n")
gpus := make([]GPUInfo, 0, len(records))
gpus := make([]gpuInfo, 0, len(records))

for i, record := range records {
gpu, err := parseGPURecord(record)
Expand All @@ -99,9 +99,9 @@ func (p *preflightChecker) GetGPUInfo(ctx context.Context) ([]GPUInfo, error) {

gpus = append(gpus, *gpu)
log.Info().
Str("name", gpu.Name).
Str("uuid", gpu.UUID).
Int64("memory_mb", gpu.MemoryTotal).
Str("name", gpu.name).
Str("uuid", gpu.uuid).
Int64("memory_mb", gpu.memoryTotal).
Msgf("🎮 GPU %d details", len(gpus))
}

Expand All @@ -112,40 +112,40 @@ func (p *preflightChecker) GetGPUInfo(ctx context.Context) ([]GPUInfo, error) {
return gpus, nil
}

func (p *preflightChecker) CheckGPU(ctx context.Context, config *GPUCheckConfig) CheckResult {
if !config.Required {
func (p *preflightChecker) checkGPU(ctx context.Context, config *gpuCheckConfig) checkResult {
if !config.required {
// Attempt to retrieve GPU info
gpus, err := p.GetGPUInfo(ctx)
gpus, err := p.getGPUInfo(ctx)
if err != nil {
log.Warn().Msg("⚠️ Running without GPU support - Resource Provider will operate in CPU-only mode")
return CheckResult{
Passed: true,
Message: "Operating in CPU-only mode",
return checkResult{
passed: true,
message: "Operating in CPU-only mode",
}
}

// If we found GPUs, log them but still continue
log.Info().Msgf("🎮 Found %d optional GPUs available for use", len(gpus))
return CheckResult{
Passed: true,
Message: fmt.Sprintf("Found %d NVIDIA GPUs (optional)", len(gpus)),
return checkResult{
passed: true,
message: fmt.Sprintf("Found %d NVIDIA GPUs (optional)", len(gpus)),
}
}

// Required GPU checks
log.Info().Msg("Starting required GPU checks")
gpus, err := p.GetGPUInfo(ctx)
gpus, err := p.getGPUInfo(ctx)
if err != nil {
return CheckResult{
Passed: false,
Error: err,
Message: "Required GPU check failed - no NVIDIA GPUs detected",
return checkResult{
passed: false,
error: err,
message: "Required GPU check failed - no NVIDIA GPUs detected",
}
}

log.Info().Msg("✅ GPU requirements satisfied")
return CheckResult{
Passed: true,
Message: fmt.Sprintf("Found %d suitable GPUs", len(gpus)),
return checkResult{
passed: true,
message: fmt.Sprintf("Found %d suitable GPUs", len(gpus)),
}
}
41 changes: 20 additions & 21 deletions pkg/resourceprovider/preflight/preflight.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@ import (

const RequiredGPUMemoryGB = 1 // 1GB of VRAM is required to startup if GPU is enabled

type GPUInfo struct {
UUID string
Name string
MemoryTotal int64
DriverVersion string
type gpuInfo struct {
uuid string
name string
memoryTotal int64
driverVersion string
}

type CheckResult struct {
Passed bool
Message string
Error error
type checkResult struct {
passed bool
message string
error error
}

type preflightConfig struct {
Expand All @@ -32,7 +32,7 @@ type preflightConfig struct {
}

type preflightChecker struct {
gpuInfo []GPUInfo
gpuInfo []gpuInfo
}

func RunPreflightChecks() error {
Expand All @@ -48,7 +48,7 @@ func RunPreflightChecks() error {
}

// Logging GPU requirements
gpuInfo, err := checker.GetGPUInfo(ctx)
gpuInfo, err := checker.getGPUInfo(ctx)
if err != nil {
log.Warn().Err(err).Msg("⚠️ No GPU detected - will operate in CPU-only mode")
} else {
Expand All @@ -58,27 +58,26 @@ func RunPreflightChecks() error {
Msg("🎮 GPU requirements")
}

err = checker.RunAllChecks(ctx, config)
err = checker.runAllChecks(ctx, config)
if err != nil {
log.Error().Err(err).Msg("❌ Preflight checks failed")
return err
}
return nil
}

func (p *preflightChecker) RunAllChecks(ctx context.Context, config preflightConfig) error {

gpuResult := p.CheckGPU(ctx, &GPUCheckConfig{
MinMemory: config.GPU.MinMemoryGB * 1024 * 1024 * 1024,
func (p *preflightChecker) runAllChecks(ctx context.Context, config preflightConfig) error {
gpuResult := p.checkGPU(ctx, &gpuCheckConfig{
minMemory: config.GPU.MinMemoryGB * 1024 * 1024 * 1024,
})
if !gpuResult.Passed {
return fmt.Errorf("GPU check failed: %s", gpuResult.Message)
if !gpuResult.passed {
return fmt.Errorf("GPU check failed: %s", gpuResult.message)
}

if config.Docker.CheckRuntime {
runtimeResult := p.CheckDockerRuntime(ctx)
if !runtimeResult.Passed {
return fmt.Errorf("Docker runtime check failed: %s", runtimeResult.Message)
runtimeResult := p.checkDockerRuntime(ctx)
if !runtimeResult.passed {
return fmt.Errorf("Docker runtime check failed: %s", runtimeResult.message)
}
}

Expand Down
1 change: 0 additions & 1 deletion pkg/resourceprovider/resourceprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ func NewResourceProvider(
executor executor.Executor,
tracer trace.Tracer,
) (*ResourceProvider, error) {

if err := preflight.RunPreflightChecks(); err != nil {
return nil, fmt.Errorf("preflight checks failed: %w", err)
}
Expand Down

0 comments on commit f8c43ac

Please sign in to comment.