diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1d61792fd..e2b423928 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -115,10 +115,12 @@ jobs: with: go-version-file: runner/go.mod cache-dependency-path: runner/go.sum + - name: Check if go.mod and go.sum are up-to-date + run: go mod tidy -diff - name: Run golangci-lint uses: golangci/golangci-lint-action@v6 with: - version: v1.58 + version: v1.62.0 args: --timeout=20m working-directory: runner - name: Test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba816f59d..0ba075915 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,8 +7,9 @@ repos: args: ['--fix'] - id: ruff-format - repo: https://github.com/golangci/golangci-lint - rev: v1.58.1 + rev: v1.62.0 # Should match .github/workflows/build.yml hooks: - id: golangci-lint-full - entry: bash -c 'cd runner && golangci-lint run -D depguard --presets import,module,unused "$@"' + language_version: 1.23.0 # Should match runner/go.mod + entry: bash -c 'cd runner && golangci-lint run' stages: [manual] diff --git a/runner/.golangci-lint.yml b/runner/.golangci.yml similarity index 88% rename from runner/.golangci-lint.yml rename to runner/.golangci.yml index ee2bb3814..435ecce78 100644 --- a/runner/.golangci-lint.yml +++ b/runner/.golangci.yml @@ -6,19 +6,17 @@ run: timeout: 1m # exit code when at least one issue was found, default is 1 - issues-exit-code: 0 + issues-exit-code: 1 # include test files or not, default is true tests: false - skip-dirs: - - data - # output configuration options output: # colored-line-number|line-number|json|tab|checkstyle|code-climate|junit-xml|github-actions # default is "colored-line-number" - format: colored-line-number + formats: + - format: colored-line-number # all available settings of specific linters linters-settings: @@ -40,7 +38,6 @@ linters-settings: # default is false: such cases aren't reported by default. check-blank: false - errorlint: # Check whether fmt.Errorf uses the %w verb for formatting errors. See the readme for caveats errorf: true @@ -57,15 +54,6 @@ linters-settings: # switch default-signifies-exhaustive: false - exhaustivestruct: - # Struct Patterns is list of expressions to match struct packages and names - # The struct packages have the form example.com/package.ExampleStruct - # The matching patterns can use matching syntax from https://pkg.go.dev/path#Match - # If this list is empty, all structs are tested. - struct-patterns: - - '*.Test' - - 'example.com/package.ExampleStruct' - funlen: lines: 60 statements: 40 @@ -157,10 +145,6 @@ linters-settings: # it's a comma-separated list of prefixes local-prefixes: github.com/dstackai/dstackai - golint: - # minimal confidence for issues, default is 0.8 - min-confidence: 0.8 - gosec: # To select a subset of rules to run. # Available rules: https://github.com/securego/gosec#available-rules @@ -186,15 +170,10 @@ linters-settings: truncate: "32" gosimple: - # Select the Go version to target. The default is '1.13'. - go: "1.18" # https://staticcheck.io/docs/options#checks checks: [ "all" ] govet: - # report about shadowed variables - check-shadowing: true - # settings per analyzer settings: printf: # analyzer name, run `go tool vet help` to see all analyzers @@ -206,19 +185,10 @@ linters-settings: # enable or disable analyzers by name # run `go tool vet help` to see all analyzers - enable: - - atomicalign - enable-all: false + enable-all: true disable: - shadow - disable-all: false - - ifshort: - # Maximum length of variable declaration measured in number of lines, after which linter won't suggest using short syntax. - # Has higher priority than max-decl-chars. - max-decl-lines: 1 - # Maximum length of variable declaration measured in number of characters, after which linter won't suggest using short syntax. - max-decl-chars: 30 + - fieldalignment importas: # if set to `true`, force to use alias. @@ -245,14 +215,10 @@ linters-settings: tab-width: 1 staticcheck: - # Select the Go version to target. The default is '1.13'. - go: "1.18" # https://staticcheck.io/docs/options#checks checks: [ "all" ] stylecheck: - # Select the Go version to target. The default is '1.13'. - go: "1.18" # https://staticcheck.io/docs/options#checks checks: [ "all", "-ST1000", "-ST1003", "-ST1016", "-ST1020", "-ST1021", "-ST1022" ] # https://staticcheck.io/docs/options#dot_import_whitelist @@ -298,25 +264,22 @@ linters-settings: name: true begin: true - unused: - # Select the Go version to target. The default is '1.13'. - go: "1.18" - whitespace: multi-if: false # Enforces newlines (or comments) after every multi-line if statement multi-func: false # Enforces newlines (or comments) after every multi-line function signatur linters: - enable: - - megacheck - - govet - disable: - - maligned - - prealloc - disable-all: false presets: - bugs - unused + - import + - module + - format + disable: + - depguard + - gomodguard + - unparam + - contextcheck fast: false diff --git a/runner/cmd/shim/main.go b/runner/cmd/shim/main.go index 39d8e3cad..862d0d37e 100644 --- a/runner/cmd/shim/main.go +++ b/runner/cmd/shim/main.go @@ -137,7 +137,7 @@ func main() { log.Printf("Config Runner: %+v\n", args.Runner) log.Printf("Config Docker: %+v\n", args.Docker) - dockerRunner, err := shim.NewDockerRunner(args) + dockerRunner, err := shim.NewDockerRunner(&args) if err != nil { return cli.Exit(err, 1) } diff --git a/runner/go.mod b/runner/go.mod index 9c133eacd..4d030c20a 100644 --- a/runner/go.mod +++ b/runner/go.mod @@ -1,8 +1,6 @@ module github.com/dstackai/dstack/runner -go 1.21 - -toolchain go1.21.9 +go 1.23 require ( github.com/alexellis/go-execute/v2 v2.2.1 @@ -10,15 +8,18 @@ require ( github.com/creack/pty v1.1.21 github.com/docker/docker v26.0.0+incompatible github.com/docker/go-connections v0.5.0 + github.com/docker/go-units v0.5.0 github.com/go-git/go-git/v5 v5.12.0 github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5 github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf + github.com/shirou/gopsutil/v3 v3.24.3 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 github.com/urfave/cli/v2 v2.27.1 github.com/ztrue/tracerr v0.4.0 golang.org/x/crypto v0.22.0 + golang.org/x/sys v0.19.0 ) require ( @@ -31,13 +32,13 @@ require ( github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/distribution/reference v0.6.0 // indirect - github.com/docker/go-units v0.5.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/go-billy/v5 v5.5.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/h2non/filetype v1.1.3 // indirect @@ -45,6 +46,7 @@ require ( github.com/juju/errors v1.0.0 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/klauspost/compress v1.17.8 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/term v0.5.0 // indirect github.com/morikuni/aec v1.0.0 // indirect @@ -53,13 +55,16 @@ require ( github.com/pjbgf/sha1cd v0.3.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect - github.com/shirou/gopsutil/v3 v3.24.3 // indirect github.com/skeema/knownhosts v1.2.2 // indirect + github.com/tklauser/go-sysconf v0.3.12 // indirect + github.com/tklauser/numcpus v0.6.1 // indirect github.com/ulikunitz/xz v0.5.12 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0 // indirect go.opentelemetry.io/otel v1.25.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.25.0 // indirect @@ -69,7 +74,6 @@ require ( golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.24.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.19.0 // indirect golang.org/x/tools v0.20.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect diff --git a/runner/go.sum b/runner/go.sum index 27a3efb41..3fee80aa2 100644 --- a/runner/go.sum +++ b/runner/go.sum @@ -70,6 +70,7 @@ github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-stack/stack v1.6.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -120,6 +121,7 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/magiconair/properties v1.7.4-0.20170902060319-8d7837e64d3c/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mattn/go-colorable v0.0.10-0.20170816031813-ad5389df28cd/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= @@ -144,6 +146,7 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= @@ -177,7 +180,9 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= @@ -190,6 +195,7 @@ github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQut github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/ztrue/tracerr v0.4.0 h1:vT5PFxwIGs7rCg9ZgJ/y0NmOpJkPCPFK8x0vVIYzd04= github.com/ztrue/tracerr v0.4.0/go.mod h1:PaFfYlas0DfmXNpo7Eay4MFhZUONqvXM+T2HyGPpngk= diff --git a/runner/internal/executor/executor.go b/runner/internal/executor/executor.go index f8f201554..cff0de629 100644 --- a/runner/internal/executor/executor.go +++ b/runner/internal/executor/executor.go @@ -263,11 +263,11 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { if _, err := os.Stat(keyPath); err == nil { return nil, gerrors.New("private key already exists") } - if err := os.MkdirAll(filepath.Dir(keyPath), 0700); err != nil { + if err := os.MkdirAll(filepath.Dir(keyPath), 0o700); err != nil { return nil, gerrors.Wrap(err) } log.Info(ctx, "Writing private key", "path", keyPath) - if err := os.WriteFile(keyPath, []byte(*ex.repoCredentials.PrivateKey), 0600); err != nil { + if err := os.WriteFile(keyPath, []byte(*ex.repoCredentials.PrivateKey), 0o600); err != nil { return nil, gerrors.Wrap(err) } return func() { @@ -282,7 +282,7 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { if _, err := os.Stat(hostsPath); err == nil { return nil, gerrors.New("hosts.yml file already exists") } - if err := os.MkdirAll(filepath.Dir(hostsPath), 0700); err != nil { + if err := os.MkdirAll(filepath.Dir(hostsPath), 0o700); err != nil { return nil, gerrors.Wrap(err) } log.Info(ctx, "Writing OAuth token", "path", hostsPath) @@ -291,7 +291,7 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { return nil, gerrors.Wrap(err) } ghHost := fmt.Sprintf("%s:\n oauth_token: \"%s\"\n", cloneURL.Hostname(), *ex.repoCredentials.OAuthToken) - if err := os.WriteFile(hostsPath, []byte(ghHost), 0644); err != nil { + if err := os.WriteFile(hostsPath, []byte(ghHost), 0o600); err != nil { return nil, gerrors.Wrap(err) } return func() { @@ -305,16 +305,15 @@ func (ex *RunExecutor) setupCredentials(ctx context.Context) (func(), error) { func isPtyError(err error) bool { /* read /dev/ptmx: input/output error */ var e *os.PathError - return errors.As(err, &e) && e.Err == syscall.EIO + return errors.As(err, &e) && errors.Is(e.Err, syscall.EIO) } func buildLDLibraryPathEnv() (string, error) { // Execute shell command to get Python prefix cmd := exec.Command("bash", "-i", "-c", "python3-config --prefix") output, err := cmd.Output() - if err != nil { - return "", fmt.Errorf("error executing command: %v", err) + return "", fmt.Errorf("error executing command: %w", err) } // Extract and trim the prefix path diff --git a/runner/internal/executor/executor_test.go b/runner/internal/executor/executor_test.go index c36668878..b519525ef 100644 --- a/runner/internal/executor/executor_test.go +++ b/runner/internal/executor/executor_test.go @@ -121,7 +121,7 @@ func TestExecutor_RemoteRepo(t *testing.T) { RepoConfigEmail: "developer@dstack.ai", } ex.jobSpec.Commands = append(ex.jobSpec.Commands, "git rev-parse HEAD && git config user.name && git config user.email") - err := os.WriteFile(ex.codePath, []byte{}, 0600) // empty diff + err := os.WriteFile(ex.codePath, []byte{}, 0o600) // empty diff require.NoError(t, err) err = ex.setupRepo(context.TODO()) @@ -164,11 +164,11 @@ func makeTestExecutor(t *testing.T) *RunExecutor { } temp := filepath.Join(baseDir, "temp") - _ = os.Mkdir(temp, 0700) + _ = os.Mkdir(temp, 0o700) home := filepath.Join(baseDir, "home") - _ = os.Mkdir(home, 0700) + _ = os.Mkdir(home, 0o700) repo := filepath.Join(baseDir, "repo") - _ = os.Mkdir(repo, 0700) + _ = os.Mkdir(repo, 0o700) ex := NewRunExecutor(temp, home, repo) ex.SetJob(body) ex.SetCodePath(filepath.Join(baseDir, "code")) // note: create file before run @@ -187,7 +187,7 @@ func makeCodeTar(t *testing.T, path string) { } for _, f := range files { - hdr := &tar.Header{Name: f.name, Mode: 0600, Size: int64(len(f.body))} + hdr := &tar.Header{Name: f.name, Mode: 0o600, Size: int64(len(f.body))} require.NoError(t, tw.WriteHeader(hdr)) _, err := tw.Write([]byte(f.body)) require.NoError(t, err) diff --git a/runner/internal/executor/repo.go b/runner/internal/executor/repo.go index 6d6839d0b..48b36907d 100644 --- a/runner/internal/executor/repo.go +++ b/runner/internal/executor/repo.go @@ -13,7 +13,7 @@ import ( // setupRepo must be called from Run func (ex *RunExecutor) setupRepo(ctx context.Context) error { if _, err := os.Stat(ex.workingDir); err != nil { - if err = os.MkdirAll(ex.workingDir, 0777); err != nil { + if err = os.MkdirAll(ex.workingDir, 0o777); err != nil { return gerrors.Wrap(err) } } diff --git a/runner/internal/log/log.go b/runner/internal/log/log.go index 99478a8f9..72d49e561 100644 --- a/runner/internal/log/log.go +++ b/runner/internal/log/log.go @@ -100,7 +100,7 @@ func GetLogger(ctx context.Context) *logrus.Entry { } func CreateAppendFile(path string) (*os.File, error) { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0644) + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0o644) if err != nil { return nil, gerrors.Wrap(err) } diff --git a/runner/internal/metrics/metrics.go b/runner/internal/metrics/metrics.go index 2fb7f6ba5..a68ec10c7 100644 --- a/runner/internal/metrics/metrics.go +++ b/runner/internal/metrics/metrics.go @@ -64,7 +64,7 @@ func (s *MetricsCollector) GetCPUUsageMicroseconds() (uint64, error) { data, err := os.ReadFile(cgroupCPUUsagePath) if err != nil { - return 0, fmt.Errorf("could not read CPU usage: %v", err) + return 0, fmt.Errorf("could not read CPU usage: %w", err) } if s.cgroupVersion == 1 { @@ -72,7 +72,7 @@ func (s *MetricsCollector) GetCPUUsageMicroseconds() (uint64, error) { usageStr := strings.TrimSpace(string(data)) cpuUsage, err := strconv.ParseUint(usageStr, 10, 64) if err != nil { - return 0, fmt.Errorf("could not parse CPU usage: %v", err) + return 0, fmt.Errorf("could not parse CPU usage: %w", err) } // convert nanoseconds to microseconds return cpuUsage / 1000, nil @@ -87,7 +87,7 @@ func (s *MetricsCollector) GetCPUUsageMicroseconds() (uint64, error) { } usageMicroseconds, err := strconv.ParseUint(parts[1], 10, 64) if err != nil { - return 0, fmt.Errorf("could not parse usage_usec: %v", err) + return 0, fmt.Errorf("could not parse usage_usec: %w", err) } return usageMicroseconds, nil } @@ -103,13 +103,13 @@ func (s *MetricsCollector) GetMemoryUsageBytes() (uint64, error) { data, err := os.ReadFile(cgroupMemoryUsagePath) if err != nil { - return 0, fmt.Errorf("could not read memory usage: %v", err) + return 0, fmt.Errorf("could not read memory usage: %w", err) } usageStr := strings.TrimSpace(string(data)) usedMemory, err := strconv.ParseUint(usageStr, 10, 64) if err != nil { - return 0, fmt.Errorf("could not parse memory usage: %v", err) + return 0, fmt.Errorf("could not parse memory usage: %w", err) } return usedMemory, nil } @@ -122,7 +122,7 @@ func (s *MetricsCollector) GetMemoryCacheBytes() (uint64, error) { statData, err := os.ReadFile(cgroupMemoryStatPath) if err != nil { - return 0, fmt.Errorf("could not read memory.stat: %v", err) + return 0, fmt.Errorf("could not read memory.stat: %w", err) } lines := strings.Split(string(statData), "\n") @@ -135,7 +135,7 @@ func (s *MetricsCollector) GetMemoryCacheBytes() (uint64, error) { } cacheBytes, err := strconv.ParseUint(parts[1], 10, 64) if err != nil { - return 0, fmt.Errorf("could not parse cache value: %v", err) + return 0, fmt.Errorf("could not parse cache value: %w", err) } return cacheBytes, nil } @@ -162,7 +162,7 @@ func (s *MetricsCollector) GetNVIDIAGPUMetrics() ([]schemas.GPUMetrics, error) { var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { - return metrics, fmt.Errorf("failed to execute nvidia-smi: %v", err) + return metrics, fmt.Errorf("failed to execute nvidia-smi: %w", err) } lines := strings.Split(strings.TrimSpace(out.String()), "\n") @@ -173,11 +173,11 @@ func (s *MetricsCollector) GetNVIDIAGPUMetrics() ([]schemas.GPUMetrics, error) { } memUsed, err := strconv.ParseUint(strings.TrimSpace(parts[0]), 10, 64) if err != nil { - return metrics, fmt.Errorf("failed to parse memory used: %v", err) + return metrics, fmt.Errorf("failed to parse memory used: %w", err) } utilization, err := strconv.ParseUint(strings.TrimSpace(strings.TrimSuffix(parts[1], "%")), 10, 64) if err != nil { - return metrics, fmt.Errorf("failed to parse GPU utilization: %v", err) + return metrics, fmt.Errorf("failed to parse GPU utilization: %w", err) } metrics = append(metrics, schemas.GPUMetrics{ GPUMemoryUsage: memUsed * 1024 * 1024, // Convert MiB to bytes @@ -195,7 +195,7 @@ func (s *MetricsCollector) GetAMDGPUMetrics() ([]schemas.GPUMetrics, error) { var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { - return nil, fmt.Errorf("failed to execute amd-smi: %v", err) + return nil, fmt.Errorf("failed to execute amd-smi: %w", err) } lines := strings.Split(strings.TrimSpace(out.String()), "\n") @@ -206,11 +206,11 @@ func (s *MetricsCollector) GetAMDGPUMetrics() ([]schemas.GPUMetrics, error) { } memUsed, err := strconv.ParseUint(strings.TrimSpace(fields[3]), 10, 64) if err != nil { - return nil, fmt.Errorf("failed to parse VRAM used: %v", err) + return nil, fmt.Errorf("failed to parse VRAM used: %w", err) } utilization, err := strconv.ParseUint(strings.TrimSpace(fields[1]), 10, 64) if err != nil { - return nil, fmt.Errorf("failed to parse GPU utilization: %v", err) + return nil, fmt.Errorf("failed to parse GPU utilization: %w", err) } metrics = append(metrics, schemas.GPUMetrics{ GPUMemoryUsage: memUsed * 1024 * 1024, @@ -224,7 +224,7 @@ func (s *MetricsCollector) GetAMDGPUMetrics() ([]schemas.GPUMetrics, error) { func getCgroupVersion() (int, error) { data, err := os.ReadFile("/proc/self/mountinfo") if err != nil { - return 0, fmt.Errorf("could not read /proc/self/mountinfo: %v", err) + return 0, fmt.Errorf("could not read /proc/self/mountinfo: %w", err) } for _, line := range strings.Split(string(data), "\n") { diff --git a/runner/internal/repo/diff.go b/runner/internal/repo/diff.go index f39d151a6..286813267 100644 --- a/runner/internal/repo/diff.go +++ b/runner/internal/repo/diff.go @@ -22,8 +22,8 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { return err } - var output = &bytes.Buffer{} - var empty = bytes.NewReader([]byte{}) + output := &bytes.Buffer{} + empty := bytes.NewReader([]byte{}) for _, fileInfo := range files { log.Trace(ctx, "apply diff file", "file", fileInfo.OldName, "text_fragments_cnt", len(fileInfo.TextFragments)) @@ -54,7 +54,7 @@ func ApplyDiff(ctx context.Context, dir, patch string) error { if !fileInfo.IsDelete { if fileInfo.IsNew || fileInfo.IsRename { dd := path.Dir(path.Join(dir, fileInfo.NewName)) - err = os.MkdirAll(dd, 0755) + err = os.MkdirAll(dd, 0o755) if err != nil { log.Warning(ctx, "diff apply new file mkdir fail", "filename", fileInfo.NewName, @@ -102,7 +102,7 @@ func fileModeHeuristic(ctx context.Context, dir string, fileInfo *gitdiff.File) } } if mode == 0 { - mode = 0644 // fallback to git no-exec default + mode = 0o644 // fallback to git no-exec default } return mode } diff --git a/runner/internal/repo/diff_test.go b/runner/internal/repo/diff_test.go index a9de2a3c7..4aa422b87 100644 --- a/runner/internal/repo/diff_test.go +++ b/runner/internal/repo/diff_test.go @@ -320,7 +320,7 @@ Last line.` if cont == "" { cont = content + cc.contAdd } - err = os.WriteFile(path.Join(dir, "original"), []byte(cont), 0660) + err = os.WriteFile(path.Join(dir, "original"), []byte(cont), 0o660) assert.NoError(t, err, "write original file") ctx := context.Background() err = ApplyDiff(ctx, dir, cc.diff) diff --git a/runner/internal/repo/manager.go b/runner/internal/repo/manager.go index 9a60ca8a1..262913e6d 100644 --- a/runner/internal/repo/manager.go +++ b/runner/internal/repo/manager.go @@ -2,6 +2,7 @@ package repo import ( "context" + "errors" "fmt" "os" @@ -72,7 +73,7 @@ func (m *Manager) Checkout() error { } } ref, err := git.PlainClone(m.localPath, false, &m.clo) - if err != nil && err != git.ErrRepositoryAlreadyExists { + if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) { return err } if ref != nil { @@ -106,7 +107,7 @@ func (m *Manager) Checkout() error { func (m *Manager) CheckoutBranch(branch string) error { log.Info(m.ctx, "git checkout", "auth", fmt.Sprintf("%T", (&m.clo).Auth)) ref, err := git.PlainClone(m.localPath, false, &m.clo) - if err != nil && err != git.ErrRepositoryAlreadyExists { + if err != nil && !errors.Is(err, git.ErrRepositoryAlreadyExists) { return err } if ref != nil { diff --git a/runner/internal/shim/api/schemas.go b/runner/internal/shim/api/schemas.go index e3f0343f7..bc76401b5 100644 --- a/runner/internal/shim/api/schemas.go +++ b/runner/internal/shim/api/schemas.go @@ -29,4 +29,3 @@ type PullResponse struct { type StopResponse struct { State string `json:"state"` } - diff --git a/runner/internal/shim/authorized_keys.go b/runner/internal/shim/authorized_keys.go index 35d5b9d57..1fdce7501 100644 --- a/runner/internal/shim/authorized_keys.go +++ b/runner/internal/shim/authorized_keys.go @@ -54,8 +54,8 @@ func AppendPublicKeys(fileKeys []string, keysToAppend []string) []string { } type AuthorizedKeys struct { - user string - lookup func(username string) (*user.User, error) + user string + lookup func(username string) (*user.User, error) } func (ak AuthorizedKeys) AppendPublicKeys(publicKeys []string) error { diff --git a/runner/internal/shim/backends/aws.go b/runner/internal/shim/backends/aws.go index cfb5eb347..6e8047448 100644 --- a/runner/internal/shim/backends/aws.go +++ b/runner/internal/shim/backends/aws.go @@ -23,7 +23,7 @@ func (e *AWSBackend) GetRealDeviceName(volumeID string) (string, error) { var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { - return "", fmt.Errorf("failed to list block devices: %v", err) + return "", fmt.Errorf("failed to list block devices: %w", err) } // Parse the output to find the device that matches the volume ID diff --git a/runner/internal/shim/backends/gcp.go b/runner/internal/shim/backends/gcp.go index 6933c023a..4d46bdb63 100644 --- a/runner/internal/shim/backends/gcp.go +++ b/runner/internal/shim/backends/gcp.go @@ -22,7 +22,7 @@ func (e *GCPBackend) GetRealDeviceName(volumeID string) (string, error) { if err != nil { deviceName, err = os.Readlink(fmt.Sprintf("/dev/disk/by-id/google-pd-%s", volumeID)) if err != nil { - return "", fmt.Errorf("failed to resolve symlink for volume %s: %v", volumeID, err) + return "", fmt.Errorf("failed to resolve symlink for volume %s: %w", volumeID, err) } } deviceName, err = filepath.Abs(filepath.Join("/dev/disk/by-id/", deviceName)) diff --git a/runner/internal/shim/docker.go b/runner/internal/shim/docker.go index 293b8179e..c8140d32f 100644 --- a/runner/internal/shim/docker.go +++ b/runner/internal/shim/docker.go @@ -36,9 +36,11 @@ import ( // TODO: Allow for configuration via cli arguments or environment variables. const ImagePullTimeout time.Duration = 20 * time.Minute -// Set to "true" on containers spawned by DockerRunner, used for identification. -const LabelKeyIsRun = "ai.dstack.shim.is-run" -const LabelValueTrue = "true" +const ( + // Set to "true" on containers spawned by DockerRunner, used for identification. + LabelKeyIsRun = "ai.dstack.shim.is-run" + LabelValueTrue = "true" +) // Depricated: Remove on next release (0.19) type ContainerStatus struct { @@ -254,7 +256,7 @@ func (d *DockerRunner) Stop(force bool) { } } -func (d DockerRunner) GetState() (RunnerStatus, ContainerStatus, string, JobResult) { +func (d *DockerRunner) GetState() (RunnerStatus, ContainerStatus, string, JobResult) { return d.state, d.containerStatus, d.executorError, d.jobResult } @@ -300,7 +302,7 @@ func unmountVolumes(taskConfig TaskConfig) error { } } if len(failed) > 0 { - return fmt.Errorf("Failed to unmount volume(s): %v", failed) + return fmt.Errorf("failed to unmount volume(s): %v", failed) } return nil } @@ -333,7 +335,7 @@ func getVolumeMountPoint(volumeName string) string { func prepareInstanceMountPoints(taskConfig TaskConfig) error { for _, mountPoint := range taskConfig.InstanceMounts { if _, err := os.Stat(mountPoint.InstancePath); errors.Is(err, os.ErrNotExist) { - if err = os.MkdirAll(mountPoint.InstancePath, 0777); err != nil { + if err = os.MkdirAll(mountPoint.InstancePath, 0o777); err != nil { return tracerr.Wrap(err) } } else if err != nil { @@ -351,7 +353,7 @@ func initFileSystem(deviceName string, errorIfNotExists bool) (bool, error) { var out bytes.Buffer cmd.Stdout = &out if err := cmd.Run(); err != nil { - return false, fmt.Errorf("failed to check if disk is formatted: %v", err) + return false, fmt.Errorf("failed to check if disk is formatted: %w", err) } // If the output is not empty, the disk is already formatted @@ -367,7 +369,7 @@ func initFileSystem(deviceName string, errorIfNotExists bool) (bool, error) { log.Printf("Formatting disk %s with ext4 filesystem...\n", deviceName) cmd = exec.Command("mkfs.ext4", "-F", deviceName) if output, err := cmd.CombinedOutput(); err != nil { - return false, fmt.Errorf("failed to format disk: %s, output: %s", err, string(output)) + return false, fmt.Errorf("failed to format disk: %w, output: %s", err, string(output)) } log.Println("Disk formatted succesfully!") return true, nil @@ -377,8 +379,8 @@ func mountDisk(deviceName, mountPoint string) error { // Create the mount point directory if it doesn't exist if _, err := os.Stat(mountPoint); os.IsNotExist(err) { fmt.Printf("Creating mount point %s...\n", mountPoint) - if err := os.MkdirAll(mountPoint, 0755); err != nil { - return fmt.Errorf("failed to create mount point: %s", err) + if err := os.MkdirAll(mountPoint, 0o755); err != nil { + return fmt.Errorf("failed to create mount point: %w", err) } } @@ -386,7 +388,7 @@ func mountDisk(deviceName, mountPoint string) error { log.Printf("Mounting disk %s to %s...\n", deviceName, mountPoint) cmd := exec.Command("mount", deviceName, mountPoint) if output, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to mount disk: %s, output: %s", err, string(output)) + return fmt.Errorf("failed to mount disk: %w, output: %s", err, string(output)) } log.Println("Disk mounted successfully!") @@ -515,7 +517,7 @@ func createContainer(ctx context.Context, client docker.APIClient, runnerDir str } mounts = append(mounts, instanceMounts...) - //Set the environment variables + // Set the environment variables envVars := []string{} if dockerParams.DockerPJRTDevice() != "" { envVars = append(envVars, fmt.Sprintf("PJRT_DEVICE=%s", dockerParams.DockerPJRTDevice())) @@ -690,6 +692,8 @@ func configureGpuIfAvailable(hostConfig *container.HostConfig) { // --security-opt=seccomp=unconfined hostConfig.SecurityOpt = append(hostConfig.SecurityOpt, "seccomp=unconfined") // TODO: in addition, for non-root user, --group-add=video, and possibly --group-add=render, are required. + case NoVendor: + // nothing to do } } @@ -760,7 +764,7 @@ func getContainerLastLogs(client docker.APIClient, containerID string, n int) ([ for scanner.Scan() { lines = append(lines, scanner.Text()) } - if err := scanner.Err(); err != nil && err != io.EOF { + if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) { return nil, err } @@ -769,15 +773,15 @@ func getContainerLastLogs(client docker.APIClient, containerID string, n int) ([ /* DockerParameters interface implementation for CLIArgs */ -func (c CLIArgs) DockerPrivileged() bool { +func (c *CLIArgs) DockerPrivileged() bool { return c.Docker.Privileged } -func (c CLIArgs) DockerPJRTDevice() string { +func (c *CLIArgs) DockerPJRTDevice() string { return c.Docker.PJRTDevice } -func (c CLIArgs) DockerShellCommands(publicKeys []string) []string { +func (c *CLIArgs) DockerShellCommands(publicKeys []string) []string { concatinatedPublicKeys := c.Docker.ConcatinatedPublicSSHKeys if len(publicKeys) > 0 { concatinatedPublicKeys = strings.Join(publicKeys, "\n") @@ -787,7 +791,7 @@ func (c CLIArgs) DockerShellCommands(publicKeys []string) []string { return commands } -func (c CLIArgs) DockerMounts(hostRunnerDir string) ([]mount.Mount, error) { +func (c *CLIArgs) DockerMounts(hostRunnerDir string) ([]mount.Mount, error) { return []mount.Mount{ { Type: mount.TypeBind, @@ -802,11 +806,11 @@ func (c CLIArgs) DockerMounts(hostRunnerDir string) ([]mount.Mount, error) { }, nil } -func (c CLIArgs) DockerPorts() []int { +func (c *CLIArgs) DockerPorts() []int { return []int{c.Runner.HTTPPort, c.Docker.SSHPort} } -func (c CLIArgs) MakeRunnerDir() (string, error) { +func (c *CLIArgs) MakeRunnerDir() (string, error) { runnerTemp := filepath.Join(c.Shim.HomeDir, "runners", time.Now().Format("20060102-150405")) if err := os.MkdirAll(runnerTemp, 0o755); err != nil { return "", tracerr.Wrap(err) @@ -853,7 +857,7 @@ func FindExecutorError(runnerDir string) string { for { line, _, err := scanner.LineBytes() if err != nil { - if err == io.EOF { + if errors.Is(err, io.EOF) { return "" // consts.ExecutorFailedSignature is not found in file } log.Printf("FindExecutorError scan error: %s\n", err) diff --git a/runner/internal/shim/gpu.go b/runner/internal/shim/gpu.go index bdd61d800..b63ef2877 100644 --- a/runner/internal/shim/gpu.go +++ b/runner/internal/shim/gpu.go @@ -46,6 +46,8 @@ func GetGpuInfo() []GpuInfo { return getNvidiaGpuInfo() case Amd: return getAmdGpuInfo() + case NoVendor: + return []GpuInfo{} } return []GpuInfo{} } @@ -79,7 +81,7 @@ func getNvidiaGpuInfo() []GpuInfo { } for { record, err := r.Read() - if err == io.EOF { + if errors.Is(err, io.EOF) { break } if err != nil {