Skip to content

Add time-based cleanup for Maven snapshot versions #33420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2645,7 +2645,12 @@ LEVEL = Info
;LIMIT_SIZE_HELM = -1
;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_MAVEN = -1
;; Number of most recent Maven snapshot builds to retain per snapshot version. `-1` retains all builds, while `1` retains only the latest build. The value must be `-1` or a positive integer.
;; When set to a positive value, the "cleanup expired packages/data" task also removes the files of older builds from all Maven snapshot versions.
;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
;; Enable debug mode for the Maven snapshot cleanup. While enabled, snapshot version artifacts are not deleted; the files that would have been deleted are only logged.
;DEBUG_MAVEN_CLEANUP = true
;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_NPM = -1
;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_NUGET = -1
Expand Down
72 changes: 72 additions & 0 deletions models/packages/package_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package packages

import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"time"
Expand All @@ -21,6 +23,8 @@ func init() {
}

var (
// ErrMetadataFile indicates that a file is a Maven metadata file (maven-metadata.xml)
ErrMetadataFile = errors.New("metadata file")
// ErrDuplicatePackageFile indicates a duplicated package file error
ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists")
// ErrPackageFileNotExist indicates a package file not exist error
Expand Down Expand Up @@ -226,6 +230,74 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
return db.Exist[PackageFile](ctx, opts.toConds())
}

// GetFilesBelowBuildNumber loads every file of the given Maven snapshot version and
// partitions them by the build number encoded in the file name.
//
// The first returned slice holds the files whose build number is <= maxBuildNumber.
// The second returned slice holds the files whose name could not be parsed for a
// build number. Metadata files (ErrMetadataFile) end up in neither slice, and files
// with a build number above maxBuildNumber are dropped as well.
//
// maxBuildNumber must be positive, otherwise an error is returned.
func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) {
	if maxBuildNumber < 1 {
		return nil, nil, errors.New("maxBuildNumber must be a positive integer")
	}

	candidates, err := GetFilesByVersionID(ctx, versionID)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to retrieve files: %w", err)
	}

	var (
		matched []*PackageFile
		skipped []*PackageFile
	)
	for _, pf := range candidates {
		number, parseErr := extractBuildNumberFromFileName(pf.Name, classifiers...)
		switch {
		case parseErr == nil:
			if number <= maxBuildNumber {
				matched = append(matched, pf)
			}
		case errors.Is(parseErr, ErrMetadataFile):
			// Metadata files carry no build number and are never reported.
		default:
			skipped = append(skipped, pf)
		}
	}

	return matched, skipped, nil
}

// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name.
// Expected formats:
//
//	"artifact-1.0.0-20250311.083409-9.tgz" returns 9
//	"artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10
//
// The part after the last "." is treated as the file extension and dropped. If the
// remaining name ends in "-<classifier>" for one of the given classifiers, that suffix
// is removed; when several classifiers match (for example "sources" and "test-sources"),
// the longest one is removed so the result does not depend on the order of classifiers.
// The token after the last remaining "-" must then be the build number.
//
// It returns ErrMetadataFile for any name containing "maven-metadata.xml", and a
// descriptive error when the name has no extension, no dash, or a non-numeric build token.
func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) {
	if strings.Contains(filename, "maven-metadata.xml") {
		return 0, ErrMetadataFile
	}

	dotIdx := strings.LastIndex(filename, ".")
	if dotIdx == -1 {
		return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename)
	}
	base := filename[:dotIdx]

	// Remove the classifier suffix if present. One classifier can be a suffix of
	// another, so pick the longest match instead of the first one encountered.
	longestSuffix := ""
	for _, classifier := range classifiers {
		suffix := "-" + classifier
		if len(suffix) > len(longestSuffix) && strings.HasSuffix(base, suffix) {
			longestSuffix = suffix
		}
	}
	base = strings.TrimSuffix(base, longestSuffix)

	// The build number should be the token after the last dash.
	lastDash := strings.LastIndex(base, "-")
	if lastDash == -1 {
		return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename)
	}
	buildNumberStr := base[lastDash+1:]
	buildNumber, err := strconv.Atoi(buildNumberStr)
	if err != nil {
		return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %w", buildNumberStr, filename, err)
	}

	return buildNumber, nil
}

// CalculateFileSize sums up all blob sizes matching the search options.
// It does NOT respect the deduplication of blobs.
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) {
Expand Down
11 changes: 8 additions & 3 deletions models/packages/package_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType

// GetVersionsByPackageType gets all versions of a specific type
func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) {
pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{
OwnerID: ownerID,
opts := &PackageSearchOptions{
Type: packageType,
IsInternal: optional.Some(false),
})
}

if ownerID != 0 {
opts.OwnerID = ownerID
}

pvs, _, err := SearchVersions(ctx, opts)
return pvs, err
}

Expand Down
56 changes: 56 additions & 0 deletions modules/packages/maven/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ package maven

import (
"encoding/xml"
"errors"
"io"
"strconv"

"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/modules/validation"
Expand All @@ -31,6 +33,12 @@ type Dependency struct {
Version string `json:"version,omitempty"`
}

// SnapshotMetadata struct holds the build number and the list of classifiers for a snapshot version
type SnapshotMetadata struct {
// BuildNumber is the snapshot build number as an integer.
BuildNumber int `json:"build_number,omitempty"`
// Classifiers lists the classifiers of the snapshot version; omitted from JSON when empty.
Classifiers []string `json:"classifiers,omitempty"`
}

type pomStruct struct {
XMLName xml.Name `xml:"project"`

Expand Down Expand Up @@ -61,6 +69,26 @@ type pomStruct struct {
} `xml:"dependencies>dependency"`
}

// snapshotMetadataStruct is the XML decoding target for a document whose root
// element is <metadata>. All values are kept as the raw strings found in the XML.
type snapshotMetadataStruct struct {
XMLName xml.Name `xml:"metadata"`
GroupID string `xml:"groupId"`
ArtifactID string `xml:"artifactId"`
Version string `xml:"version"`
// Versioning maps the <versioning> element.
Versioning struct {
LastUpdated string `xml:"lastUpdated"`
// Snapshot maps <versioning><snapshot>; BuildNumber is still a string here.
Snapshot struct {
Timestamp string `xml:"timestamp"`
BuildNumber string `xml:"buildNumber"`
} `xml:"snapshot"`
// SnapshotVersions maps every <snapshotVersions><snapshotVersion> entry.
SnapshotVersions []struct {
Extension string `xml:"extension"`
Classifier string `xml:"classifier"`
Value string `xml:"value"`
Updated string `xml:"updated"`
} `xml:"snapshotVersions>snapshotVersion"`
} `xml:"versioning"`
}

// ParsePackageMetaData parses the metadata of a pom file
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
var pom pomStruct
Expand Down Expand Up @@ -109,3 +137,31 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
Dependencies: dependencies,
}, nil
}

// ParseSnapshotVersionMetaData decodes Maven snapshot version metadata from r and
// returns the build number together with every non-empty classifier listed in the
// snapshot versions, in document order.
//
// It returns the decoder's error when the XML cannot be decoded, and a generic error
// when the build number element is absent or is not an integer.
func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) {
	decoder := xml.NewDecoder(r)
	decoder.CharsetReader = charset.NewReaderLabel

	var doc snapshotMetadataStruct
	if err := decoder.Decode(&doc); err != nil {
		return nil, err
	}

	versioning := &doc.Versioning
	latestBuild, convErr := strconv.Atoi(versioning.Snapshot.BuildNumber)
	if convErr != nil {
		return nil, errors.New("invalid or missing build number in snapshot metadata")
	}

	result := &SnapshotMetadata{BuildNumber: latestBuild}
	for i := range versioning.SnapshotVersions {
		if classifier := versioning.SnapshotVersions[i].Classifier; classifier != "" {
			result.Classifiers = append(result.Classifiers, classifier)
		}
	}

	return result, nil
}
11 changes: 8 additions & 3 deletions modules/setting/packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,13 @@ var (
LimitSizeSwift int64
LimitSizeVagrant int64

DefaultRPMSignEnabled bool
DefaultRPMSignEnabled bool
RetainMavenSnapshotBuilds int
DebugMavenCleanup bool
}{
Enabled: true,
LimitTotalOwnerCount: -1,
Enabled: true,
LimitTotalOwnerCount: -1,
RetainMavenSnapshotBuilds: -1,
}
)

Expand Down Expand Up @@ -88,6 +91,8 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT")
Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true)
return nil
}

Expand Down
7 changes: 6 additions & 1 deletion services/packages/cleanup/cleanup.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package container
package cleanup

import (
"context"
Expand All @@ -20,11 +20,12 @@
cargo_service "code.gitea.io/gitea/services/packages/cargo"
container_service "code.gitea.io/gitea/services/packages/container"
debian_service "code.gitea.io/gitea/services/packages/debian"
maven_service "code.gitea.io/gitea/services/packages/maven"
rpm_service "code.gitea.io/gitea/services/packages/rpm"
)

// CleanupTask executes cleanup rules and cleanup expired package data
func CleanupTask(ctx context.Context, olderThan time.Duration) error {

Check failure on line 28 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-backend

exported: func name will be used as cleanup.CleanupTask by other packages, and that stutters; consider calling this Task (revive)

Check failure on line 28 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-go-gogit

exported: func name will be used as cleanup.CleanupTask by other packages, and that stutters; consider calling this Task (revive)

Check failure on line 28 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-go-windows

exported: func name will be used as cleanup.CleanupTask by other packages, and that stutters; consider calling this Task (revive)
if err := ExecuteCleanupRules(ctx); err != nil {
return err
}
Expand Down Expand Up @@ -155,7 +156,7 @@
return committer.Commit()
}

func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error {

Check failure on line 159 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-backend

exported: func name will be used as cleanup.CleanupExpiredData by other packages, and that stutters; consider calling this ExpiredData (revive)

Check failure on line 159 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-go-gogit

exported: func name will be used as cleanup.CleanupExpiredData by other packages, and that stutters; consider calling this ExpiredData (revive)

Check failure on line 159 in services/packages/cleanup/cleanup.go

View workflow job for this annotation

GitHub Actions / lint-go-windows

exported: func name will be used as cleanup.CleanupExpiredData by other packages, and that stutters; consider calling this ExpiredData (revive)
ctx, committer, err := db.TxContext(outerCtx)
if err != nil {
return err
Expand All @@ -166,6 +167,10 @@
return err
}

if err := maven_service.CleanupSnapshotVersions(ctx); err != nil {
return err
}

ps, err := packages_model.FindUnreferencedPackages(ctx)
if err != nil {
return err
Expand Down
133 changes: 133 additions & 0 deletions services/packages/maven/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package maven

import (
"context"
"fmt"
"strings"

"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/packages/maven"
"code.gitea.io/gitea/modules/setting"
packages_service "code.gitea.io/gitea/services/packages"
)

// CleanupSnapshotVersions removes outdated files for SNAPHOT versions for all Maven packages.
func CleanupSnapshotVersions(ctx context.Context) error {
retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
debugSession := setting.Packages.DebugMavenCleanup
log.Debug("Starting Maven CleanupSnapshotVersions with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession)

if retainBuilds == -1 {
log.Info("Maven CleanupSnapshotVersions skipped because retainBuilds is set to -1")
return nil
}

if retainBuilds < 1 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd merge this condition into the one above:
if retainBuilds < 0 { skip }

return fmt.Errorf("Maven CleanupSnapshotVersions: forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
}

versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
if err != nil {
return fmt.Errorf("Maven CleanupSnapshotVersions: failed to retrieve Maven package versions: %w", err)
}

var errors []error

for _, version := range versions {
if !isSnapshotVersion(version.Version) {
continue
}

if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil {
errors = append(errors, fmt.Errorf("Maven CleanupSnapshotVersions: version '%s' (ID: %d): %w", version.Version, version.ID, err))
}
}

if len(errors) > 0 {
for _, err := range errors {
log.Warn("Maven CleanupSnapshotVersions: Error during cleanup: %v", err)
}
return fmt.Errorf("Maven CleanupSnapshotVersions: cleanup completed with errors: %v", errors)
}

log.Debug("Completed Maven CleanupSnapshotVersions")
return nil
}

// isSnapshotVersion reports whether the version string ends with "-SNAPSHOT".
// The comparison is case-sensitive.
func isSnapshotVersion(version string) bool {
	_, found := strings.CutSuffix(version, "-SNAPSHOT")
	return found
}

// cleanSnapshotFiles deletes the files of one snapshot version that belong to builds
// older than the retainBuilds most recent ones.
//
// The newest build number and the known classifiers are read from the version's
// maven-metadata.xml. Files with a build number <= (newest - retainBuilds) are
// candidates for removal; nothing happens when that cutoff is not positive.
// With debugSession set, the candidate and skipped file names are only logged and
// no file is deleted. The first failing deletion aborts the loop and is returned.
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error {
	log.Debug("Starting Maven cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession)

	metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
	if err != nil {
		return fmt.Errorf("cleanSnapshotFiles: failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
	}

	latestBuild, classifiers, err := extractMaxBuildNumber(ctx, metadataFile)
	if err != nil {
		return fmt.Errorf("cleanSnapshotFiles: failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
	}

	cutoff := latestBuild - retainBuilds
	if cutoff < 1 {
		log.Debug("cleanSnapshotFiles: No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
		return nil
	}

	stale, unparsable, err := packages.GetFilesBelowBuildNumber(ctx, versionID, cutoff, classifiers...)
	if err != nil {
		return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version ID %d: %w", versionID, err)
	}

	if !debugSession {
		for _, pf := range stale {
			log.Debug("Removing file '%s' below threshold %d", pf.Name, cutoff)
			if delErr := packages_service.DeletePackageFile(ctx, pf); delErr != nil {
				return fmt.Errorf("Maven cleanSnapshotFiles: failed to delete file '%s': %w", pf.Name, delErr)
			}
		}

		log.Debug("Completed Maven cleanSnapshotFiles for versionID: %d", versionID)
		return nil
	}

	// Dry run: report what would have happened without touching any file.
	var staleNames, unparsableNames []string
	for _, pf := range stale {
		staleNames = append(staleNames, pf.Name)
	}
	for _, pf := range unparsable {
		unparsableNames = append(unparsableNames, pf.Name)
	}

	log.Info("cleanSnapshotFiles: Debug session active. Files to remove: %v, Skipped files: %v", staleNames, unparsableNames)
	return nil
}

// extractMaxBuildNumber opens the blob behind metadataFile, parses it as Maven
// snapshot version metadata and returns the build number and classifiers found in it.
// The content stream is closed before the function returns.
func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) {
	blob, err := packages.GetBlobByID(ctx, metadataFile.BlobID)
	if err != nil {
		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package blob: %w", err)
	}

	stream, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, blob, nil, true)
	if err != nil {
		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package file stream: %w", err)
	}
	defer stream.Close()

	meta, err := maven.ParseSnapshotVersionMetaData(stream)
	if err != nil {
		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to parse maven-metadata.xml: %w", err)
	}

	return meta.BuildNumber, meta.Classifiers, nil
}
Loading
Loading